[mod] 500px rewrite
This commit is contained in:
		
							parent
							
								
									f6e9c074bb
								
							
						
					
					
						commit
						1a82ed6f54
					
				| @ -12,12 +12,10 @@ | |||||||
|  @todo        rewrite to api |  @todo        rewrite to api | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| 
 | from json import loads | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from urlparse import urljoin | from urlparse import urljoin | ||||||
| from lxml import html | from xml.sax.saxutils import escape | ||||||
| import re |  | ||||||
| from searx.engines.xpath import extract_text |  | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['images'] | categories = ['images'] | ||||||
| @ -25,13 +23,27 @@ paging = True | |||||||
| 
 | 
 | ||||||
| # search-url | # search-url | ||||||
| base_url = 'https://500px.com' | base_url = 'https://500px.com' | ||||||
| search_url = base_url + '/search?search?page={pageno}&type=photos&{query}' | search_url = 'https://api.500px.com/v1/photos/search?type=photos'\ | ||||||
|  |     '&{query}'\ | ||||||
|  |     '&image_size%5B%5D=4'\ | ||||||
|  |     '&image_size%5B%5D=20'\ | ||||||
|  |     '&image_size%5B%5D=21'\ | ||||||
|  |     '&image_size%5B%5D=1080'\ | ||||||
|  |     '&image_size%5B%5D=1600'\ | ||||||
|  |     '&image_size%5B%5D=2048'\ | ||||||
|  |     '&include_states=true'\ | ||||||
|  |     '&formats=jpeg%2Clytro'\ | ||||||
|  |     '&include_tags=true'\ | ||||||
|  |     '&exclude_nude=true'\ | ||||||
|  |     '&page={pageno}'\ | ||||||
|  |     '&rpp=50'\ | ||||||
|  |     '&sdk_key=b68e60cff4c929bedea36ca978830c5caca790c3' | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     params['url'] = search_url.format(pageno=params['pageno'], |     params['url'] = search_url.format(pageno=params['pageno'], | ||||||
|                                       query=urlencode({'q': query})) |                                       query=urlencode({'term': query})) | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| @ -40,19 +52,16 @@ def request(query, params): | |||||||
| def response(resp): | def response(resp): | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     dom = html.fromstring(resp.text) |     response_json = loads(resp.text) | ||||||
|     regex = re.compile(r'3\.jpg.*$') |  | ||||||
| 
 | 
 | ||||||
|     # parse results |     # parse results | ||||||
|     for result in dom.xpath('//div[@class="photo"]'): |     for result in response_json['photos']: | ||||||
|         link = result.xpath('.//a')[0] |         url = urljoin(base_url, result['url']) | ||||||
|         url = urljoin(base_url, link.attrib.get('href')) |         title = escape(result['name']) | ||||||
|         title = extract_text(result.xpath('.//div[@class="title"]')) |         # last index is the biggest resolution | ||||||
|         thumbnail_src = link.xpath('.//img')[0].attrib.get('src') |         img_src = result['image_url'][-1] | ||||||
|         # To have a bigger thumbnail, uncomment the next line |         thumbnail_src = result['image_url'][0] | ||||||
|         # thumbnail_src = regex.sub('4.jpg', thumbnail_src) |         content = escape(result['description'] or '') | ||||||
|         content = extract_text(result.xpath('.//div[@class="info"]')) |  | ||||||
|         img_src = regex.sub('2048.jpg', thumbnail_src) |  | ||||||
| 
 | 
 | ||||||
|         # append result |         # append result | ||||||
|         results.append({'url': url, |         results.append({'url': url, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user