Merge pull request #149 from Cqoicebordel/Flickr-engine
Rework Flickr Engine
This commit is contained in:
		
						commit
						af41607410
					
				
							
								
								
									
										102
									
								
								searx/engines/flickr-noapi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								searx/engines/flickr-noapi.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,102 @@ | |||||||
|  | #!/usr/bin/env python | ||||||
|  | 
 | ||||||
|  | ## Flickr (Images) | ||||||
|  | #  | ||||||
|  | # @website     https://www.flickr.com | ||||||
|  | # @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)  | ||||||
|  | #  | ||||||
|  | # @using-api   no | ||||||
|  | # @results     HTML | ||||||
|  | # @stable      no | ||||||
|  | # @parse       url, title, thumbnail, img_src | ||||||
|  | 
 | ||||||
|  | from urllib import urlencode | ||||||
|  | from json import loads | ||||||
|  | from urlparse import urljoin | ||||||
|  | from lxml import html | ||||||
|  | import re | ||||||
|  | 
 | ||||||
|  | categories = ['images'] | ||||||
|  | 
 | ||||||
|  | url = 'https://secure.flickr.com/' | ||||||
|  | search_url = url+'search/?{query}&page={page}' | ||||||
|  | photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' | ||||||
|  | regex = re.compile(r"\"search-photos-models\",\"photos\":(.*}),\"totalItems\":", re.DOTALL) | ||||||
|  | 
 | ||||||
|  | paging = True | ||||||
|  | 
 | ||||||
def build_flickr_url(user_id, photo_id):
    """Return the photo page URL for the given owner and photo ids.

    The ids are substituted into the module-level ``photo_url`` template.
    """
    return photo_url.format(photoid=photo_id, userid=user_id)
|  | 
 | ||||||
|  | 
 | ||||||
def request(query, params):
    """Set ``params['url']`` to the Flickr search URL for this query.

    ``query`` is URL-encoded as the ``text`` parameter and
    ``params['pageno']`` is used as the page number.  The same ``params``
    dict is returned.
    """
    encoded_query = urlencode({'text': query})
    params['url'] = search_url.format(query=encoded_query,
                                      page=params['pageno'])
    return params
|  | 
 | ||||||
|  | 
 | ||||||
def response(resp):
    """Build image results from a Flickr HTML search page.

    The module-level ``regex`` is used to pull the embedded JSON photo model
    out of ``resp.text``; it is decoded with ``loads`` and every entry under
    its ``'_data'`` key becomes one result.

    :param resp: response object; only ``resp.text`` is read.
    :returns: list of dicts with the keys ``url``, ``title``, ``img_src``,
              ``content`` and ``template``.  The list is empty when the
              regex does not match or the decoded model has no ``'_data'``.
    """
    results = []

    matches = regex.search(resp.text)
    if matches is None:
        return results

    search_results = loads(matches.group(1))
    if '_data' not in search_results:
        return []

    # From the biggest to the lowest format
    size_keys = ('o', 'k', 'h', 'b', 'c', 'z', 'n', 'm', 't', 'q', 's')

    for photo in search_results['_data']:

        # In paged configuration, the first pages' photos are represented
        # by a None object
        if photo is None:
            continue

        sizes = photo['sizes']
        for size_key in size_keys:
            if size_key in sizes:
                # Read the same key that was tested: looking up a different
                # key (e.g. 'to' after testing for 't') raises KeyError.
                img_src = sizes[size_key]['displayUrl']
                break
        else:
            # none of the known sizes is available for this photo
            continue

        url = build_flickr_url(photo['owner']['id'], photo['id'])

        title = photo['title']

        # NOTE(review): username and description are concatenated into HTML
        # unescaped -- confirm the template/consumer sanitises them.
        content = '<span class="photo-author">' + photo['owner']['username'] + '</span><br />'

        if 'description' in photo:
            content = content + '<span class="description">' + photo['description'] + '</span>'

        # append result
        results.append({'url': url,
                        'title': title,
                        'img_src': img_src,
                        'content': content,
                        'template': 'images.html'})

    return results
| @ -1,54 +1,80 @@ | |||||||
| #!/usr/bin/env python | #!/usr/bin/env python | ||||||
| 
 | 
 | ||||||
|  | ## Flickr (Images) | ||||||
|  | #  | ||||||
|  | # @website     https://www.flickr.com | ||||||
|  | # @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html)  | ||||||
|  | #  | ||||||
|  | # @using-api   yes | ||||||
|  | # @results     JSON | ||||||
|  | # @stable      yes | ||||||
|  | # @parse       url, title, thumbnail, img_src | ||||||
|  | #More info on api-key : https://www.flickr.com/services/apps/create/ | ||||||
|  | 
 | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| #from json import loads | from json import loads | ||||||
| from urlparse import urljoin |  | ||||||
| from lxml import html |  | ||||||
| from time import time |  | ||||||
| 
 | 
 | ||||||
| categories = ['images'] | categories = ['images'] | ||||||
| 
 | 
 | ||||||
| url = 'https://secure.flickr.com/' | nb_per_page = 15 | ||||||
| search_url = url+'search/?{query}&page={page}' | paging = True | ||||||
| results_xpath = '//div[@class="view display-item-tile"]/figure/div' | api_key= None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}' | ||||||
|  | photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}' | ||||||
| 
 | 
 | ||||||
| paging = True | paging = True | ||||||
| 
 | 
 | ||||||
def build_flickr_url(user_id, photo_id):
    """Format the module-level ``photo_url`` template with the two ids."""
    return photo_url.format(photoid=photo_id, userid=user_id)
|  | 
 | ||||||
| 
 | 
 | ||||||
def request(query, params):
    """Set ``params['url']`` to the Flickr API search URL for this query.

    The module-level ``url`` template is filled with the URL-encoded
    ``text`` parameter, the configured ``api_key`` and ``nb_per_page``, and
    ``params['pageno']`` as the page number.  The same ``params`` dict is
    returned.
    """
    encoded_text = urlencode({'text': query})
    params['url'] = url.format(text=encoded_text,
                               api_key=api_key,
                               nb_per_page=nb_per_page,
                               page=params['pageno'])
    return params
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def response(resp):
    """Build image results from the JSON body in ``resp.text``.

    Each entry of ``['photos']['photo']`` becomes one result dict with the
    keys ``url``, ``title``, ``img_src``, ``content`` and ``template``.
    Entries that have neither ``url_o`` nor ``url_z`` are skipped.  An empty
    list is returned when ``'photos'`` or ``'photos' -> 'photo'`` is missing.
    """
    results = []

    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'photos' not in search_results:
        return []
    if 'photo' not in search_results['photos']:
        return []

    # parse results
    for photo in search_results['photos']['photo']:
        # prefer the original-size URL, fall back to the 'z' size
        if 'url_o' in photo:
            img_src = photo['url_o']
        elif 'url_z' in photo:
            img_src = photo['url_z']
        else:
            continue

        page_url = build_flickr_url(photo['owner'], photo['id'])
        title = photo['title']

        author_html = '<span class="photo-author">' + photo['ownername'] + '</span><br />'
        description_html = '<span class="description">' + photo['description']['_content'] + '</span>'
        content = author_html + description_html

        # append result
        results.append({'url': page_url,
                        'title': title,
                        'img_src': img_src,
                        'content': content,
                        'template': 'images.html'})

    # return results
    return results
|  | |||||||
| @ -70,10 +70,14 @@ engines: | |||||||
|     shortcut : px |     shortcut : px | ||||||
| 
 | 
 | ||||||
|   - name : flickr |   - name : flickr | ||||||
|     engine : flickr |  | ||||||
|     categories : images |     categories : images | ||||||
|     shortcut : fl |     shortcut : fl | ||||||
|     timeout: 3.0 | # You can use the engine using the official stable API, but you need an API key | ||||||
|  | # See : https://www.flickr.com/services/apps/create/ | ||||||
|  | #    engine : flickr | ||||||
|  | #    api_key: 'apikey' # required! | ||||||
|  | # Or you can use the html non-stable engine, activated by default | ||||||
|  |     engine : flickr-noapi | ||||||
| 
 | 
 | ||||||
|   - name : general-file |   - name : general-file | ||||||
|     engine : generalfile |     engine : generalfile | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user