Merge pull request #1456 from return42/fix-1449
[fix] engine tineye: handle 422 response of not supported img format
This commit is contained in:
		
						commit
						39d0156f38
					
				| @ -17,6 +17,7 @@ billion images `[tineye.com] <https://tineye.com/how>`_. | |||||||
| 
 | 
 | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
|  | from flask_babel import gettext | ||||||
| 
 | 
 | ||||||
| about = { | about = { | ||||||
|     "website": 'https://tineye.com', |     "website": 'https://tineye.com', | ||||||
| @ -28,20 +29,41 @@ about = { | |||||||
| } | } | ||||||
| 
 | 
 | ||||||
| engine_type = 'online_url_search' | engine_type = 'online_url_search' | ||||||
|  | """:py:obj:`searx.search.processors.online_url_search`""" | ||||||
|  | 
 | ||||||
| categories = ['general'] | categories = ['general'] | ||||||
| paging = True | paging = True | ||||||
| safesearch = False | safesearch = False | ||||||
| base_url = 'https://tineye.com' | base_url = 'https://tineye.com' | ||||||
| search_string = '/result_json/?page={page}&{query}' | search_string = '/result_json/?page={page}&{query}' | ||||||
| 
 | 
 | ||||||
|  | FORMAT_NOT_SUPPORTED = gettext( | ||||||
|  |     "Could not read that image url. This may be due to an unsupported file" | ||||||
|  |     " format. TinEye only supports images that are JPEG, PNG, GIF, BMP, TIFF or WebP." | ||||||
|  | ) | ||||||
|  | """TinEye error message""" | ||||||
|  | 
 | ||||||
|  | NO_SIGNATURE_ERROR = gettext( | ||||||
|  |     "The image is too simple to find matches. TinEye requires a basic level of" | ||||||
|  |     " visual detail to successfully identify matches." | ||||||
|  | ) | ||||||
|  | """TinEye error message""" | ||||||
|  | 
 | ||||||
|  | DOWNLOAD_ERROR = gettext("The image could not be downloaded.") | ||||||
|  | """TinEye error message""" | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|  |     """Build TinEye HTTP request using ``search_urls`` of a :py:obj:`engine_type`.""" | ||||||
|  | 
 | ||||||
|  |     params['raise_for_httperror'] = False | ||||||
| 
 | 
 | ||||||
|     if params['search_urls']['data:image']: |     if params['search_urls']['data:image']: | ||||||
|         query = params['search_urls']['data:image'] |         query = params['search_urls']['data:image'] | ||||||
|     elif params['search_urls']['http']: |     elif params['search_urls']['http']: | ||||||
|         query = params['search_urls']['http'] |         query = params['search_urls']['http'] | ||||||
| 
 | 
 | ||||||
|  |     logger.debug("query URL: %s", query) | ||||||
|     query = urlencode({'url': query}) |     query = urlencode({'url': query}) | ||||||
| 
 | 
 | ||||||
|     # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py |     # see https://github.com/TinEye/pytineye/blob/main/pytineye/api.py | ||||||
| @ -59,45 +81,145 @@ def request(query, params): | |||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
def parse_tineye_match(match_json):
    """Takes parsed JSON from the API server and turns it into a :py:obj:`dict`
    object.

    Attributes `(class Match) <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__

    - `image_url`, link to the result image.
    - `domain`, domain this result was found on.
    - `score`, a number (0 to 100) that indicates how closely the images match.
    - `width`, image width in pixels.
    - `height`, image height in pixels.
    - `size`, image area in pixels.
    - `image_format`, image format (read from the `format` field of the JSON).
    - `filesize`, image size in bytes.
    - `overlay`, overlay URL.
    - `tags`, whether this match belongs to a collection or stock domain.

    - `backlinks`, a list of Backlink objects pointing to the original websites
      and image URLs. List items are instances of :py:obj:`dict`, (`Backlink
      <https://github.com/TinEye/pytineye/blob/main/pytineye/api.py>`__):

      - `url`, the image URL to the image.
      - `backlink`, the original website URL.
      - `crawl_date`, the date the image was crawled (:py:obj:`datetime`);
        :py:obj:`datetime.min` when the date is missing or can't be parsed.
      - `image_name`, value of the `image_name` field of the backlink.

    Fields missing in ``match_json`` are returned as ``None``.  A missing or
    ``null`` ``backlinks`` field results in an empty list, items of the list
    that are not a :py:obj:`dict` are skipped.

    """

    # HINT: there exists an alternative backlink dict in the domains list / e.g.::
    #
    #     match_json['domains'][0]['backlinks']

    backlinks = []

    # ``or []`` covers a missing key as well as an explicit ``null`` value
    for backlink_json in match_json.get("backlinks") or []:
        if not isinstance(backlink_json, dict):
            continue

        crawl_date = backlink_json.get("crawl_date")
        try:
            # The last three characters are cut off before parsing (unchanged
            # behavior) -- NOTE(review): presumably trailing sub-second digits,
            # confirm against the API response.
            crawl_date = datetime.fromisoformat(crawl_date[:-3]) if crawl_date else datetime.min
        except (TypeError, ValueError):
            # one unparsable date must not discard the whole result list
            crawl_date = datetime.min

        backlinks.append(
            {
                'url': backlink_json.get("url"),
                'backlink': backlink_json.get("backlink"),
                'crawl_date': crawl_date,
                'image_name': backlink_json.get("image_name"),
            }
        )

    return {
        'image_url': match_json.get("image_url"),
        'domain': match_json.get("domain"),
        'score': match_json.get("score"),
        'width': match_json.get("width"),
        'height': match_json.get("height"),
        'size': match_json.get("size"),
        'image_format': match_json.get("format"),
        'filesize': match_json.get("filesize"),
        'overlay': match_json.get("overlay"),
        'tags': match_json.get("tags"),
        'backlinks': backlinks,
    }
|  | 
 | ||||||
|  | 
 | ||||||
def response(resp):
    """Parse HTTP response from TinEye.

    :param resp: HTTP response object; the code reads ``resp.json()``,
        ``resp.is_error``, ``resp.status_code`` and may call
        ``resp.raise_for_status()``.
    :returns: list of result dicts (``images.html`` template), followed by a
        ``{'number_of_results': ...}`` item when TinEye reports a non-zero
        ``num_matches``.  For HTTP status 400 and 422 the error is logged and
        an empty list is returned.
    :raises: whatever ``resp.raise_for_status()`` raises for any other HTTP
        error status.
    """
    results = []

    try:
        json_data = resp.json()
    except Exception as exc:  # pylint: disable=broad-except
        msg = "can't parse JSON response // %s" % exc
        logger.error("can't parse JSON response // %s", exc)
        json_data = {'error': msg}

    if not isinstance(json_data, dict):
        # everything below expects a JSON object
        logger.error("unexpected JSON payload of type %s", type(json_data).__name__)
        json_data = {}

    # handle error codes from Tineye

    if resp.is_error:
        if resp.status_code in (400, 422):

            message = 'HTTP status: %s' % resp.status_code
            error = json_data.get('error')
            # 'suggestions' may be missing or null
            s_key = (json_data.get('suggestions') or {}).get('key', '')

            if error and s_key:
                message = "%s (%s)" % (error, s_key)
            elif error:
                message = error

            if s_key == "Invalid image URL":
                # test https://docs.searxng.org/_static/searxng-wordmark.svg
                message = FORMAT_NOT_SUPPORTED
            elif s_key == 'NO_SIGNATURE_ERROR':
                # test https://pngimg.com/uploads/dot/dot_PNG4.png
                message = NO_SIGNATURE_ERROR
            elif s_key == 'Download Error':
                # test https://notexists
                message = DOWNLOAD_ERROR

            # see https://github.com/searxng/searxng/pull/1456#issuecomment-1193105023
            # results.append({'answer': message})
            logger.error(message)

            return results

        resp.raise_for_status()

    # append results from matches

    # 'matches' is absent when the body could not be parsed (json_data is then
    # only the {'error': ...} placeholder) -- treat that as "no matches"
    for match_json in json_data.get('matches') or []:

        tineye_match = parse_tineye_match(match_json)
        if not tineye_match['backlinks']:
            continue

        # only the first backlink of a match is used for the result item
        backlink = tineye_match['backlinks'][0]
        results.append(
            {
                'template': 'images.html',
                'url': backlink['backlink'],
                'thumbnail_src': tineye_match['image_url'],
                'source': backlink['url'],
                'title': backlink['image_name'],
                'img_src': backlink['url'],
                'format': tineye_match['image_format'],
                'width': tineye_match['width'],
                'height': tineye_match['height'],
                'publishedDate': backlink['crawl_date'],
            }
        )

    # append number of results

    number_of_results = json_data.get('num_matches')
    if number_of_results:
        results.append({'number_of_results': number_of_results})

    return results
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user