[mod] improve seekr engines and add documentation
This patch adds some more fields to the result items and changes paging to use the ``nextResultSet`` given in seekr's JSON response. Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									2bab658d39
								
							
						
					
					
						commit
						9100a48541
					
				
							
								
								
									
										13
									
								
								docs/dev/engines/online/seekr.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								docs/dev/engines/online/seekr.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | |||||||
|  | .. _seekr engine: | ||||||
|  | 
 | ||||||
|  | ============= | ||||||
|  | Seekr Engines | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | .. contents:: Contents | ||||||
|  |    :depth: 2 | ||||||
|  |    :local: | ||||||
|  |    :backlinks: entry | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.seekr | ||||||
|  |   :members: | ||||||
| @ -1,50 +1,120 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| # lint: pylint | # lint: pylint | ||||||
| """Seekr (images, videos, news) | """seekr.com Seeker Score | ||||||
|  | 
 | ||||||
|  | Seekr is a privately held search and content evaluation engine that prioritizes | ||||||
|  | credibility over popularity. | ||||||
|  | 
 | ||||||
|  | Configuration | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | The engine has the following additional settings: | ||||||
|  | 
 | ||||||
|  | - :py:obj:`seekr_category` | ||||||
|  | - :py:obj:`api_key` | ||||||
|  | 
 | ||||||
|  | This implementation is used by seekr engines in the :ref:`settings.yml | ||||||
|  | <settings engine>`: | ||||||
|  | 
 | ||||||
|  | .. code:: yaml | ||||||
|  | 
 | ||||||
|  |   - name: seekr news | ||||||
|  |     seekr_category: news | ||||||
|  |     ... | ||||||
|  |   - name: seekr images | ||||||
|  |     seekr_category: images | ||||||
|  |     ... | ||||||
|  |   - name: seekr videos | ||||||
|  |     seekr_category: videos | ||||||
|  |     ... | ||||||
|  | 
 | ||||||
|  | Known Quirks | ||||||
|  | ============ | ||||||
|  | 
 | ||||||
|  | The implementation to support :py:obj:`paging <searx.enginelib.Engine.paging>` | ||||||
|  | is based on the *nextpage* method of Seekr's REST API.  This feature is *next | ||||||
|  | page driven* and plays well with the :ref:`infinite_scroll <settings ui>` | ||||||
|  | setting in SearXNG but it does not really fit into SearXNG's UI to select a page | ||||||
|  | by number. | ||||||
|  | 
 | ||||||
|  | Implementations | ||||||
|  | =============== | ||||||
|  | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from json import loads | from json import loads | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode | ||||||
|  | from flask_babel import gettext | ||||||
| 
 | 
 | ||||||
| about = { | about = { | ||||||
|     "website": 'https://seekr.com/', |     "website": 'https://seekr.com/', | ||||||
|     "official_api_documentation": None, |     "official_api_documentation": None, | ||||||
|     "use_official_api": True, |     "use_official_api": False, | ||||||
|     "require_api_key": True, |     "require_api_key": True, | ||||||
|     "results": 'JSON', |     "results": 'JSON', | ||||||
|  |     "language": 'en', | ||||||
| } | } | ||||||
| paging = True  # news search doesn't support paging |  | ||||||
| 
 | 
 | ||||||
| base_url = "https://api.seekr.com" | base_url = "https://api.seekr.com" | ||||||
| # v2/newssearch, v1/imagetab, v1/videotab | paging = True | ||||||
| seekr_path = "newssearch" | 
 | ||||||
| seekr_api_version = "v2" |  | ||||||
| api_key = "srh1-22fb-sekr" | api_key = "srh1-22fb-sekr" | ||||||
| results_per_page = 10 | """API key / reverse engineered / is still the same one since 2022.""" | ||||||
|  | 
 | ||||||
|  | seekr_category: str = 'unset' | ||||||
|  | """Search category, any of ``news``, ``videos`` or ``images``.""" | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def init(engine_settings): | ||||||
|  | 
 | ||||||
|  |     # global paging | ||||||
|  |     if engine_settings['seekr_category'] not in ['news', 'videos', 'images']: | ||||||
|  |         raise ValueError(f"Unsupported seekr category: {engine_settings['seekr_category']}") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|  | 
 | ||||||
|  |     if not query: | ||||||
|  |         return None | ||||||
|  | 
 | ||||||
|     args = { |     args = { | ||||||
|         'query': query, |         'query': query, | ||||||
|         'apiKey': api_key, |         'apiKey': api_key, | ||||||
|         'limit': results_per_page, |  | ||||||
|         'offset': (params['pageno'] - 1) * results_per_page, |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     path = f"{seekr_api_version}/{seekr_path}" |     api_url = base_url + '/engine' | ||||||
|     if seekr_api_version == "v1": |     if seekr_category == 'news': | ||||||
|         path = seekr_path |         api_url += '/v2/newssearch' | ||||||
| 
 | 
 | ||||||
|     params['url'] = f"{base_url}/engine/{path}?{urlencode(args)}" |     elif seekr_category == 'images': | ||||||
|  |         api_url += '/imagetab' | ||||||
|  | 
 | ||||||
|  |     elif seekr_category == 'videos': | ||||||
|  |         api_url += '/videotab' | ||||||
|  | 
 | ||||||
|  |     params['url'] = f"{api_url}?{urlencode(args)}" | ||||||
|  |     if params['pageno'] > 1: | ||||||
|  |         nextpage = params['engine_data'].get('nextpage') | ||||||
|  |         if nextpage: | ||||||
|  |             params['url'] = nextpage | ||||||
| 
 | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _images_response(json): | def _images_response(json): | ||||||
|     results = [] |  | ||||||
| 
 | 
 | ||||||
|     for result in json['expertResponses'][0]['advice']['results']: |     search_results = json.get('expertResponses') | ||||||
|  |     if search_results: | ||||||
|  |         search_results = search_results[0].get('advice') | ||||||
|  |     else:  # response from a 'nextResultSet' | ||||||
|  |         search_results = json.get('advice') | ||||||
|  | 
 | ||||||
|  |     results = [] | ||||||
|  |     if not search_results: | ||||||
|  |         return results | ||||||
|  | 
 | ||||||
|  |     for result in search_results['results']: | ||||||
|         summary = loads(result['summary']) |         summary = loads(result['summary']) | ||||||
|         results.append( |         results.append( | ||||||
|             { |             { | ||||||
| @ -53,52 +123,96 @@ def _images_response(json): | |||||||
|                 'title': result['title'], |                 'title': result['title'], | ||||||
|                 'img_src': result['url'], |                 'img_src': result['url'], | ||||||
|                 'img_format': f"{summary['width']}x{summary['height']}", |                 'img_format': f"{summary['width']}x{summary['height']}", | ||||||
|  |                 'thumbnail_src': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], | ||||||
|             } |             } | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     if search_results.get('nextResultSet'): | ||||||
|  |         results.append( | ||||||
|  |             { | ||||||
|  |                 "engine_data": search_results.get('nextResultSet'), | ||||||
|  |                 "key": "nextpage", | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _videos_response(json): | def _videos_response(json): | ||||||
|     results = [] |  | ||||||
| 
 | 
 | ||||||
|     for result in json['expertResponses'][0]['advice']['results']: |     search_results = json.get('expertResponses') | ||||||
|  |     if search_results: | ||||||
|  |         search_results = search_results[0].get('advice') | ||||||
|  |     else:  # response from a 'nextResultSet' | ||||||
|  |         search_results = json.get('advice') | ||||||
|  | 
 | ||||||
|  |     results = [] | ||||||
|  |     if not search_results: | ||||||
|  |         return results | ||||||
|  | 
 | ||||||
|  |     for result in search_results['results']: | ||||||
|  |         summary = loads(result['summary']) | ||||||
|         results.append( |         results.append( | ||||||
|             { |             { | ||||||
|                 'template': 'videos.html', |                 'template': 'videos.html', | ||||||
|                 'url': result['url'], |                 'url': result['url'], | ||||||
|                 'title': result['title'], |                 'title': result['title'], | ||||||
|  |                 'thumbnail': 'https://media.seekr.com/engine/rp/' + summary['tg'] + '/?src= ' + result['thumbnail'], | ||||||
|             } |             } | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     if search_results.get('nextResultSet'): | ||||||
|  |         results.append( | ||||||
|  |             { | ||||||
|  |                 "engine_data": search_results.get('nextResultSet'), | ||||||
|  |                 "key": "nextpage", | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _news_response(json): | def _news_response(json): | ||||||
|     results = [] |  | ||||||
| 
 | 
 | ||||||
|     for result in json['expertResponses'][0]['advice']['categorySearchResult']['searchResult']['results']: |     search_results = json.get('expertResponses') | ||||||
|  |     if search_results: | ||||||
|  |         search_results = search_results[0]['advice']['categorySearchResult']['searchResult'] | ||||||
|  |     else:  # response from a 'nextResultSet' | ||||||
|  |         search_results = json.get('advice') | ||||||
|  | 
 | ||||||
|  |     results = [] | ||||||
|  |     if not search_results: | ||||||
|  |         return results | ||||||
|  | 
 | ||||||
|  |     for result in search_results['results']: | ||||||
|  | 
 | ||||||
|         results.append( |         results.append( | ||||||
|             { |             { | ||||||
|                 'url': result['url'], |                 'url': result['url'], | ||||||
|                 'title': result['title'], |                 'title': result['title'], | ||||||
|                 'content': result['summary'], |                 'content': result['summary'] or result["topCategory"] or result["displayUrl"] or '', | ||||||
|                 'thumbnail': result.get('thumbnail', ''), |                 'thumbnail': result.get('thumbnail', ''), | ||||||
|                 'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'), |                 'publishedDate': datetime.strptime(result['pubDate'][:19], '%Y-%m-%d %H:%M:%S'), | ||||||
|  |                 'metadata': gettext("Language") + ': ' + result.get('language', ''), | ||||||
|             } |             } | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|  |     if search_results.get('nextResultSet'): | ||||||
|  |         results.append( | ||||||
|  |             { | ||||||
|  |                 "engine_data": search_results.get('nextResultSet'), | ||||||
|  |                 "key": "nextpage", | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def response(resp): | def response(resp): | ||||||
|     json = resp.json() |     json = resp.json() | ||||||
| 
 | 
 | ||||||
|     if seekr_path == "videotab": |     if seekr_category == "videos": | ||||||
|         return _videos_response(json) |         return _videos_response(json) | ||||||
|     if seekr_path == "imagetab": |     if seekr_category == "images": | ||||||
|         return _images_response(json) |         return _images_response(json) | ||||||
|     if seekr_path == "newssearch": |     if seekr_category == "news": | ||||||
|         return _news_response(json) |         return _news_response(json) | ||||||
| 
 | 
 | ||||||
|     raise ValueError(f"Unsupported seekr path: {seekr_path}") |     raise ValueError(f"Unsupported seekr category: {seekr_category}") | ||||||
|  | |||||||
| @ -1809,27 +1809,25 @@ engines: | |||||||
| 
 | 
 | ||||||
|   - name: seekr news |   - name: seekr news | ||||||
|     engine: seekr |     engine: seekr | ||||||
|     paging: false |  | ||||||
|     shortcut: senews |     shortcut: senews | ||||||
|     categories: news |     categories: news | ||||||
|     seekr_path: newssearch |     seekr_category: news | ||||||
|     seekr_api_version: v2 |  | ||||||
|     disabled: true |     disabled: true | ||||||
| 
 | 
 | ||||||
|   - name: seekr images |   - name: seekr images | ||||||
|     engine: seekr |     engine: seekr | ||||||
|  |     network: seekr news | ||||||
|     shortcut: seimg |     shortcut: seimg | ||||||
|     categories: images |     categories: images | ||||||
|     seekr_path: imagetab |     seekr_category: images | ||||||
|     seekr_api_version: v1 |  | ||||||
|     disabled: true |     disabled: true | ||||||
| 
 | 
 | ||||||
|   - name: seekr videos |   - name: seekr videos | ||||||
|     engine: seekr |     engine: seekr | ||||||
|  |     network: seekr news | ||||||
|     shortcut: sevid |     shortcut: sevid | ||||||
|     categories: videos |     categories: videos | ||||||
|     seekr_path: videotab |     seekr_category: videos | ||||||
|     seekr_api_version: v1 |  | ||||||
|     disabled: true |     disabled: true | ||||||
| 
 | 
 | ||||||
|   - name: sjp.pwn |   - name: sjp.pwn | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user