[mod] add doc to tavily and slightly improve the engine
- Config options like ``search_type`` renamed to follow the upstream
  API (``topic``).
- Default ``max_results`` is set to 5
- use image description if one exists
- add an init function to check engine's settings
- settings example: additional category 'ai'
To review the added documentation of this patch::
    make docs.live
and jump to: http://0.0.0.0:8000/dev/engines/online/tavily.html
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									d8a4d589eb
								
							
						
					
					
						commit
						1273ed7f7d
					
				
							
								
								
									
										8
									
								
								docs/dev/engines/online/tavily.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								docs/dev/engines/online/tavily.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | |||||||
|  | .. _tavily engine: | ||||||
|  | 
 | ||||||
|  | ====== | ||||||
|  | Tavily | ||||||
|  | ====== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.tavily | ||||||
|  |    :members: | ||||||
| @ -1,81 +1,213 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| """ | """ | ||||||
| Tavily AI Engine | 
 | ||||||
|  | .. sidebar:: info | ||||||
|  | 
 | ||||||
|  |    Before reporting an issue with this engine, | ||||||
|  |    please consult `API error codes`_. | ||||||
|  | 
 | ||||||
|  | Tavily_ search API (AI engine).  This engine implements the REST API | ||||||
|  | (`POST /search`_) and does not make use of the `Tavily Python Wrapper`_. | ||||||
|  | 
 | ||||||
|  | From the API response this engine generates *result items* (shown in the main | ||||||
|  | result list) and an *answer result* (shown on top of the main result list). | ||||||
|  | If the *answer* from Tavily contains an image, the *answer result* is turned | ||||||
|  | into an *infobox result*. | ||||||
|  | 
 | ||||||
|  | .. attention:: | ||||||
|  | 
 | ||||||
|  |    AI queries take considerably longer to process than queries to conventional | ||||||
|  |    search engines.  The ``timeout`` should therefore also be set considerably | ||||||
|  |    higher, but it is not recommended to activate AI queries by default | ||||||
|  |    (set ``disabled: true``), as otherwise all user searches will have to wait | ||||||
|  |    for the AI. | ||||||
|  | 
 | ||||||
|  | .. _Tavily: https://tavily.com/ | ||||||
|  | .. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/ | ||||||
|  | .. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search | ||||||
|  | .. _Tavily API Credit Deduction: | ||||||
|  |    https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview | ||||||
|  | .. _Getting started: https://docs.tavily.com/docs/welcome#getting-started | ||||||
|  | .. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes | ||||||
|  | 
 | ||||||
|  | Configuration | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | The engine has the following mandatory settings: | ||||||
|  | 
 | ||||||
|  | - :py:obj:`api_key` | ||||||
|  | - :py:obj:`topic` | ||||||
|  | 
 | ||||||
|  | Optional settings are: | ||||||
|  | 
 | ||||||
|  | - :py:obj:`days` | ||||||
|  | - :py:obj:`search_depth` | ||||||
|  | - :py:obj:`max_results` | ||||||
|  | - :py:obj:`include_images` | ||||||
|  | - :py:obj:`include_domains` | ||||||
|  | - :py:obj:`exclude_domains` | ||||||
|  | 
 | ||||||
|  | Example configuration for general search queries: | ||||||
|  | 
 | ||||||
|  | .. code:: yaml | ||||||
|  | 
 | ||||||
|  |   - name: tavily | ||||||
|  |     engine: tavily | ||||||
|  |     shortcut: tav | ||||||
|  |     categories: [general, ai] | ||||||
|  |     api_key: xxxxxxxx | ||||||
|  |     topic: general | ||||||
|  |     include_images: true | ||||||
|  |     timeout: 15 | ||||||
|  |     disabled: true | ||||||
|  | 
 | ||||||
|  | Example configuration for news search: | ||||||
|  | 
 | ||||||
|  | .. code:: yaml | ||||||
|  | 
 | ||||||
|  |   - name: tavily news | ||||||
|  |     engine: tavily | ||||||
|  |     shortcut: tavnews | ||||||
|  |     categories: [news, ai] | ||||||
|  |     api_key: xxxxxxxx | ||||||
|  |     topic: news | ||||||
|  |     timeout: 15 | ||||||
|  |     disabled: true | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | Implementation | ||||||
|  | ============== | ||||||
|  | 
 | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from json import dumps | from json import dumps | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| from searx.exceptions import SearxEngineAPIException | from flask_babel import gettext | ||||||
| 
 | 
 | ||||||
| # about | # about | ||||||
| about = { | about = { | ||||||
|     "website": 'https://tavily.com/', |     "website": "https://tavily.com/", | ||||||
|     "wikidata_id": None, |     "wikidata_id": None, | ||||||
|     "official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference', |     "official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference", | ||||||
|     "use_official_api": True, |     "use_official_api": True, | ||||||
|     "require_api_key": True, |     "require_api_key": True, | ||||||
|     "results": 'JSON', |     "results": 'JSON', | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| search_url = 'https://api.tavily.com/search' | search_url = "https://api.tavily.com/search" | ||||||
| paging = False | paging = False | ||||||
| time_range_support = True | time_range_support = True | ||||||
| 
 | 
 | ||||||
| search_type = 'search'  # possible values: search, news | api_key: str = "unset" | ||||||
| api_key = 'unset' | """Tavily API Key (`Getting started`_).""" | ||||||
| max_results = 20 | 
 | ||||||
| search_depth = 'basic'  # The depth of the search. It can be "basic" or "advanced". | search_depth: str = "basic" | ||||||
| include_images = False  # Include query-related images. Turns answer into infobox with first image. | """The depth of the search.  It can be ``basic`` or ``advanced``.  Default is | ||||||
| include_domains = []  # A list of domains to specifically include in the search results. | ``basic`` unless specified otherwise in a given method. | ||||||
| exclude_domains = []  # A list of domains to specifically exclude from the search results. | 
 | ||||||
|  | - have an eye on your `Tavily API Credit Deduction`_! | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | topic: str = "" | ||||||
|  | """The category of the search.  This will determine which of tavily's agents | ||||||
|  | will be used for the search.  Currently: only ``general`` and ``news`` are | ||||||
|  | supported and ``general`` will implicitly activate ``include_answer`` in the | ||||||
|  | `POST /search`_ API.""" | ||||||
|  | 
 | ||||||
|  | days: int = 3 | ||||||
|  | """The number of days back from the current date to include in the search results. | ||||||
|  | This specifies the time frame of data to be retrieved.  Please note that this | ||||||
|  | feature is only available when using the ``news`` search topic. Default is 3.""" | ||||||
|  | 
 | ||||||
|  | max_results: int = 5 | ||||||
|  | """The maximum number of search results to return.  Default is 5.""" | ||||||
|  | 
 | ||||||
|  | include_images: bool = False | ||||||
|  | """Include a list of query-related images in the response.  Turns answer into | ||||||
|  | infobox with first image (as far as there are any images in the response).  Will | ||||||
|  | implicitly activate ``include_image_descriptions`` in the `POST /search`_ API | ||||||
|  | (adds descriptive text for each image). | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | include_domains: list[str] = [] | ||||||
|  | """A list of domains to specifically include in the search results. Default | ||||||
|  | is ``[]``, which includes all domains.""" | ||||||
|  | 
 | ||||||
|  | exclude_domains: list[str] = [] | ||||||
|  | """A list of domains to specifically exclude from the search results. Default | ||||||
|  | is ``[]``, which doesn't exclude any domains. | ||||||
|  | """ | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     if api_key == 'unset': |  | ||||||
|         raise SearxEngineAPIException('missing Tavily API key') |  | ||||||
| 
 | 
 | ||||||
|     data = { |     data = { | ||||||
|         'query': query, |         "query": query, | ||||||
|         'api_key': api_key, |         "api_key": api_key, | ||||||
|         'search_depth': 'basic', |         "search_depth": search_depth, | ||||||
|         'time_range': params["time_range"], |         "topic": topic, | ||||||
|         'max_results': max_results, |         "time_range": params["time_range"], | ||||||
|         'include_images': include_images, |         "max_results": max_results, | ||||||
|         'include_domains': include_domains, |         "include_images": include_images, | ||||||
|         'exclude_domains': exclude_domains, |         "include_domains": include_domains, | ||||||
|  |         "exclude_domains": exclude_domains, | ||||||
|     } |     } | ||||||
|     if search_type == 'search': |  | ||||||
|         data['include_answer'] = True |  | ||||||
|     elif search_type == 'news': |  | ||||||
|         data['topic'] = 'news' |  | ||||||
|     else: |  | ||||||
|         raise ValueError(f"Invalid search type {search_type}") |  | ||||||
| 
 | 
 | ||||||
|     params['url'] = search_url |     if include_images: | ||||||
|     params['method'] = 'POST' |         data["include_image_descriptions"] = True | ||||||
|     params['headers']['content-type'] = 'application/json' | 
 | ||||||
|     params['data'] = dumps(data) |     if topic == "general": | ||||||
|  |         data["include_answer"] = True | ||||||
|  | 
 | ||||||
|  |     elif topic == "news": | ||||||
|  |         data["topic"] = "news" | ||||||
|  |         data["days"] = days | ||||||
|  | 
 | ||||||
|  |     params["url"] = search_url | ||||||
|  |     params["method"] = "POST" | ||||||
|  |     params["headers"]["Content-type"] = "application/json" | ||||||
|  |     params["data"] = dumps(data) | ||||||
|  | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def response(resp): | def response(resp): | ||||||
|     results = [] |     results = [] | ||||||
|     json_resp = resp.json() |     data = resp.json() | ||||||
| 
 | 
 | ||||||
|     for result in json_resp.get('results', []): |     for result in data.get("results", []): | ||||||
|         results.append( |         results.append( | ||||||
|             { |             { | ||||||
|                 'title': result['title'], |                 "title": f"[{gettext('ai')}] {result['title']}", | ||||||
|                 'url': result['url'], |                 "url": result["url"], | ||||||
|                 'content': result['content'], |                 "content": result["content"], | ||||||
|                 'publishedDate': _parse_date(result.get('published_date')), |                 "publishedDate": _parse_date(result.get("published_date")), | ||||||
|             } |             } | ||||||
|         ) |         ) | ||||||
| 
 | 
 | ||||||
|     if json_resp['images']: |     img_list = data.get("images") | ||||||
|         results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']}) |     if img_list: | ||||||
|     elif json_resp['answer']: |         content = data.get("answer") | ||||||
|         results.append({'answer': json_resp['answer']}) |         img_src = img_list[0] | ||||||
|  |         if isinstance(img_list[0], dict): | ||||||
|  |             img_src = img_list[0]["url"] | ||||||
|  |             img_caption = gettext("Image caption") + ": " + img_list[0]["description"] | ||||||
|  |             if not content: | ||||||
|  |                 gettext("Image caption") | ||||||
|  |                 content = img_caption | ||||||
|  |             else: | ||||||
|  |                 content += "//" + img_caption | ||||||
|  | 
 | ||||||
|  |         results.append( | ||||||
|  |             { | ||||||
|  |                 "infobox": f"Tavily [{gettext('ai')}]", | ||||||
|  |                 "img_src": img_src, | ||||||
|  |                 "content": content, | ||||||
|  |             } | ||||||
|  |         ) | ||||||
|  | 
 | ||||||
|  |     elif data["answer"]: | ||||||
|  |         results.append({"answer": data["answer"]}) | ||||||
| 
 | 
 | ||||||
|     return results |     return results | ||||||
| 
 | 
 | ||||||
| @ -83,7 +215,26 @@ def response(resp): | |||||||
| def _parse_date(pubDate): | def _parse_date(pubDate): | ||||||
|     if pubDate is not None: |     if pubDate is not None: | ||||||
|         try: |         try: | ||||||
|             return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z') |             return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z") | ||||||
|         except (ValueError, TypeError) as e: |         except (ValueError, TypeError) as e: | ||||||
|             logger.debug("ignore exception (publishedDate): %s", e) |             logger.debug("ignore exception (publishedDate): %s", e) | ||||||
|     return None |     return None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def init(engine_settings: dict): | ||||||
|  |     msg = [] | ||||||
|  | 
 | ||||||
|  |     val = engine_settings.get("api_key") or api_key | ||||||
|  |     if not val or val == "unset": | ||||||
|  |         msg.append("missing api_key") | ||||||
|  | 
 | ||||||
|  |     val = engine_settings.get("topic") or topic | ||||||
|  |     if val not in ["general", "news"]: | ||||||
|  |         msg.append(f"invalid topic: '{val}'") | ||||||
|  | 
 | ||||||
|  |     val = engine_settings.get("search_depth") or search_depth | ||||||
|  |     if val not in ["basic", "advanced"]: | ||||||
|  |         msg.append(f"invalid search_depth: '{val}'") | ||||||
|  | 
 | ||||||
|  |     if msg: | ||||||
|  |         raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(msg)}") | ||||||
|  | |||||||
| @ -1828,21 +1828,28 @@ engines: | |||||||
|     shortcut: tm |     shortcut: tm | ||||||
|     disabled: true |     disabled: true | ||||||
| 
 | 
 | ||||||
|  |   # Tavily requires an API key as well as other configurations. Before you | ||||||
|  |   # activate these engines you should read the documentation. | ||||||
|  |   # --> https://docs.searxng.org/dev/engines/online/tavily.html | ||||||
|  |   # | ||||||
|   # - name: tavily |   # - name: tavily | ||||||
|   #   engine: tavily |   #   engine: tavily | ||||||
|   #   shortcut: tav |   #   shortcut: tav | ||||||
|   #   categories: general |   #   categories: [general, ai] | ||||||
|   #   # API key required, see: https://docs.tavily.com/docs/welcome#getting-started |   #   api_key: unset | ||||||
|   #   api_key: 'unset' |   #   topic: general | ||||||
|   #   include_images: false |   #   include_images: true | ||||||
|   #   timeout: 15.0 |   #   timeout: 15 | ||||||
|  |   #   disabled: true | ||||||
|  |   # | ||||||
|   # - name: tavily news |   # - name: tavily news | ||||||
|   #   engine: tavily |   #   engine: tavily | ||||||
|   #   shortcut: tavnews |   #   shortcut: tavnews | ||||||
|   #   categories: news |   #   categories: [news, ai] | ||||||
|   #   api_key: 'unset' |   #   api_key: unset | ||||||
|   #   search_type: news |   #   topic: news | ||||||
|   #   timeout: 15.0 |   #   timeout: 15 | ||||||
|  |   #   disabled: true | ||||||
| 
 | 
 | ||||||
|   # Requires Tor |   # Requires Tor | ||||||
|   - name: torch |   - name: torch | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user