[mod] add doc to tavily and slightly improve the engine
- Config options like ``search_type`` renamed to follow the upstream
  API (``topic``).
- Default ``max_results`` is set to 5
- use image description if one exists
- add an init function to check engine's settings
- settings example: additional category 'ai'
To review the documentation added by this patch::
    make docs.live
and jump to: http://0.0.0.0:8000/dev/engines/online/tavily.html
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									d8a4d589eb
								
							
						
					
					
						commit
						1273ed7f7d
					
				
							
								
								
									
										8
									
								
								docs/dev/engines/online/tavily.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								docs/dev/engines/online/tavily.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| .. _tavily engine: | ||||
| 
 | ||||
| ====== | ||||
| Tavily | ||||
| ====== | ||||
| 
 | ||||
| .. automodule:: searx.engines.tavily | ||||
|    :members: | ||||
| @ -1,81 +1,213 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Tavily AI Engine | ||||
| 
 | ||||
| .. sidebar:: info | ||||
| 
 | ||||
|    Before reporting an issue with this engine, | ||||
|    please consult `API error codes`_. | ||||
| 
 | ||||
| Tavily_ search API (AI engine).  This engine implements the REST API | ||||
| (`POST /search`_) and does not make use of the `Tavily Python Wrapper`_. | ||||
| 
 | ||||
| From the API response this engine generates *result items* (shown in the main | ||||
| result list) and an *answer result* (shown on top of the main result list). | ||||
| If the *answer* from Tavily contains an image, the *answer result* is turned | ||||
| into an *infobox result*. | ||||
| 
 | ||||
| .. attention:: | ||||
| 
 | ||||
|    AI queries take considerably longer to process than queries to conventional | ||||
|    search engines.  The ``timeout`` should therefore also be set considerably | ||||
|    higher, but it is not recommended to activate AI queries by default | ||||
|    (set ``disabled: true``), as otherwise all user searches will have to wait | ||||
|    for the AI. | ||||
| 
 | ||||
| .. _Tavily: https://tavily.com/ | ||||
| .. _Tavily Python Wrapper: https://pypi.org/project/tavily-python/ | ||||
| .. _POST /search: https://docs.tavily.com/docs/rest-api/api-reference#endpoint-post-search | ||||
| .. _Tavily API Credit Deduction: | ||||
|    https://docs.tavily.com/docs/rest-api/api-reference#tavily-api-credit-deduction-overview | ||||
| .. _Getting started: https://docs.tavily.com/docs/welcome#getting-started | ||||
| .. _API error codes: https://docs.tavily.com/docs/rest-api/api-reference#error-codes | ||||
| 
 | ||||
| Configuration | ||||
| ============= | ||||
| 
 | ||||
| The engine has the following mandatory settings: | ||||
| 
 | ||||
| - :py:obj:`api_key` | ||||
| - :py:obj:`topic` | ||||
| 
 | ||||
| Optional settings are: | ||||
| 
 | ||||
| - :py:obj:`days` | ||||
| - :py:obj:`search_depth` | ||||
| - :py:obj:`max_results` | ||||
| - :py:obj:`include_images` | ||||
| - :py:obj:`include_domains` | ||||
| - :py:obj:`exclude_domains` | ||||
| 
 | ||||
| Example configuration for general search queries: | ||||
| 
 | ||||
| .. code:: yaml | ||||
| 
 | ||||
|   - name: tavily | ||||
|     engine: tavily | ||||
|     shortcut: tav | ||||
|     categories: [general, ai] | ||||
|     api_key: xxxxxxxx | ||||
|     topic: general | ||||
|     include_images: true | ||||
|     timeout: 15 | ||||
|     disabled: true | ||||
| 
 | ||||
| Example configuration for news search: | ||||
| 
 | ||||
| .. code:: yaml | ||||
| 
 | ||||
|   - name: tavily news | ||||
|     engine: tavily | ||||
|     shortcut: tavnews | ||||
|     categories: [news, ai] | ||||
|     api_key: xxxxxxxx | ||||
|     topic: news | ||||
|     timeout: 15 | ||||
|     disabled: true | ||||
| 
 | ||||
| 
 | ||||
| Implementation | ||||
| ============== | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from json import dumps | ||||
| from datetime import datetime | ||||
| from searx.exceptions import SearxEngineAPIException | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://tavily.com/', | ||||
|     "website": "https://tavily.com/", | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://docs.tavily.com/docs/rest-api/api-reference', | ||||
|     "official_api_documentation": "https://docs.tavily.com/docs/rest-api/api-reference", | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": True, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| search_url = 'https://api.tavily.com/search' | ||||
| search_url = "https://api.tavily.com/search" | ||||
| paging = False | ||||
| time_range_support = True | ||||
| 
 | ||||
| search_type = 'search'  # possible values: search, news | ||||
| api_key = 'unset' | ||||
| max_results = 20 | ||||
| search_depth = 'basic'  # The depth of the search. It can be "basic" or "advanced". | ||||
| include_images = False  # Include query-related images. Turns answer into infobox with first image. | ||||
| include_domains = []  # A list of domains to specifically include in the search results. | ||||
| exclude_domains = []  # A list of domains to specifically exclude from the search results. | ||||
| api_key: str = "unset" | ||||
| """Tavily API Key (`Getting started`_).""" | ||||
| 
 | ||||
| search_depth: str = "basic" | ||||
| """The depth of the search.  It can be ``basic`` or ``advanced``.  Default is | ||||
| ``basic`` unless specified otherwise in a given method. | ||||
| 
 | ||||
| - keep an eye on your `Tavily API Credit Deduction`_! | ||||
| """ | ||||
| 
 | ||||
| topic: str = "" | ||||
| """The category of the search.  This will determine which of tavily's agents | ||||
| will be used for the search.  Currently: only ``general`` and ``news`` are | ||||
| supported and ``general`` will implicitly activate ``include_answer`` in the | ||||
| `POST /search`_ API.""" | ||||
| 
 | ||||
| days: int = 3 | ||||
| """The number of days back from the current date to include in the search results. | ||||
| This specifies the time frame of data to be retrieved.  Please note that this | ||||
| feature is only available when using the ``news`` search topic. Default is 3.""" | ||||
| 
 | ||||
| max_results: int = 5 | ||||
| """The maximum number of search results to return.  Default is 5.""" | ||||
| 
 | ||||
| include_images: bool = False | ||||
| """Include a list of query-related images in the response.  Turns answer into | ||||
| infobox with first image (as far as there are any images in the response).  Will | ||||
| implicitly activate ``include_image_descriptions`` in the `POST /search`_ API | ||||
| (adds descriptive text for each image). | ||||
| """ | ||||
| 
 | ||||
| include_domains: list[str] = [] | ||||
| """A list of domains to specifically include in the search results. Default | ||||
| is ``[]``, which includes all domains.""" | ||||
| 
 | ||||
| exclude_domains: list[str] = [] | ||||
| """A list of domains to specifically exclude from the search results. Default | ||||
| is ``[]``, which doesn't exclude any domains. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     if api_key == 'unset': | ||||
|         raise SearxEngineAPIException('missing Tavily API key') | ||||
| 
 | ||||
|     data = { | ||||
|         'query': query, | ||||
|         'api_key': api_key, | ||||
|         'search_depth': 'basic', | ||||
|         'time_range': params["time_range"], | ||||
|         'max_results': max_results, | ||||
|         'include_images': include_images, | ||||
|         'include_domains': include_domains, | ||||
|         'exclude_domains': exclude_domains, | ||||
|         "query": query, | ||||
|         "api_key": api_key, | ||||
|         "search_depth": search_depth, | ||||
|         "topic": topic, | ||||
|         "time_range": params["time_range"], | ||||
|         "max_results": max_results, | ||||
|         "include_images": include_images, | ||||
|         "include_domains": include_domains, | ||||
|         "exclude_domains": exclude_domains, | ||||
|     } | ||||
|     if search_type == 'search': | ||||
|         data['include_answer'] = True | ||||
|     elif search_type == 'news': | ||||
|         data['topic'] = 'news' | ||||
|     else: | ||||
|         raise ValueError(f"Invalid search type {search_type}") | ||||
| 
 | ||||
|     params['url'] = search_url | ||||
|     params['method'] = 'POST' | ||||
|     params['headers']['content-type'] = 'application/json' | ||||
|     params['data'] = dumps(data) | ||||
|     if include_images: | ||||
|         data["include_image_descriptions"] = True | ||||
| 
 | ||||
|     if topic == "general": | ||||
|         data["include_answer"] = True | ||||
| 
 | ||||
|     elif topic == "news": | ||||
|         data["topic"] = "news" | ||||
|         data["days"] = days | ||||
| 
 | ||||
|     params["url"] = search_url | ||||
|     params["method"] = "POST" | ||||
|     params["headers"]["Content-type"] = "application/json" | ||||
|     params["data"] = dumps(data) | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| def response(resp): | ||||
|     results = [] | ||||
|     json_resp = resp.json() | ||||
|     data = resp.json() | ||||
| 
 | ||||
|     for result in json_resp.get('results', []): | ||||
|     for result in data.get("results", []): | ||||
|         results.append( | ||||
|             { | ||||
|                 'title': result['title'], | ||||
|                 'url': result['url'], | ||||
|                 'content': result['content'], | ||||
|                 'publishedDate': _parse_date(result.get('published_date')), | ||||
|                 "title": f"[{gettext('ai')}] {result['title']}", | ||||
|                 "url": result["url"], | ||||
|                 "content": result["content"], | ||||
|                 "publishedDate": _parse_date(result.get("published_date")), | ||||
|             } | ||||
|         ) | ||||
| 
 | ||||
|     if json_resp['images']: | ||||
|         results.append({'infobox': 'Tavily', 'img_src': json_resp['images'][0], 'content': json_resp['answer']}) | ||||
|     elif json_resp['answer']: | ||||
|         results.append({'answer': json_resp['answer']}) | ||||
|     img_list = data.get("images") | ||||
|     if img_list: | ||||
|         content = data.get("answer") | ||||
|         img_src = img_list[0] | ||||
|         if isinstance(img_list[0], dict): | ||||
|             img_src = img_list[0]["url"] | ||||
|             img_caption = gettext("Image caption") + ": " + img_list[0]["description"] | ||||
|             if not content: | ||||
|                 gettext("Image caption") | ||||
|                 content = img_caption | ||||
|             else: | ||||
|                 content += "//" + img_caption | ||||
| 
 | ||||
|         results.append( | ||||
|             { | ||||
|                 "infobox": f"Tavily [{gettext('ai')}]", | ||||
|                 "img_src": img_src, | ||||
|                 "content": content, | ||||
|             } | ||||
|         ) | ||||
| 
 | ||||
|     elif data["answer"]: | ||||
|         results.append({"answer": data["answer"]}) | ||||
| 
 | ||||
|     return results | ||||
| 
 | ||||
| @ -83,7 +215,26 @@ def response(resp): | ||||
| def _parse_date(pubDate): | ||||
|     if pubDate is not None: | ||||
|         try: | ||||
|             return datetime.strptime(pubDate, '%a, %d %b %Y %H:%M:%S %Z') | ||||
|             return datetime.strptime(pubDate, "%a, %d %b %Y %H:%M:%S %Z") | ||||
|         except (ValueError, TypeError) as e: | ||||
|             logger.debug("ignore exception (publishedDate): %s", e) | ||||
|     return None | ||||
| 
 | ||||
| 
 | ||||
| def init(engine_settings: dict): | ||||
|     msg = [] | ||||
| 
 | ||||
|     val = engine_settings.get("api_key") or api_key | ||||
|     if not val or val == "unset": | ||||
|         msg.append("missing api_key") | ||||
| 
 | ||||
|     val = engine_settings.get("topic") or topic | ||||
|     if val not in ["general", "news"]: | ||||
|         msg.append(f"invalid topic: '{val}'") | ||||
| 
 | ||||
|     val = engine_settings.get("search_depth") or search_depth | ||||
|     if val not in ["basic", "advanced"]: | ||||
|         msg.append(f"invalid search_depth: '{val}'") | ||||
| 
 | ||||
|     if msg: | ||||
|         raise ValueError(f"[{engine_settings['name']}] engine's settings: {' / '.join(msg)}") | ||||
|  | ||||
| @ -1828,21 +1828,28 @@ engines: | ||||
|     shortcut: tm | ||||
|     disabled: true | ||||
| 
 | ||||
|   # Tavily requires an API key as well as other configurations. Before you | ||||
|   # activate these engines you should read the documentation. | ||||
|   # --> https://docs.searxng.org/dev/engines/online/tavily.html | ||||
|   # | ||||
|   # - name: tavily | ||||
|   #   engine: tavily | ||||
|   #   shortcut: tav | ||||
|   #   categories: general | ||||
|   #   # API key required, see: https://docs.tavily.com/docs/welcome#getting-started | ||||
|   #   api_key: 'unset' | ||||
|   #   include_images: false | ||||
|   #   timeout: 15.0 | ||||
|   #   categories: [general, ai] | ||||
|   #   api_key: unset | ||||
|   #   topic: general | ||||
|   #   include_images: true | ||||
|   #   timeout: 15 | ||||
|   #   disabled: true | ||||
|   # | ||||
|   # - name: tavily news | ||||
|   #   engine: tavily | ||||
|   #   shortcut: tavnews | ||||
|   #   categories: news | ||||
|   #   api_key: 'unset' | ||||
|   #   search_type: news | ||||
|   #   timeout: 15.0 | ||||
|   #   categories: [news, ai] | ||||
|   #   api_key: unset | ||||
|   #   topic: news | ||||
|   #   timeout: 15 | ||||
|   #   disabled: true | ||||
| 
 | ||||
|   # Requires Tor | ||||
|   - name: torch | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user