| 
									
										
										
										
											2021-05-21 17:31:22 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | # lint: pylint | 
					
						
							| 
									
										
										
										
											2021-09-06 08:49:13 +02:00
										 |  |  | # pylint: disable=missing-module-docstring, too-few-public-methods | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  | import typing | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | import threading | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  | from timeit import default_timer | 
					
						
							| 
									
										
										
										
											2016-09-06 00:36:33 +02:00
										 |  |  | from uuid import uuid4 | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | from searx import settings | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  | from searx.answerers import ask | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  | from searx.external_bang import get_bang_url | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | from searx.results import ResultContainer | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | from searx import logger | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | from searx.plugins import plugins | 
					
						
							| 
									
										
										
										
											2021-01-05 11:24:39 +01:00
										 |  |  | from searx.search.models import EngineRef, SearchQuery | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  | from searx.engines import load_engines | 
					
						
							| 
									
										
										
										
											2021-09-23 11:31:29 +02:00
										 |  |  | from searx.network import initialize as initialize_network, check_network_configuration | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  | from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  | from searx.search.processors import PROCESSORS, initialize as initialize_processors | 
					
						
							|  |  |  | from searx.search.checker import initialize as initialize_checker | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | logger = logger.getChild('search') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-23 11:31:29 +02:00
										 |  |  | def initialize(settings_engines=None, enable_checker=False, check_network=False): | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |     settings_engines = settings_engines or settings['engines'] | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |     load_engines(settings_engines) | 
					
						
							|  |  |  |     initialize_network(settings_engines, settings['outgoing']) | 
					
						
							| 
									
										
										
										
											2021-09-23 11:31:29 +02:00
										 |  |  |     if check_network: | 
					
						
							|  |  |  |         check_network_configuration() | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |     initialize_metrics([engine['name'] for engine in settings_engines]) | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |     initialize_processors(settings_engines) | 
					
						
							| 
									
										
										
										
											2021-01-05 11:24:39 +01:00
										 |  |  |     if enable_checker: | 
					
						
							|  |  |  |         initialize_checker() | 
					
						
							| 
									
										
										
										
											2020-12-24 09:28:16 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-22 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  | class Search: | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     """Search information container""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  |     __slots__ = "search_query", "result_container", "start_time", "actual_timeout" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-09 11:23:57 +02:00
										 |  |  |     def __init__(self, search_query: SearchQuery): | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         self.search_query = search_query | 
					
						
							|  |  |  |         self.result_container = ResultContainer() | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         self.start_time = None | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         self.actual_timeout = None | 
					
						
							| 
									
										
										
										
											2016-02-20 00:21:56 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |     def search_external_bang(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check if there is a external bang. | 
					
						
							|  |  |  |         If yes, update self.result_container and return True | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |         if self.search_query.external_bang: | 
					
						
							|  |  |  |             self.result_container.redirect_url = get_bang_url(self.search_query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # This means there was a valid bang and the | 
					
						
							|  |  |  |             # rest of the search does not need to be continued | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  |             if isinstance(self.result_container.redirect_url, str): | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |                 return True | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def search_answerers(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check if an answer return a result. | 
					
						
							|  |  |  |         If yes, update self.result_container and return True | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  |         answerers_results = ask(self.search_query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if answerers_results: | 
					
						
							|  |  |  |             for results in answerers_results: | 
					
						
							|  |  |  |                 self.result_container.extend('answer', results) | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             return True | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # do search-request | 
					
						
							|  |  |  |     def _get_requests(self): | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         requests = [] | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         # max of all selected engine timeout | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         default_timeout = 0 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # start search-reqest for all selected engines | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |         for engineref in self.search_query.engineref_list: | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |             processor = PROCESSORS[engineref.name] | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 |  |  |             # stop the request now if the engine is suspend | 
					
						
							|  |  |  |             if processor.extend_container_if_suspended(self.result_container): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # set default request parameters | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |             request_params = processor.get_params(self.search_query, engineref.category) | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             if request_params is None: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |             counter_inc('engine', engineref.name, 'search', 'count', 'sent') | 
					
						
							| 
									
										
										
										
											2020-12-17 16:49:48 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             # append request to list | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |             requests.append((engineref.name, self.search_query.query, request_params)) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |             # update default_timeout | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |             default_timeout = max(default_timeout, processor.engine.timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # adjust timeout | 
					
						
							| 
									
										
										
										
											2021-06-01 08:07:26 +02:00
										 |  |  |         max_request_timeout = settings['outgoing']['max_request_timeout'] | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         actual_timeout = default_timeout | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         query_timeout = self.search_query.timeout_limit | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if max_request_timeout is None and query_timeout is None: | 
					
						
							|  |  |  |             # No max, no user query: default_timeout | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |         elif max_request_timeout is None and query_timeout is not None: | 
					
						
							|  |  |  |             # No max, but user query: From user query except if above default | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(default_timeout, query_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         elif max_request_timeout is not None and query_timeout is None: | 
					
						
							|  |  |  |             # Max, no user query: Default except if above max | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(default_timeout, max_request_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         elif max_request_timeout is not None and query_timeout is not None: | 
					
						
							|  |  |  |             # Max & user query: From user query except if above max | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(query_timeout, max_request_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         logger.debug( | 
					
						
							|  |  |  |             "actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})".format( | 
					
						
							|  |  |  |                 actual_timeout, default_timeout, query_timeout, max_request_timeout | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         return requests, actual_timeout | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |     def search_multiple_requests(self, requests): | 
					
						
							| 
									
										
										
										
											2021-05-21 17:31:22 +02:00
										 |  |  |         # pylint: disable=protected-access | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |         search_id = uuid4().__str__() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for engine_name, query, request_params in requests: | 
					
						
							| 
									
										
										
										
											2021-05-21 17:31:22 +02:00
										 |  |  |             th = threading.Thread(  # pylint: disable=invalid-name | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |                 target=PROCESSORS[engine_name].search, | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |                 args=(query, request_params, self.result_container, self.start_time, self.actual_timeout), | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |                 name=search_id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             th._timeout = False | 
					
						
							|  |  |  |             th._engine_name = engine_name | 
					
						
							|  |  |  |             th.start() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-21 17:31:22 +02:00
										 |  |  |         for th in threading.enumerate():  # pylint: disable=invalid-name | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |             if th.name == search_id: | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |                 remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time)) | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |                 th.join(remaining_time) | 
					
						
							|  |  |  |                 if th.is_alive(): | 
					
						
							|  |  |  |                     th._timeout = True | 
					
						
							|  |  |  |                     self.result_container.add_unresponsive_engine(th._engine_name, 'timeout') | 
					
						
							| 
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 |  |  |                     PROCESSORS[th._engine_name].logger.error('engine timeout') | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |     def search_standard(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Update self.result_container, self.actual_timeout | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         requests, self.actual_timeout = self._get_requests() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         # send all search-request | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         if requests: | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |             self.search_multiple_requests(requests) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # return results, suggestions, answers and infoboxes | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # do search-request | 
					
						
							| 
									
										
										
										
											2021-09-09 11:23:57 +02:00
										 |  |  |     def search(self) -> ResultContainer: | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |         self.start_time = default_timer() | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         if not self.search_external_bang(): | 
					
						
							|  |  |  |             if not self.search_answerers(): | 
					
						
							|  |  |  |                 self.search_standard() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SearchWithPlugins(Search): | 
					
						
							| 
									
										
										
										
											2021-09-09 11:23:57 +02:00
										 |  |  |     """Inherit from the Search class, add calls to the plugins.""" | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  |     __slots__ = 'ordered_plugin_list', 'request' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-09 11:23:57 +02:00
										 |  |  |     def __init__(self, search_query: SearchQuery, ordered_plugin_list, request: "flask.Request"): | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__(search_query) | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         self.ordered_plugin_list = ordered_plugin_list | 
					
						
							| 
									
										
										
										
											2021-09-06 08:49:13 +02:00
										 |  |  |         self.result_container.on_result = self._on_result | 
					
						
							|  |  |  |         # pylint: disable=line-too-long | 
					
						
							|  |  |  |         # get the "real" request to use it outside the Flask context. | 
					
						
							|  |  |  |         # see | 
					
						
							|  |  |  |         # * https://github.com/pallets/flask/blob/d01d26e5210e3ee4cbbdef12f05c886e08e92852/src/flask/globals.py#L55 | 
					
						
							|  |  |  |         # * https://github.com/pallets/werkzeug/blob/3c5d3c9bd0d9ce64590f0af8997a38f3823b368d/src/werkzeug/local.py#L548-L559 | 
					
						
							|  |  |  |         # * https://werkzeug.palletsprojects.com/en/2.0.x/local/#werkzeug.local.LocalProxy._get_current_object | 
					
						
							|  |  |  |         # pylint: enable=line-too-long | 
					
						
							|  |  |  |         self.request = request._get_current_object() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _on_result(self, result): | 
					
						
							|  |  |  |         return plugins.call(self.ordered_plugin_list, 'on_result', self.request, self, result) | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-09 11:23:57 +02:00
										 |  |  |     def search(self) -> ResultContainer: | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self): | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |             super().search() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         plugins.call(self.ordered_plugin_list, 'post_search', self.request, self) | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-06 08:49:13 +02:00
										 |  |  |         self.result_container.close() | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container |