| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | '''
 | 
					
						
							|  |  |  | searx is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | searx is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | along with searx. If not, see <http://www.gnu.org/licenses/>. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  | import typing | 
					
						
							| 
									
										
										
										
											2016-07-31 23:39:58 +02:00
										 |  |  | import gc | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | import threading | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  | from timeit import default_timer | 
					
						
							| 
									
										
										
										
											2016-09-06 00:36:33 +02:00
										 |  |  | from uuid import uuid4 | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from _thread import start_new_thread | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | from searx import settings | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  | from searx.answerers import ask | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  | from searx.external_bang import get_bang_url | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | from searx.results import ResultContainer | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | from searx import logger | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | from searx.plugins import plugins | 
					
						
							| 
									
										
										
										
											2021-01-05 11:24:39 +01:00
										 |  |  | from searx.search.models import EngineRef, SearchQuery | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  | from searx.engines import load_engines | 
					
						
							|  |  |  | from searx.network import initialize as initialize_network | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  | from searx.metrics import initialize as initialize_metrics, counter_inc, histogram_observe_time | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  | from searx.search.processors import PROCESSORS, initialize as initialize_processors | 
					
						
							|  |  |  | from searx.search.checker import initialize as initialize_checker | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | logger = logger.getChild('search') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | max_request_timeout = settings.get('outgoing', {}).get('max_request_timeout' or None) | 
					
						
							|  |  |  | if max_request_timeout is None: | 
					
						
							|  |  |  |     logger.info('max_request_timeout={0}'.format(max_request_timeout)) | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     if isinstance(max_request_timeout, float): | 
					
						
							|  |  |  |         logger.info('max_request_timeout={0} second(s)'.format(max_request_timeout)) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         logger.critical('outgoing.max_request_timeout if defined has to be float') | 
					
						
							| 
									
										
										
										
											2020-11-16 09:43:23 +01:00
										 |  |  |         import sys | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-05 11:24:39 +01:00
										 |  |  | def initialize(settings_engines=None, enable_checker=False): | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |     settings_engines = settings_engines or settings['engines'] | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |     load_engines(settings_engines) | 
					
						
							|  |  |  |     initialize_network(settings_engines, settings['outgoing']) | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |     initialize_metrics([engine['name'] for engine in settings_engines]) | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |     initialize_processors(settings_engines) | 
					
						
							| 
									
										
										
										
											2021-01-05 11:24:39 +01:00
										 |  |  |     if enable_checker: | 
					
						
							|  |  |  |         initialize_checker() | 
					
						
							| 
									
										
										
										
											2020-12-24 09:28:16 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-22 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  | class Search: | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     """Search information container""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  |     __slots__ = "search_query", "result_container", "start_time", "actual_timeout" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     def __init__(self, search_query): | 
					
						
							|  |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         self.search_query = search_query | 
					
						
							|  |  |  |         self.result_container = ResultContainer() | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         self.start_time = None | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         self.actual_timeout = None | 
					
						
							| 
									
										
										
										
											2016-02-20 00:21:56 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |     def search_external_bang(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check if there is a external bang. | 
					
						
							|  |  |  |         If yes, update self.result_container and return True | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |         if self.search_query.external_bang: | 
					
						
							|  |  |  |             self.result_container.redirect_url = get_bang_url(self.search_query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # This means there was a valid bang and the | 
					
						
							|  |  |  |             # rest of the search does not need to be continued | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  |             if isinstance(self.result_container.redirect_url, str): | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |                 return True | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def search_answerers(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Check if an answer return a result. | 
					
						
							|  |  |  |         If yes, update self.result_container and return True | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  |         answerers_results = ask(self.search_query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if answerers_results: | 
					
						
							|  |  |  |             for results in answerers_results: | 
					
						
							|  |  |  |                 self.result_container.extend('answer', results) | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             return True | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # do search-request | 
					
						
							|  |  |  |     def _get_requests(self): | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         requests = [] | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         # max of all selected engine timeout | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         default_timeout = 0 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # start search-reqest for all selected engines | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |         for engineref in self.search_query.engineref_list: | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |             processor = PROCESSORS[engineref.name] | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 |  |  |             # stop the request now if the engine is suspend | 
					
						
							|  |  |  |             if processor.extend_container_if_suspended(self.result_container): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # set default request parameters | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |             request_params = processor.get_params(self.search_query, engineref.category) | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             if request_params is None: | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |             counter_inc('engine', engineref.name, 'search', 'count', 'sent') | 
					
						
							| 
									
										
										
										
											2020-12-17 16:49:48 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             # append request to list | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |             requests.append((engineref.name, self.search_query.query, request_params)) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |             # update default_timeout | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |             default_timeout = max(default_timeout, processor.engine.timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # adjust timeout | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         actual_timeout = default_timeout | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         query_timeout = self.search_query.timeout_limit | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if max_request_timeout is None and query_timeout is None: | 
					
						
							|  |  |  |             # No max, no user query: default_timeout | 
					
						
							|  |  |  |             pass | 
					
						
							|  |  |  |         elif max_request_timeout is None and query_timeout is not None: | 
					
						
							|  |  |  |             # No max, but user query: From user query except if above default | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(default_timeout, query_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         elif max_request_timeout is not None and query_timeout is None: | 
					
						
							|  |  |  |             # Max, no user query: Default except if above max | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(default_timeout, max_request_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         elif max_request_timeout is not None and query_timeout is not None: | 
					
						
							|  |  |  |             # Max & user query: From user query except if above max | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |             actual_timeout = min(query_timeout, max_request_timeout) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         logger.debug("actual_timeout={0} (default_timeout={1}, ?timeout_limit={2}, max_request_timeout={3})" | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  |                      .format(actual_timeout, default_timeout, query_timeout, max_request_timeout)) | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         return requests, actual_timeout | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |     def search_multiple_requests(self, requests): | 
					
						
							|  |  |  |         search_id = uuid4().__str__() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for engine_name, query, request_params in requests: | 
					
						
							|  |  |  |             th = threading.Thread( | 
					
						
							| 
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 |  |  |                 target=PROCESSORS[engine_name].search, | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  |                 args=(query, request_params, self.result_container, self.start_time, self.actual_timeout), | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |                 name=search_id, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             th._timeout = False | 
					
						
							|  |  |  |             th._engine_name = engine_name | 
					
						
							|  |  |  |             th.start() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for th in threading.enumerate(): | 
					
						
							|  |  |  |             if th.name == search_id: | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |                 remaining_time = max(0.0, self.actual_timeout - (default_timer() - self.start_time)) | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |                 th.join(remaining_time) | 
					
						
							|  |  |  |                 if th.is_alive(): | 
					
						
							|  |  |  |                     th._timeout = True | 
					
						
							|  |  |  |                     self.result_container.add_unresponsive_engine(th._engine_name, 'timeout') | 
					
						
							|  |  |  |                     logger.warning('engine timeout: {0}'.format(th._engine_name)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |     def search_standard(self): | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         Update self.result_container, self.actual_timeout | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         requests, self.actual_timeout = self._get_requests() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         # send all search-request | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         if requests: | 
					
						
							| 
									
										
										
										
											2020-12-15 14:50:17 +01:00
										 |  |  |             self.search_multiple_requests(requests) | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             start_new_thread(gc.collect, tuple()) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # return results, suggestions, answers and infoboxes | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # do search-request | 
					
						
							|  |  |  |     def search(self): | 
					
						
							| 
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 |  |  |         self.start_time = default_timer() | 
					
						
							| 
									
										
										
										
											2020-09-14 13:21:21 +02:00
										 |  |  |         if not self.search_external_bang(): | 
					
						
							|  |  |  |             if not self.search_answerers(): | 
					
						
							|  |  |  |                 self.search_standard() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SearchWithPlugins(Search): | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  |     """Similar to the Search class but call the plugins.""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-10 18:08:14 +02:00
										 |  |  |     __slots__ = 'ordered_plugin_list', 'request' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |     def __init__(self, search_query, ordered_plugin_list, request): | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__(search_query) | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         self.ordered_plugin_list = ordered_plugin_list | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         self.request = request | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def search(self): | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self): | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |             super().search() | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |         plugins.call(self.ordered_plugin_list, 'post_search', self.request, self) | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         results = self.result_container.get_ordered_results() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for result in results: | 
					
						
							| 
									
										
										
										
											2017-01-02 12:06:04 +01:00
										 |  |  |             plugins.call(self.ordered_plugin_list, 'on_result', self.request, self, result) | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container |