2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# SPDX-License-Identifier: AGPL-3.0-or-later
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 09:53:03 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								"""Abstract base classes for engine request processors.
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-27 15:13:39 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								import threading
							 | 
						
					
						
							
								
									
										
										
										
											2021-01-17 16:14:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from abc import abstractmethod, ABC
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from timeit import default_timer
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-17 09:04:51 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from typing import Dict, Union
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from searx import settings, logger
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from searx.engines import engines
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from searx.network import get_time_for_thread, get_network
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from searx.metrics import histogram_observe, counter_inc, count_exception, count_error
							 | 
						
					
						
							
								
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineResponseException
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from searx.utils import get_engine_from_settings
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								logger = logger.getChild('searx.search.processor')
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-17 09:04:51 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								SUSPENDED_STATUS: Dict[Union[int, str], 'SuspendedStatus'] = {}
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class SuspendedStatus:
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-27 15:13:39 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Class to handle suspend state."""
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    __slots__ = 'suspend_end_time', 'suspend_reason', 'continuous_errors', 'lock'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def __init__(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.lock = threading.Lock()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.continuous_errors = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.suspend_end_time = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.suspend_reason = None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    @property
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def is_suspended(self):
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return self.suspend_end_time >= default_timer()
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def suspend(self, suspended_time, suspend_reason):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        with self.lock:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # update continuous_errors / suspend_end_time
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.continuous_errors += 1
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if suspended_time is None:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								                suspended_time = min(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    settings['search']['max_ban_time_on_fail'],
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                    self.continuous_errors * settings['search']['ban_time_on_fail'],
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                )
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.suspend_end_time = default_timer() + suspended_time
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.suspend_reason = suspend_reason
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            logger.debug('Suspend for %i seconds', suspended_time)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def resume(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        with self.lock:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # reset the suspend variables
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.continuous_errors = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.suspend_end_time = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.suspend_reason = None
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-01-17 16:14:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								class EngineProcessor(ABC):
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 09:53:03 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    """Base classes used for all types of request processors."""
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    __slots__ = 'engine', 'engine_name', 'lock', 'suspended_status', 'logger'
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-01-17 09:04:51 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def __init__(self, engine, engine_name: str):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.engine = engine
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.engine_name = engine_name
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        self.logger = engines[engine_name].logger
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        key = get_network(self.engine_name)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        key = id(key) if key else self.engine_name
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        self.suspended_status = SUSPENDED_STATUS.setdefault(key, SuspendedStatus())
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def initialize(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.engine.init(get_engine_from_settings(self.engine_name))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        except SearxEngineResponseException as exc:
							 | 
						
					
						
							
								
									
										
										
										
											2023-05-19 16:05:29 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.logger.warning('Fail to initialize // %s', exc)
							 | 
						
					
						
							
								
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        except Exception:  # pylint: disable=broad-except
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.logger.exception('Fail to initialize')
							 | 
						
					
						
							
								
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							
								
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.logger.debug('Initialized')
							 | 
						
					
						
							
								
									
										
										
										
											2021-05-05 13:08:54 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    @property
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def has_initialize_function(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return hasattr(self.engine, 'init')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def handle_exception(self, result_container, exception_or_message, suspend=False):
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # update result_container
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if isinstance(exception_or_message, BaseException):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            exception_class = exception_or_message.__class__
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            module_name = getattr(exception_class, '__module__', 'builtins')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            module_name = '' if module_name == 'builtins' else module_name + '.'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            error_message = module_name + exception_class.__qualname__
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            error_message = exception_or_message
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result_container.add_unresponsive_engine(self.engine_name, error_message)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # metrics
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        counter_inc('engine', self.engine_name, 'search', 'count', 'error')
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        if isinstance(exception_or_message, BaseException):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            count_exception(self.engine_name, exception_or_message)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            count_error(self.engine_name, exception_or_message)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # suspend the engine ?
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if suspend:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            suspended_time = None
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            if isinstance(exception_or_message, SearxEngineAccessDeniedException):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                suspended_time = exception_or_message.suspended_time
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.suspended_status.suspend(suspended_time, error_message)  # pylint: disable=no-member
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def _extend_container_basic(self, result_container, start_time, search_results):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # update result_container
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result_container.extend(self.engine_name, search_results)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        engine_time = default_timer() - start_time
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        page_load_time = get_time_for_thread()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        result_container.add_timing(self.engine_name, engine_time, page_load_time)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # metrics
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-14 17:23:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        counter_inc('engine', self.engine_name, 'search', 'count', 'successful')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        histogram_observe(engine_time, 'engine', self.engine_name, 'time', 'total')
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if page_load_time is not None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            histogram_observe(page_load_time, 'engine', self.engine_name, 'time', 'http')
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def extend_container(self, result_container, start_time, search_results):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if getattr(threading.current_thread(), '_timeout', False):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # the main thread is not waiting anymore
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-26 11:12:02 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            self.handle_exception(result_container, 'timeout', None)
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            # check if the engine accepted the request
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            if search_results is not None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                self._extend_container_basic(result_container, start_time, search_results)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            self.suspended_status.resume()
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def extend_container_if_suspended(self, result_container):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if self.suspended_status.is_suspended:
							 | 
						
					
						
							
								
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            result_container.add_unresponsive_engine(
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                self.engine_name, self.suspended_status.suspend_reason, suspended=True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            )
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-13 15:21:53 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								            return True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return False
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def get_params(self, search_query, engine_category):
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        """Returns a set of (see :ref:`request params <engine request arguments>`) or
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        ``None`` if request is not supported.
							 | 
						
					
						
							
								
									
										
										
										
											2022-08-01 16:42:33 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        Not supported conditions (``None`` is returned):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        - A page-number > 1 when engine does not support paging.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        - A time range when the engine does not support time range.
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        """
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # if paging is not supported, skip
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if search_query.pageno > 1 and not self.engine.paging:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-13 19:12:50 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # if max page is reached, skip
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        max_page = self.engine.max_page or settings['search']['max_page']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if max_page and max_page < search_query.pageno:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # if time_range is not supported, skip
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if search_query.time_range and not self.engine.time_range_support:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            return None
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        params = {}
							 | 
						
					
						
							
								
									
										
											 
										 
										
											
												[refactor] typification of SearXNG (initial) / result items (part 1)
Typification of SearXNG
=======================
This patch introduces the typing of the results.  The why and how is described
in the documentation, please generate the documentation ..
    $ make docs.clean docs.live
and read the following articles in the "Developer documentation":
- result types --> http://0.0.0.0:8000/dev/result_types/index.html
The result types are available from the `searx.result_types` module.  The
following have been implemented so far:
- base result type: `searx.result_type.Result`
  --> http://0.0.0.0:8000/dev/result_types/base_result.html
- answer results
  --> http://0.0.0.0:8000/dev/result_types/answer.html
including the type for translations (inspired by #3925).  For all other
types (which still need to be set up in subsequent PRs), template documentation
has been created for the transition period.
Doc of the fields used in Templates
===================================
The template documentation is the basis for the typing and is the first complete
documentation of the results (needed for engine development).  It is the
"working paper" (the plan) with which further typifications can be implemented
in subsequent PRs.
- https://github.com/searxng/searxng/issues/357
Answer Templates
================
With the new (sub) types for `Answer`, the templates for the answers have also
been revised, `Translation` are now displayed with collapsible entries (inspired
by #3925).
    !en-de dog
Plugins & Answerer
==================
The implementation for `Plugin` and `Answer` has been revised, see
documentation:
- Plugin: http://0.0.0.0:8000/dev/plugins/index.html
- Answerer: http://0.0.0.0:8000/dev/answerers/index.html
With `AnswerStorage` and `AnswerStorage` to manage those items (in follow up
PRs, `ArticleStorage`, `InfoStorage` and .. will be implemented)
Autocomplete
============
The autocompletion had a bug where the results from `Answer` had not been shown
in the past.  To test activate autocompletion and try search terms for which we
have answerers
- statistics: type `min 1 2 3` .. in the completion list you should find an
  entry like `[de] min(1, 2, 3) = 1`
- random: type `random uuid` .. in the completion list, the first item is a
  random UUID
Extended Types
==============
SearXNG extends e.g. the request and response types of flask and httpx, a module
has been set up for type extensions:
- Extended Types
  --> http://0.0.0.0:8000/dev/extended_types.html
Unit-Tests
==========
The unit tests have been completely revised.  In the previous implementation,
the runtime (the global variables such as `searx.settings`) was not initialized
before each test, so the runtime environment with which a test ran was always
determined by the tests that ran before it.  This was also the reason why we
sometimes had to observe non-deterministic errors in the tests in the past:
- https://github.com/searxng/searxng/issues/2988 is one example for the Runtime
  issues, with non-deterministic behavior ..
- https://github.com/searxng/searxng/pull/3650
- https://github.com/searxng/searxng/pull/3654
- https://github.com/searxng/searxng/pull/3642#issuecomment-2226884469
- https://github.com/searxng/searxng/pull/3746#issuecomment-2300965005
Why msgspec.Struct
==================
We have already discussed typing based on e.g. `TypeDict` or `dataclass` in the past:
- https://github.com/searxng/searxng/pull/1562/files
- https://gist.github.com/dalf/972eb05e7a9bee161487132a7de244d2
- https://github.com/searxng/searxng/pull/1412/files
- https://github.com/searxng/searxng/pull/1356
In my opinion, TypeDict is unsuitable because the objects are still dictionaries
and not instances of classes / the `dataclass` are classes but ...
The `msgspec.Struct` combine the advantages of typing, runtime behaviour and
also offer the option of (fast) serializing (incl. type check) the objects.
Currently not possible but conceivable with `msgspec`: Outsourcing the engines
into separate processes, what possibilities this opens up in the future is left
to the imagination!
Internally, we have already defined that it is desirable to decouple the
development of the engines from the development of the SearXNG core / The
serialization of the `Result` objects is a prerequisite for this.
HINT: The threads listed above were the template for this PR, even though the
implementation here is based on msgspec.  They should also be an inspiration for
the following PRs of typification, as the models and implementations can provide
a good direction.
Why just one commit?
====================
I tried to create several (thematically separated) commits, but gave up at some
point ... there are too many things to tackle at once / The comprehensibility of
the commits would not be improved by a thematic separation. On the contrary, we
would have to make multiple changes at the same places and the goal of a change
would be vaguely recognizable in the fog of the commits.
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
											
										 
										
											2024-12-15 09:59:50 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        params["query"] = search_query.query
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        params['category'] = engine_category
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        params['pageno'] = search_query.pageno
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        params['safesearch'] = search_query.safesearch
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        params['time_range'] = search_query.time_range
							 | 
						
					
						
							
								
									
										
										
										
											2021-03-25 09:37:37 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        params['engine_data'] = search_query.engine_data.get(self.engine_name, {})
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        params['searxng_locale'] = search_query.lang
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # deprecated / vintage --> use params['searxng_locale']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        #
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # Conditions related to engine's traits are implemented in engine.traits
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-15 09:53:03 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # module. Don't do 'locale' decisions here in the abstract layer of the
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # search processor, just pass the value from user's choice unchanged to
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # the engine request.
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if hasattr(self.engine, 'language') and self.engine.language:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            params['language'] = self.engine.language
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        else:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            params['language'] = search_query.lang
							 | 
						
					
						
							
								
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        return params
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    @abstractmethod
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def search(self, query, params, result_container, start_time, timeout_limit):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        pass
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-24 09:28:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    def get_tests(self):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        tests = getattr(self.engine, 'tests', None)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if tests is None:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            tests = getattr(self.engine, 'additional_tests', {})
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            tests.update(self.get_default_tests())
							 | 
						
					
						
							
								
									
										
										
										
											2021-04-27 15:13:39 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return tests
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-24 09:28:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2022-06-03 15:41:52 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    def get_default_tests(self):
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-24 09:28:16 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        return {}
							 |