| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | import typing | 
					
						
							|  |  |  | import inspect | 
					
						
							|  |  |  | from json import JSONDecodeError | 
					
						
							|  |  |  | from urllib.parse import urlparse | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  | from httpx import HTTPError, HTTPStatusError | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | from searx.exceptions import ( | 
					
						
							|  |  |  |     SearxXPathSyntaxException, | 
					
						
							|  |  |  |     SearxEngineXPathException, | 
					
						
							|  |  |  |     SearxEngineAPIException, | 
					
						
							|  |  |  |     SearxEngineAccessDeniedException, | 
					
						
							|  |  |  | ) | 
					
						
							| 
									
										
										
										
											2021-12-26 22:44:46 +01:00
										 |  |  | from searx import searx_parent_dir, settings | 
					
						
							| 
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 |  |  | from searx.engines import engines | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Per-engine error counters: engine name -> {ErrorContext: number of occurrences}.
# Filled in by add_error_context().
errors_per_engines = {}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ErrorContext: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     __slots__ = ( | 
					
						
							|  |  |  |         'filename', | 
					
						
							|  |  |  |         'function', | 
					
						
							|  |  |  |         'line_no', | 
					
						
							|  |  |  |         'code', | 
					
						
							|  |  |  |         'exception_classname', | 
					
						
							|  |  |  |         'log_message', | 
					
						
							|  |  |  |         'log_parameters', | 
					
						
							|  |  |  |         'secondary', | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |     def __init__(self, filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary): | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         self.filename = filename | 
					
						
							|  |  |  |         self.function = function | 
					
						
							|  |  |  |         self.line_no = line_no | 
					
						
							|  |  |  |         self.code = code | 
					
						
							|  |  |  |         self.exception_classname = exception_classname | 
					
						
							|  |  |  |         self.log_message = log_message | 
					
						
							|  |  |  |         self.log_parameters = log_parameters | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |         self.secondary = secondary | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def __eq__(self, o) -> bool: | 
					
						
							|  |  |  |         if not isinstance(o, ErrorContext): | 
					
						
							|  |  |  |             return False | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return ( | 
					
						
							|  |  |  |             self.filename == o.filename | 
					
						
							|  |  |  |             and self.function == o.function | 
					
						
							|  |  |  |             and self.line_no == o.line_no | 
					
						
							|  |  |  |             and self.code == o.code | 
					
						
							|  |  |  |             and self.exception_classname == o.exception_classname | 
					
						
							|  |  |  |             and self.log_message == o.log_message | 
					
						
							|  |  |  |             and self.log_parameters == o.log_parameters | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |             and self.secondary == o.secondary | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def __hash__(self): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return hash( | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 self.filename, | 
					
						
							|  |  |  |                 self.function, | 
					
						
							|  |  |  |                 self.line_no, | 
					
						
							|  |  |  |                 self.code, | 
					
						
							|  |  |  |                 self.exception_classname, | 
					
						
							|  |  |  |                 self.log_message, | 
					
						
							|  |  |  |                 self.log_parameters, | 
					
						
							|  |  |  |                 self.secondary, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def __repr__(self): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return "ErrorContext({!r}, {!r}, {!r}, {!r}, {!r}, {!r}) {!r}".format( | 
					
						
							|  |  |  |             self.filename, | 
					
						
							|  |  |  |             self.line_no, | 
					
						
							|  |  |  |             self.code, | 
					
						
							|  |  |  |             self.exception_classname, | 
					
						
							|  |  |  |             self.log_message, | 
					
						
							|  |  |  |             self.log_parameters, | 
					
						
							|  |  |  |             self.secondary, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def add_error_context(engine_name: str, error_context: ErrorContext) -> None: | 
					
						
							|  |  |  |     errors_for_engine = errors_per_engines.setdefault(engine_name, {}) | 
					
						
							|  |  |  |     errors_for_engine[error_context] = errors_for_engine.get(error_context, 0) + 1 | 
					
						
							| 
									
										
										
										
											2021-09-06 19:46:08 +02:00
										 |  |  |     engines[engine_name].logger.warning('%s', str(error_context)) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_trace(traces): | 
					
						
							|  |  |  |     for trace in reversed(traces): | 
					
						
							| 
									
										
										
										
											2021-01-17 16:11:26 +01:00
										 |  |  |         split_filename = trace.filename.split('/') | 
					
						
							| 
									
										
										
										
											2021-01-17 16:14:16 +01:00
										 |  |  |         if '/'.join(split_filename[-3:-1]) == 'searx/engines': | 
					
						
							| 
									
										
										
										
											2021-01-17 16:11:26 +01:00
										 |  |  |             return trace | 
					
						
							| 
									
										
										
										
											2021-01-17 16:14:16 +01:00
										 |  |  |         if '/'.join(split_filename[-4:-1]) == 'searx/search/processors': | 
					
						
							| 
									
										
										
										
											2021-01-17 16:11:26 +01:00
										 |  |  |             return trace | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     return traces[-1] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  | def get_hostname(exc: HTTPError) -> typing.Optional[None]: | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     url = exc.request.url | 
					
						
							|  |  |  |     if url is None and exc.response is not None: | 
					
						
							|  |  |  |         url = exc.response.url | 
					
						
							|  |  |  |     return urlparse(url).netloc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | def get_request_exception_messages( | 
					
						
							|  |  |  |     exc: HTTPError, | 
					
						
							|  |  |  | ) -> typing.Tuple[typing.Optional[str], typing.Optional[str], typing.Optional[str]]: | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     url = None | 
					
						
							|  |  |  |     status_code = None | 
					
						
							|  |  |  |     reason = None | 
					
						
							|  |  |  |     hostname = None | 
					
						
							| 
									
										
										
										
											2021-09-16 18:05:31 +02:00
										 |  |  |     if hasattr(exc, '_request') and exc._request is not None: | 
					
						
							|  |  |  |         # exc.request is property that raise an RuntimeException | 
					
						
							|  |  |  |         # if exc._request is not defined. | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         url = exc.request.url | 
					
						
							| 
									
										
										
										
											2021-09-16 18:05:31 +02:00
										 |  |  |     if url is None and hasattr(exc, 'response') and exc.response is not None: | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         url = exc.response.url | 
					
						
							|  |  |  |     if url is not None: | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  |         hostname = url.host | 
					
						
							|  |  |  |     if isinstance(exc, HTTPStatusError): | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         status_code = str(exc.response.status_code) | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  |         reason = exc.response.reason_phrase | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     return (status_code, reason, hostname) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_messages(exc, filename) -> typing.Tuple: | 
					
						
							|  |  |  |     if isinstance(exc, JSONDecodeError): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return (exc.msg,) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     if isinstance(exc, TypeError): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return (str(exc),) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     if isinstance(exc, ValueError) and 'lxml' in filename: | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return (str(exc),) | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  |     if isinstance(exc, HTTPError): | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         return get_request_exception_messages(exc) | 
					
						
							|  |  |  |     if isinstance(exc, SearxXPathSyntaxException): | 
					
						
							|  |  |  |         return (exc.xpath_str, exc.message) | 
					
						
							|  |  |  |     if isinstance(exc, SearxEngineXPathException): | 
					
						
							|  |  |  |         return (exc.xpath_str, exc.message) | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     if isinstance(exc, SearxEngineAPIException): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return (str(exc.args[0]),) | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     if isinstance(exc, SearxEngineAccessDeniedException): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         return (exc.message,) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     return () | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_exception_classname(exc: Exception) -> str: | 
					
						
							|  |  |  |     exc_class = exc.__class__ | 
					
						
							|  |  |  |     exc_name = exc_class.__qualname__ | 
					
						
							|  |  |  |     exc_module = exc_class.__module__ | 
					
						
							|  |  |  |     if exc_module is None or exc_module == str.__class__.__module__: | 
					
						
							|  |  |  |         return exc_name | 
					
						
							|  |  |  |     return exc_module + '.' + exc_name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  | def get_error_context(framerecords, exception_classname, log_message, log_parameters, secondary) -> ErrorContext: | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     searx_frame = get_trace(framerecords) | 
					
						
							|  |  |  |     filename = searx_frame.filename | 
					
						
							| 
									
										
										
										
											2021-04-27 10:42:00 +02:00
										 |  |  |     if filename.startswith(searx_parent_dir): | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         filename = filename[len(searx_parent_dir) + 1 :] | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     function = searx_frame.function | 
					
						
							|  |  |  |     line_no = searx_frame.lineno | 
					
						
							|  |  |  |     code = searx_frame.code_context[0].strip() | 
					
						
							|  |  |  |     del framerecords | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |     return ErrorContext(filename, function, line_no, code, exception_classname, log_message, log_parameters, secondary) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  | def count_exception(engine_name: str, exc: Exception, secondary: bool = False) -> None: | 
					
						
							| 
									
										
										
										
											2021-12-26 22:44:46 +01:00
										 |  |  |     if not settings['general']['enable_metrics']: | 
					
						
							|  |  |  |         return | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     framerecords = inspect.trace() | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         exception_classname = get_exception_classname(exc) | 
					
						
							|  |  |  |         log_parameters = get_messages(exc, framerecords[-1][1]) | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |         error_context = get_error_context(framerecords, exception_classname, None, log_parameters, secondary) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         add_error_context(engine_name, error_context) | 
					
						
							|  |  |  |     finally: | 
					
						
							|  |  |  |         del framerecords | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | def count_error( | 
					
						
							|  |  |  |     engine_name: str, log_message: str, log_parameters: typing.Optional[typing.Tuple] = None, secondary: bool = False | 
					
						
							|  |  |  | ) -> None: | 
					
						
							| 
									
										
										
										
											2021-12-26 22:44:46 +01:00
										 |  |  |     if not settings['general']['enable_metrics']: | 
					
						
							|  |  |  |         return | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |     framerecords = list(reversed(inspect.stack()[1:])) | 
					
						
							|  |  |  |     try: | 
					
						
							| 
									
										
										
										
											2021-04-17 18:15:50 +02:00
										 |  |  |         error_context = get_error_context(framerecords, None, log_message, log_parameters or (), secondary) | 
					
						
							| 
									
										
										
										
											2020-11-26 15:12:11 +01:00
										 |  |  |         add_error_context(engine_name, error_context) | 
					
						
							|  |  |  |     finally: | 
					
						
							|  |  |  |         del framerecords |