| 
									
										
										
										
											2020-09-15 10:54:31 +02:00
										 |  |  | import sys | 
					
						
							|  |  |  | from time import time | 
					
						
							| 
									
										
										
										
											2015-02-22 23:24:49 +01:00
										 |  |  | from itertools import cycle | 
					
						
							| 
									
										
										
										
											2021-02-09 14:33:36 +01:00
										 |  |  | from threading import local | 
					
						
							| 
									
										
										
										
											2020-09-15 10:54:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import requests | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-22 23:24:49 +01:00
										 |  |  | from searx import settings | 
					
						
							| 
									
										
										
										
											2020-09-15 10:54:31 +02:00
										 |  |  | from searx import logger | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  | from searx.raise_for_httperror import raise_for_httperror | 
					
						
							| 
									
										
										
										
											2020-09-15 10:54:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Child logger dedicated to this module.
logger = logger.getChild('poolrequests')


# Import-time sanity checks of the TLS stack: the process exits with status 1
# when OpenSSL is older than 1.0.2, or when SNI is unavailable and pyOpenSSL
# cannot be imported.
try:
    import ssl
    if ssl.OPENSSL_VERSION_INFO[0:3] < (1, 0, 2):
        # https://github.com/certifi/python-certifi#1024-bit-root-certificates
        logger.critical('You are using an old openssl version({0}), please upgrade above 1.0.2!'
                        .format(ssl.OPENSSL_VERSION))
        sys.exit(1)
except ImportError:
    # No ssl module at all: the getattr() below then evaluates to False.
    ssl = None
if not getattr(ssl, "HAS_SNI", False):
    try:
        # Only checks that pyOpenSSL is importable; the name itself is not used here.
        import OpenSSL  # pylint: disable=unused-import
    except ImportError:
        logger.critical("ssl doesn't support SNI and the pyopenssl module is not installed.\n"
                        "Some HTTPS connections will fail")
        sys.exit(1)
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-22 23:24:49 +01:00
										 |  |  | class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, pool_connections=requests.adapters.DEFAULT_POOLSIZE, | 
					
						
							|  |  |  |                  pool_maxsize=requests.adapters.DEFAULT_POOLSIZE, | 
					
						
							|  |  |  |                  max_retries=requests.adapters.DEFAULT_RETRIES, | 
					
						
							|  |  |  |                  pool_block=requests.adapters.DEFAULT_POOLBLOCK, | 
					
						
							|  |  |  |                  **conn_params): | 
					
						
							|  |  |  |         if max_retries == requests.adapters.DEFAULT_RETRIES: | 
					
						
							|  |  |  |             self.max_retries = requests.adapters.Retry(0, read=False) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.max_retries = requests.adapters.Retry.from_int(max_retries) | 
					
						
							|  |  |  |         self.config = {} | 
					
						
							|  |  |  |         self.proxy_manager = {} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2015-02-22 23:24:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self._pool_connections = pool_connections | 
					
						
							|  |  |  |         self._pool_maxsize = pool_maxsize | 
					
						
							|  |  |  |         self._pool_block = pool_block | 
					
						
							|  |  |  |         self._conn_params = conn_params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block, **conn_params) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __setstate__(self, state): | 
					
						
							|  |  |  |         # Can't handle by adding 'proxy_manager' to self.__attrs__ because | 
					
						
							|  |  |  |         # because self.poolmanager uses a lambda function, which isn't pickleable. | 
					
						
							|  |  |  |         self.proxy_manager = {} | 
					
						
							|  |  |  |         self.config = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for attr, value in state.items(): | 
					
						
							|  |  |  |             setattr(self, attr, value) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.init_poolmanager(self._pool_connections, self._pool_maxsize, | 
					
						
							|  |  |  |                               block=self._pool_block, **self._conn_params) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
# Thread-local storage: holds the adapters chosen for each thread and the
# timeout / timing values read by request().
threadLocal = local()
connect = settings['outgoing'].get('pool_connections', 100)  # Magic number kept from previous code
maxsize = settings['outgoing'].get('pool_maxsize', requests.adapters.DEFAULT_POOLSIZE)  # Picked from constructor
# Endless rotations of adapters, kept separately for http and https: one adapter
# per configured source IP (passed as source_address=(source_ip, 0)), otherwise
# a single adapter without source address.
if settings['outgoing'].get('source_ips'):
    http_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize,
                                                    source_address=(source_ip, 0))
                          for source_ip in settings['outgoing']['source_ips'])
    https_adapters = cycle(HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize,
                                                     source_address=(source_ip, 0))
                           for source_ip in settings['outgoing']['source_ips'])
else:
    http_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize), ))
    https_adapters = cycle((HTTPAdapterWithConnParams(pool_connections=connect, pool_maxsize=maxsize), ))
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SessionSinglePool(requests.Session): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self): | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # reuse the same adapters | 
					
						
							| 
									
										
										
										
											2021-02-09 14:33:36 +01:00
										 |  |  |         self.adapters.clear() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         https_adapter = threadLocal.__dict__.setdefault('https_adapter', next(https_adapters)) | 
					
						
							|  |  |  |         http_adapter = threadLocal.__dict__.setdefault('http_adapter', next(http_adapters)) | 
					
						
							|  |  |  |         self.mount('https://', https_adapter) | 
					
						
							|  |  |  |         self.mount('http://', http_adapter) | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def close(self): | 
					
						
							|  |  |  |         """Call super, but clear adapters since there are managed globaly""" | 
					
						
							|  |  |  |         self.adapters.clear() | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  |         super().close() | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | def set_timeout_for_thread(timeout, start_time=None): | 
					
						
							|  |  |  |     threadLocal.timeout = timeout | 
					
						
							|  |  |  |     threadLocal.start_time = start_time | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def reset_time_for_thread(): | 
					
						
							|  |  |  |     threadLocal.total_time = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_time_for_thread(): | 
					
						
							|  |  |  |     return threadLocal.total_time | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-16 12:44:07 +01:00
										 |  |  | def get_proxy_cycles(proxy_settings): | 
					
						
							|  |  |  |     if not proxy_settings: | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  |     # Backwards compatibility for single proxy in settings.yml | 
					
						
							|  |  |  |     for protocol, proxy in proxy_settings.items(): | 
					
						
							|  |  |  |         if isinstance(proxy, str): | 
					
						
							|  |  |  |             proxy_settings[protocol] = [proxy] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for protocol in proxy_settings: | 
					
						
							|  |  |  |         proxy_settings[protocol] = cycle(proxy_settings[protocol]) | 
					
						
							|  |  |  |     return proxy_settings | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
# Proxy rotations built once at import time from the 'proxies' entry of the
# outgoing settings; None when that entry is missing or empty.
GLOBAL_PROXY_CYCLES = get_proxy_cycles(settings['outgoing'].get('proxies'))
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_proxies(proxy_cycles): | 
					
						
							|  |  |  |     if proxy_cycles: | 
					
						
							|  |  |  |         return {protocol: next(proxy_cycle) for protocol, proxy_cycle in proxy_cycles.items()} | 
					
						
							|  |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_global_proxies(): | 
					
						
							|  |  |  |     return get_proxies(GLOBAL_PROXY_CYCLES) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | def request(method, url, **kwargs): | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  |     """same as requests/requests/api.py request(...)""" | 
					
						
							|  |  |  |     time_before_request = time() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # session start | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  |     session = SessionSinglePool() | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # proxies | 
					
						
							| 
									
										
										
										
											2020-11-16 12:44:07 +01:00
										 |  |  |     if not kwargs.get('proxies'): | 
					
						
							|  |  |  |         kwargs['proxies'] = get_global_proxies() | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # timeout | 
					
						
							|  |  |  |     if 'timeout' in kwargs: | 
					
						
							|  |  |  |         timeout = kwargs['timeout'] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         timeout = getattr(threadLocal, 'timeout', None) | 
					
						
							|  |  |  |         if timeout is not None: | 
					
						
							|  |  |  |             kwargs['timeout'] = timeout | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     # raise_for_error | 
					
						
							|  |  |  |     check_for_httperror = True | 
					
						
							|  |  |  |     if 'raise_for_httperror' in kwargs: | 
					
						
							|  |  |  |         check_for_httperror = kwargs['raise_for_httperror'] | 
					
						
							|  |  |  |         del kwargs['raise_for_httperror'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  |     # do request | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  |     response = session.request(method=method, url=url, **kwargs) | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     time_after_request = time() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # is there a timeout for this engine ? | 
					
						
							|  |  |  |     if timeout is not None: | 
					
						
							|  |  |  |         timeout_overhead = 0.2  # seconds | 
					
						
							|  |  |  |         # start_time = when the user request started | 
					
						
							|  |  |  |         start_time = getattr(threadLocal, 'start_time', time_before_request) | 
					
						
							|  |  |  |         search_duration = time_after_request - start_time | 
					
						
							|  |  |  |         if search_duration > timeout + timeout_overhead: | 
					
						
							|  |  |  |             raise requests.exceptions.Timeout(response=response) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # session end | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  |     session.close() | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-11-23 01:52:45 +01:00
										 |  |  |     if hasattr(threadLocal, 'total_time'): | 
					
						
							|  |  |  |         threadLocal.total_time += time_after_request - time_before_request | 
					
						
							| 
									
										
										
										
											2017-07-23 11:56:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     # raise an exception | 
					
						
							|  |  |  |     if check_for_httperror: | 
					
						
							|  |  |  |         raise_for_httperror(response) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  |     return response | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get(url, **kwargs): | 
					
						
							|  |  |  |     kwargs.setdefault('allow_redirects', True) | 
					
						
							|  |  |  |     return request('get', url, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def options(url, **kwargs): | 
					
						
							|  |  |  |     kwargs.setdefault('allow_redirects', True) | 
					
						
							|  |  |  |     return request('options', url, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def head(url, **kwargs): | 
					
						
							|  |  |  |     kwargs.setdefault('allow_redirects', False) | 
					
						
							|  |  |  |     return request('head', url, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-18 12:47:31 +01:00
										 |  |  | def post(url, data=None, **kwargs): | 
					
						
							| 
									
										
										
										
											2015-01-22 17:20:44 +01:00
										 |  |  |     return request('post', url, data=data, **kwargs) | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def put(url, data=None, **kwargs): | 
					
						
							|  |  |  |     return request('put', url, data=data, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def patch(url, data=None, **kwargs): | 
					
						
							|  |  |  |     return request('patch', url, data=data, **kwargs) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def delete(url, **kwargs): | 
					
						
							|  |  |  |     return request('delete', url, **kwargs) |