| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | '''
 | 
					
						
							|  |  |  | searx is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | searx is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-31 23:39:58 +02:00
										 |  |  | import gc | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | import threading | 
					
						
							| 
									
										
										
										
											2016-07-31 23:39:58 +02:00
										 |  |  | from thread import start_new_thread | 
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  | from time import time | 
					
						
							| 
									
										
										
										
											2016-09-06 00:36:33 +02:00
										 |  |  | from uuid import uuid4 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  | import requests.exceptions | 
					
						
							| 
									
										
										
										
											2016-07-31 23:39:58 +02:00
										 |  |  | import searx.poolrequests as requests_lib | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | from searx.engines import ( | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |     categories, engines | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | ) | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  | from searx.answerers import ask | 
					
						
							| 
									
										
										
										
											2016-04-08 16:38:05 +02:00
										 |  |  | from searx.utils import gen_useragent | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | from searx.query import RawTextQuery, SearchQuery | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | from searx.results import ResultContainer | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | from searx import logger | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | from searx.plugins import plugins | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
# child logger used by all search-related log messages
logger = logger.getChild('search')

# process-wide counter of searches handled; incremented by Search.search()
number_of_searches = 0
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | def send_http_request(engine, request_params, timeout_limit): | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # for page_load_time stats | 
					
						
							|  |  |  |     time_before_request = time() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # create dictionary which contain all | 
					
						
							|  |  |  |     # informations about the request | 
					
						
							|  |  |  |     request_args = dict( | 
					
						
							|  |  |  |         headers=request_params['headers'], | 
					
						
							|  |  |  |         cookies=request_params['cookies'], | 
					
						
							|  |  |  |         timeout=timeout_limit, | 
					
						
							|  |  |  |         verify=request_params['verify'] | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # specific type of request (GET or POST) | 
					
						
							|  |  |  |     if request_params['method'] == 'GET': | 
					
						
							|  |  |  |         req = requests_lib.get | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         req = requests_lib.post | 
					
						
							|  |  |  |         request_args['data'] = request_params['data'] | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # send the request | 
					
						
							|  |  |  |     response = req(request_params['url'], **request_args) | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # is there a timeout (no parsing in this case) | 
					
						
							|  |  |  |     timeout_overhead = 0.2  # seconds | 
					
						
							|  |  |  |     search_duration = time() - request_params['started'] | 
					
						
							|  |  |  |     if search_duration > timeout_limit + timeout_overhead: | 
					
						
							|  |  |  |         raise Timeout(response=response) | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     with threading.RLock(): | 
					
						
							|  |  |  |         # no error : reset the suspend variables | 
					
						
							|  |  |  |         engine.continuous_errors = 0 | 
					
						
							|  |  |  |         engine.suspend_end_time = 0 | 
					
						
							|  |  |  |         # update stats with current page-load-time | 
					
						
							|  |  |  |         # only the HTTP request | 
					
						
							|  |  |  |         engine.stats['page_load_time'] += time() - time_before_request | 
					
						
							|  |  |  |         engine.stats['page_load_count'] += 1 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # everything is ok : return the response | 
					
						
							|  |  |  |     return response | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  | def search_one_request(engine, query, request_params, timeout_limit): | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |     # update request parameters dependent on | 
					
						
							|  |  |  |     # search-engine (contained in engines folder) | 
					
						
							|  |  |  |     engine.request(query, request_params) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # ignoring empty urls | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |     if request_params['url'] is None: | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if not request_params['url']: | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |         return [] | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # send request | 
					
						
							|  |  |  |     response = send_http_request(engine, request_params, timeout_limit) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |     # parse the response | 
					
						
							|  |  |  |     response.search_params = request_params | 
					
						
							|  |  |  |     return engine.response(response) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def search_one_request_safe(engine_name, query, request_params, result_container, timeout_limit): | 
					
						
							|  |  |  |     start_time = time() | 
					
						
							|  |  |  |     engine = engines[engine_name] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         # send requests and parse the results | 
					
						
							|  |  |  |         search_results = search_one_request(engine, query, request_params, timeout_limit) | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # add results | 
					
						
							|  |  |  |         for result in search_results: | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |             result['engine'] = engine_name | 
					
						
							|  |  |  |         result_container.extend(engine_name, search_results) | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |         # update engine time when there is no exception | 
					
						
							|  |  |  |         with threading.RLock(): | 
					
						
							|  |  |  |             engine.stats['engine_time'] += time() - start_time | 
					
						
							|  |  |  |             engine.stats['engine_time_count'] += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     except Exception as e: | 
					
						
							|  |  |  |         engine.stats['errors'] += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         search_duration = time() - start_time | 
					
						
							|  |  |  |         requests_exception = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (issubclass(e.__class__, requests.exceptions.Timeout)): | 
					
						
							|  |  |  |             # requests timeout (connect or read) | 
					
						
							|  |  |  |             logger.error("engine {0} : HTTP requests timeout" | 
					
						
							|  |  |  |                          "(search duration : {1} s, timeout: {2} s) : {3}" | 
					
						
							|  |  |  |                          .format(engine_name, search_duration, timeout_limit, e.__class__.__name__)) | 
					
						
							|  |  |  |             requests_exception = True | 
					
						
							|  |  |  |         if (issubclass(e.__class__, requests.exceptions.RequestException)): | 
					
						
							|  |  |  |             # other requests exception | 
					
						
							|  |  |  |             logger.exception("engine {0} : requests exception" | 
					
						
							|  |  |  |                              "(search duration : {1} s, timeout: {2} s) : {3}" | 
					
						
							|  |  |  |                              .format(engine_name, search_duration, timeout_limit, e)) | 
					
						
							|  |  |  |             requests_exception = True | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # others errors | 
					
						
							|  |  |  |             logger.exception('engine {0} : exception : {1}'.format(engine_name, e)) | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |         # update continuous_errors / suspend_end_time | 
					
						
							|  |  |  |         if requests_exception: | 
					
						
							|  |  |  |             with threading.RLock(): | 
					
						
							|  |  |  |                 engine.continuous_errors += 1 | 
					
						
							|  |  |  |                 engine.suspend_end_time = time() + min(60, engine.continuous_errors) | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-29 11:08:19 +01:00
										 |  |  |         # | 
					
						
							|  |  |  |         return False | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
def search_multiple_requests(requests, result_container, timeout_limit):
    """Spawn one thread per engine request and wait up to timeout_limit.

    Each thread runs search_one_request_safe and is named with a unique
    search_id so the join loop below can find exactly the threads that
    belong to this search among all live threads.
    """
    # NOTE(review): the ``requests`` parameter (list of
    # (engine_name, query, request_params) tuples) shadows the third-party
    # ``requests`` module name used elsewhere in this module; it is never
    # used as the module here, so this is confusing but not broken.
    start_time = time()
    search_id = uuid4().__str__()

    for engine_name, query, request_params in requests:
        th = threading.Thread(
            target=search_one_request_safe,
            args=(engine_name, query, request_params, result_container, timeout_limit),
            name=search_id,
        )
        # remember the engine so a timeout can be attributed in the log
        th._engine_name = engine_name
        th.start()

    # join only the threads of this search; the remaining time budget
    # shrinks as earlier joins consume it, never going below zero
    for th in threading.enumerate():
        if th.name == search_id:
            remaining_time = max(0.0, timeout_limit - (time() - start_time))
            th.join(remaining_time)
            if th.isAlive():
                logger.warning('engine timeout: {0}'.format(th._engine_name))
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | # get default reqest parameter | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | def default_request_params(): | 
					
						
							|  |  |  |     return { | 
					
						
							| 
									
										
										
										
											2014-12-29 21:31:04 +01:00
										 |  |  |         'method': 'GET', | 
					
						
							|  |  |  |         'headers': {}, | 
					
						
							|  |  |  |         'data': {}, | 
					
						
							|  |  |  |         'url': '', | 
					
						
							|  |  |  |         'cookies': {}, | 
					
						
							|  |  |  |         'verify': True | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-02 14:52:22 +01:00
def get_search_query_from_webapp(preferences, form):
    """Build a SearchQuery from user preferences and the submitted web form.

    Raises a bare Exception('noquery') when the form carries no query
    string.  Engines/categories are taken, in priority order, from the
    parsed query itself, then from the form fields, then from the
    preferences, finally falling back to the 'general' category.
    """
    query = None
    query_engines = []
    query_categories = []
    query_pageno = 1
    query_lang = 'all'
    query_time_range = None

    # set blocked engines
    disabled_engines = preferences.engines.get_disabled()

    # set specific language if set
    # (this value is overwritten again by the if/elif/else below;
    # kept for byte-identical behavior)
    query_lang = preferences.get_value('language')

    # safesearch
    query_safesearch = preferences.get_value('safesearch')

    # TODO better exceptions
    if not form.get('q'):
        raise Exception('noquery')

    # set pagenumber; non-numeric or < 1 values fall back to page 1
    pageno_param = form.get('pageno', '1')
    if not pageno_param.isdigit() or int(pageno_param) < 1:
        pageno_param = 1

    query_pageno = int(pageno_param)

    # parse query, if tags are set, which change
    # the search engine or search-language
    raw_text_query = RawTextQuery(form['q'], disabled_engines)
    raw_text_query.parse_query()

    # set query
    query = raw_text_query.getSearchQuery()

    # set specific language if set on request, query or preferences
    # TODO support search with multiple languages
    if len(raw_text_query.languages):
        query_lang = raw_text_query.languages[-1]
    elif 'language' in form:
        query_lang = form.get('language')
    else:
        query_lang = preferences.get_value('language')

    query_time_range = form.get('time_range')

    query_engines = raw_text_query.engines

    # if engines are calculated from query,
    # set categories by using that information
    if query_engines and raw_text_query.specific:
        query_categories = list(set(engine['category']
                                    for engine in query_engines))

    # otherwise, using defined categories to
    # calculate which engines should be used
    else:
        # set categories/engines
        load_default_categories = True
        for pd_name, pd in form.items():
            if pd_name == 'categories':
                # comma-separated list; unknown categories are dropped
                query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories)
            elif pd_name == 'engines':
                # comma-separated engine names; unknown engines are dropped
                pd_engines = [{'category': engines[engine].categories[0],
                               'name': engine}
                              for engine in map(unicode.strip, pd.split(',')) if engine in engines]
                if pd_engines:
                    query_engines.extend(pd_engines)
                    load_default_categories = False
            elif pd_name.startswith('category_'):
                category = pd_name[9:]

                # if category is not found in list, skip
                if category not in categories:
                    continue

                if pd != 'off':
                    # add category to list
                    query_categories.append(category)
                elif category in query_categories:
                    # remove category from list if property is set to 'off'
                    query_categories.remove(category)

        if not load_default_categories:
            # explicit engines were selected: derive categories from them
            if not query_categories:
                query_categories = list(set(engine['category']
                                            for engine in query_engines))
        else:
            # if no category is specified for this search,
            # using user-defined default-configuration which
            # (is stored in cookie)
            if not query_categories:
                cookie_categories = preferences.get_value('categories')
                for ccateg in cookie_categories:
                    if ccateg in categories:
                        query_categories.append(ccateg)

            # if still no category is specified, using general
            # as default-category
            if not query_categories:
                query_categories = ['general']

            # using all engines for that search, which are
            # declared under the specific categories
            for categ in query_categories:
                query_engines.extend({'category': categ,
                                      'name': engine.name}
                                     for engine in categories[categ]
                                     if (engine.name, categ) not in disabled_engines)

    return SearchQuery(query, query_engines, query_categories,
                       query_lang, query_safesearch, query_pageno, query_time_range)
					
						
							| 
									
										
										
										
											2014-02-09 01:07:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | class Search(object): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """Search information container""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, search_query): | 
					
						
							|  |  |  |         # init vars | 
					
						
							|  |  |  |         super(Search, self).__init__() | 
					
						
							|  |  |  |         self.search_query = search_query | 
					
						
							|  |  |  |         self.result_container = ResultContainer() | 
					
						
							| 
									
										
										
										
											2016-02-20 00:21:56 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |     # do search-request | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     def search(self): | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         global number_of_searches | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         # start time | 
					
						
							|  |  |  |         start_time = time() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-09 23:11:45 +01:00
										 |  |  |         # answeres ? | 
					
						
							| 
									
										
										
										
											2016-11-19 20:53:51 +01:00
										 |  |  |         answerers_results = ask(self.search_query) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if answerers_results: | 
					
						
							|  |  |  |             for results in answerers_results: | 
					
						
							|  |  |  |                 self.result_container.extend('answer', results) | 
					
						
							|  |  |  |             return self.result_container | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         requests = [] | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:44:11 +02:00
										 |  |  |         # increase number of searches | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         number_of_searches += 1 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # set default useragent | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |         # user_agent = request.headers.get('User-Agent', '') | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         user_agent = gen_useragent() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         search_query = self.search_query | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         # max of all selected engine timeout | 
					
						
							|  |  |  |         timeout_limit = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # start search-reqest for all selected engines | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         for selected_engine in search_query.engines: | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             if selected_engine['name'] not in engines: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             engine = engines[selected_engine['name']] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             # skip suspended engines | 
					
						
							| 
									
										
										
										
											2016-11-06 02:51:14 +01:00
										 |  |  |             if engine.suspend_end_time >= time(): | 
					
						
							|  |  |  |                 logger.debug('Engine currently suspended: %s', selected_engine['name']) | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # if paging is not supported, skip | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             if search_query.pageno > 1 and not engine.paging: | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # if search-language is set and engine does not | 
					
						
							|  |  |  |             # provide language-support, skip | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             if search_query.lang != 'all' and not engine.language_support: | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             # if time_range is not supported, skip | 
					
						
							|  |  |  |             if search_query.time_range and not engine.time_range_support: | 
					
						
							| 
									
										
										
										
											2016-07-17 18:42:30 +02:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # set default request parameters | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             request_params = default_request_params() | 
					
						
							|  |  |  |             request_params['headers']['User-Agent'] = user_agent | 
					
						
							|  |  |  |             request_params['category'] = selected_engine['category'] | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             request_params['started'] = start_time | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             request_params['pageno'] = search_query.pageno | 
					
						
							| 
									
										
										
										
											2015-06-03 17:16:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-25 22:02:18 +02:00
										 |  |  |             if hasattr(engine, 'language') and engine.language: | 
					
						
							| 
									
										
										
										
											2015-06-03 17:16:12 +02:00
										 |  |  |                 request_params['language'] = engine.language | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |                 request_params['language'] = search_query.lang | 
					
						
							| 
									
										
										
										
											2015-06-03 17:16:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-08 16:38:05 +02:00
										 |  |  |             # 0 = None, 1 = Moderate, 2 = Strict | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             request_params['safesearch'] = search_query.safesearch | 
					
						
							|  |  |  |             request_params['time_range'] = search_query.time_range | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             # append request to list | 
					
						
							|  |  |  |             requests.append((selected_engine['name'], search_query.query.encode('utf-8'), request_params)) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |             # update timeout_limit | 
					
						
							|  |  |  |             timeout_limit = max(timeout_limit, engine.timeout) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-11-05 13:45:20 +01:00
										 |  |  |         if requests: | 
					
						
							|  |  |  |             # send all search-request | 
					
						
							|  |  |  |             search_multiple_requests(requests, self.result_container, timeout_limit - (time() - start_time)) | 
					
						
							|  |  |  |             start_new_thread(gc.collect, tuple()) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # return results, suggestions, answers and infoboxes | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SearchWithPlugins(Search): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  |     """Similar to the Search class but call the plugins.""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     def __init__(self, search_query, request): | 
					
						
							|  |  |  |         super(SearchWithPlugins, self).__init__(search_query) | 
					
						
							|  |  |  |         self.request = request | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def search(self): | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  |         if plugins.call('pre_search', self.request, self): | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |             super(SearchWithPlugins, self).search() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  |         plugins.call('post_search', self.request, self) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results = self.result_container.get_ordered_results() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for result in results: | 
					
						
							|  |  |  |             plugins.call('on_result', self.request, self, result) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         return self.result_container |