| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | '''
 | 
					
						
							|  |  |  | searx is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | searx is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | import threading | 
					
						
							| 
									
										
										
										
											2015-01-21 11:33:16 +01:00
										 |  |  | import searx.poolrequests as requests_lib | 
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  | from time import time | 
					
						
							| 
									
										
										
										
											2015-08-02 20:29:19 +02:00
										 |  |  | from searx import settings | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | from searx.engines import ( | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |     categories, engines | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | ) | 
					
						
							|  |  |  | from searx.languages import language_codes | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:45 +01:00
										 |  |  | from searx.utils import gen_useragent, get_blocked_engines | 
					
						
							| 
									
										
										
										
											2014-10-01 17:57:53 +02:00
										 |  |  | from searx.query import Query | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | from searx.results import ResultContainer | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  | from searx import logger | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
# Module-level logger: the 'search' child of the package logger imported above.
logger = logger.getChild('search')

# Running total of searches; Search.search() increments it through a
# ``global number_of_searches`` declaration.
number_of_searches = 0
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  | def search_request_wrapper(fn, url, engine_name, **kwargs): | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         return fn(url, **kwargs) | 
					
						
							| 
									
										
										
										
											2015-01-09 04:30:55 +01:00
										 |  |  |     except: | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  |         # increase errors stats | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |         with threading.RLock(): | 
					
						
							|  |  |  |             engines[engine_name].stats['errors'] += 1 | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # print engine name and specific error message | 
					
						
							| 
									
										
										
										
											2015-01-09 04:30:55 +01:00
										 |  |  |         logger.exception('engine crash: {0}'.format(engine_name)) | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | def threaded_requests(requests): | 
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  |     timeout_limit = max(r[2]['timeout'] for r in requests) | 
					
						
							|  |  |  |     search_start = time() | 
					
						
							| 
									
										
										
										
											2014-12-19 13:59:41 +01:00
										 |  |  |     for fn, url, request_args, engine_name in requests: | 
					
						
							| 
									
										
										
										
											2014-12-18 10:11:56 +01:00
										 |  |  |         request_args['timeout'] = timeout_limit | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |         th = threading.Thread( | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  |             target=search_request_wrapper, | 
					
						
							|  |  |  |             args=(fn, url, engine_name), | 
					
						
							| 
									
										
										
										
											2014-12-08 23:55:11 +01:00
										 |  |  |             kwargs=request_args, | 
					
						
							|  |  |  |             name='search_request', | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2014-12-19 13:59:41 +01:00
										 |  |  |         th._engine_name = engine_name | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |         th.start() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for th in threading.enumerate(): | 
					
						
							| 
									
										
										
										
											2014-12-08 23:55:11 +01:00
										 |  |  |         if th.name == 'search_request': | 
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  |             remaining_time = max(0.0, timeout_limit - (time() - search_start)) | 
					
						
							|  |  |  |             th.join(remaining_time) | 
					
						
							|  |  |  |             if th.isAlive(): | 
					
						
							| 
									
										
										
										
											2015-01-09 04:13:05 +01:00
										 |  |  |                 logger.warning('engine timeout: {0}'.format(th._engine_name)) | 
					
						
							| 
									
										
										
										
											2014-12-14 01:18:01 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | # get default request parameters | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | def default_request_params(): | 
					
						
							|  |  |  |     return { | 
					
						
							| 
									
										
										
										
											2014-12-29 21:31:04 +01:00
										 |  |  |         'method': 'GET', | 
					
						
							|  |  |  |         'headers': {}, | 
					
						
							|  |  |  |         'data': {}, | 
					
						
							|  |  |  |         'url': '', | 
					
						
							|  |  |  |         'cookies': {}, | 
					
						
							|  |  |  |         'verify': True | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | # create a callback wrapper for the search engine results | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | def make_callback(engine_name, callback, params, result_container): | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |     # creating a callback wrapper for the search engine results | 
					
						
							|  |  |  |     def process_callback(response, **kwargs): | 
					
						
							| 
									
										
										
										
											2015-02-13 11:17:00 +01:00
										 |  |  |         # check if redirect comparing to the True value, | 
					
						
							|  |  |  |         # because resp can be a Mock object, and any attribut name returns something. | 
					
						
							|  |  |  |         if response.is_redirect is True: | 
					
						
							| 
									
										
										
										
											2015-02-13 13:02:37 +01:00
										 |  |  |             logger.debug('{0} redirect on: {1}'.format(engine_name, response)) | 
					
						
							| 
									
										
										
										
											2015-02-13 11:17:00 +01:00
										 |  |  |             return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         response.search_params = params | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-18 12:07:20 +01:00
										 |  |  |         search_duration = time() - params['started'] | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |         # update stats with current page-load-time | 
					
						
							|  |  |  |         with threading.RLock(): | 
					
						
							|  |  |  |             engines[engine_name].stats['page_load_time'] += search_duration | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         timeout_overhead = 0.2  # seconds | 
					
						
							| 
									
										
										
										
											2014-12-18 12:07:20 +01:00
										 |  |  |         timeout_limit = engines[engine_name].timeout + timeout_overhead | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-18 12:07:20 +01:00
										 |  |  |         if search_duration > timeout_limit: | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |             with threading.RLock(): | 
					
						
							|  |  |  |                 engines[engine_name].stats['errors'] += 1 | 
					
						
							| 
									
										
										
										
											2014-12-18 12:07:20 +01:00
										 |  |  |             return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # callback | 
					
						
							| 
									
										
										
										
											2014-12-19 20:01:01 +01:00
										 |  |  |         search_results = callback(response) | 
					
						
							| 
									
										
										
										
											2014-09-22 21:40:40 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # add results | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         for result in search_results: | 
					
						
							|  |  |  |             result['engine'] = engine_name | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |         result_container.extend(engine_name, search_results) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |     return process_callback | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | class Search(object): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """Search information container""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, request): | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         super(Search, self).__init__() | 
					
						
							|  |  |  |         self.query = None | 
					
						
							|  |  |  |         self.engines = [] | 
					
						
							|  |  |  |         self.categories = [] | 
					
						
							|  |  |  |         self.paging = False | 
					
						
							|  |  |  |         self.pageno = 1 | 
					
						
							|  |  |  |         self.lang = 'all' | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # set blocked engines | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:45 +01:00
										 |  |  |         self.blocked_engines = get_blocked_engines(engines, request.cookies) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |         self.result_container = ResultContainer() | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         self.request_data = {} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # set specific language if set | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         if request.cookies.get('language')\ | 
					
						
							|  |  |  |            and request.cookies['language'] in (x[0] for x in language_codes): | 
					
						
							|  |  |  |             self.lang = request.cookies['language'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # set request method | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         if request.method == 'POST': | 
					
						
							|  |  |  |             self.request_data = request.form | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.request_data = request.args | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # TODO better exceptions | 
					
						
							|  |  |  |         if not self.request_data.get('q'): | 
					
						
							|  |  |  |             raise Exception('noquery') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # set pagenumber | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         pageno_param = self.request_data.get('pageno', '1') | 
					
						
							|  |  |  |         if not pageno_param.isdigit() or int(pageno_param) < 1: | 
					
						
							|  |  |  |             raise Exception('wrong pagenumber') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.pageno = int(pageno_param) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |         # parse query, if tags are set, which change | 
					
						
							|  |  |  |         # the serch engine or search-language | 
					
						
							| 
									
										
										
										
											2014-10-11 12:46:12 +02:00
										 |  |  |         query_obj = Query(self.request_data['q'], self.blocked_engines) | 
					
						
							|  |  |  |         query_obj.parse_query() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # set query | 
					
						
							|  |  |  |         self.query = query_obj.getSearchQuery() | 
					
						
							| 
									
										
										
										
											2014-10-01 17:57:53 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # get last selected language in query, if possible | 
					
						
							|  |  |  |         # TODO support search with multible languages | 
					
						
							|  |  |  |         if len(query_obj.languages): | 
					
						
							|  |  |  |             self.lang = query_obj.languages[-1] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.engines = query_obj.engines | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         self.categories = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |         # if engines are calculated from query, | 
					
						
							|  |  |  |         # set categories by using that informations | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |         if self.engines and query_obj.specific: | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |             self.categories = list(set(engine['category'] | 
					
						
							| 
									
										
										
										
											2014-02-07 02:45:12 +01:00
										 |  |  |                                        for engine in self.engines)) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |         # otherwise, using defined categories to | 
					
						
							|  |  |  |         # calculate which engines should be used | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |             # set categories/engines | 
					
						
							|  |  |  |             load_default_categories = True | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |             for pd_name, pd in self.request_data.items(): | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |                 if pd_name == 'categories': | 
					
						
							| 
									
										
										
										
											2015-09-01 15:14:02 +02:00
										 |  |  |                     self.categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |                 elif pd_name == 'engines': | 
					
						
							|  |  |  |                     pd_engines = [{'category': engines[engine].categories[0], | 
					
						
							|  |  |  |                                    'name': engine} | 
					
						
							| 
									
										
										
										
											2015-09-01 15:14:02 +02:00
										 |  |  |                                   for engine in map(unicode.strip, pd.split(',')) if engine in engines] | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |                     if pd_engines: | 
					
						
							|  |  |  |                         self.engines.extend(pd_engines) | 
					
						
							|  |  |  |                         load_default_categories = False | 
					
						
							|  |  |  |                 elif pd_name.startswith('category_'): | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |                     category = pd_name[9:] | 
					
						
							| 
									
										
										
										
											2015-01-15 15:48:50 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |                     # if category is not found in list, skip | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                     if category not in categories: | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |                         continue | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-15 15:48:50 +01:00
										 |  |  |                     if pd != 'off': | 
					
						
							|  |  |  |                         # add category to list | 
					
						
							|  |  |  |                         self.categories.append(category) | 
					
						
							|  |  |  |                     elif category in self.categories: | 
					
						
							|  |  |  |                         # remove category from list if property is set to 'off' | 
					
						
							|  |  |  |                         self.categories.remove(category) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |             if not load_default_categories: | 
					
						
							| 
									
										
										
										
											2015-09-01 15:21:17 +02:00
										 |  |  |                 if not self.categories: | 
					
						
							|  |  |  |                     self.categories = list(set(engine['category'] | 
					
						
							|  |  |  |                                                for engine in self.engines)) | 
					
						
							| 
									
										
										
										
											2015-05-12 08:21:39 +02:00
										 |  |  |                 return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # if no category is specified for this search, | 
					
						
							|  |  |  |             # using user-defined default-configuration which | 
					
						
							|  |  |  |             # (is stored in cookie) | 
					
						
							| 
									
										
										
										
											2014-02-11 13:13:51 +01:00
										 |  |  |             if not self.categories: | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |                 cookie_categories = request.cookies.get('categories', '') | 
					
						
							|  |  |  |                 cookie_categories = cookie_categories.split(',') | 
					
						
							|  |  |  |                 for ccateg in cookie_categories: | 
					
						
							|  |  |  |                     if ccateg in categories: | 
					
						
							|  |  |  |                         self.categories.append(ccateg) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # if still no category is specified, using general | 
					
						
							|  |  |  |             # as default-category | 
					
						
							| 
									
										
										
										
											2014-02-11 13:13:51 +01:00
										 |  |  |             if not self.categories: | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |                 self.categories = ['general'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # using all engines for that search, which are | 
					
						
							|  |  |  |             # declared under the specific categories | 
					
						
							| 
									
										
										
										
											2014-02-07 01:19:07 +01:00
										 |  |  |             for categ in self.categories: | 
					
						
							|  |  |  |                 self.engines.extend({'category': categ, | 
					
						
							| 
									
										
										
										
											2015-02-03 18:37:38 +01:00
										 |  |  |                                      'name': engine.name} | 
					
						
							|  |  |  |                                     for engine in categories[categ] | 
					
						
							|  |  |  |                                     if (engine.name, categ) not in self.blocked_engines) | 
					
						
							| 
									
										
										
										
											2014-02-09 01:07:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |     # do search-request | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |     def search(self, request): | 
					
						
							|  |  |  |         global number_of_searches | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # init vars | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         requests = [] | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:44:11 +02:00
										 |  |  |         # increase number of searches | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         number_of_searches += 1 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # set default useragent | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |         # user_agent = request.headers.get('User-Agent', '') | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         user_agent = gen_useragent() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # start search-reqest for all selected engines | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |         for selected_engine in self.engines: | 
					
						
							|  |  |  |             if selected_engine['name'] not in engines: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             engine = engines[selected_engine['name']] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # if paging is not supported, skip | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             if self.pageno > 1 and not engine.paging: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # if search-language is set and engine does not | 
					
						
							|  |  |  |             # provide language-support, skip | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             if self.lang != 'all' and not engine.language_support: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # set default request parameters | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             request_params = default_request_params() | 
					
						
							|  |  |  |             request_params['headers']['User-Agent'] = user_agent | 
					
						
							|  |  |  |             request_params['category'] = selected_engine['category'] | 
					
						
							| 
									
										
										
										
											2014-12-18 10:11:56 +01:00
										 |  |  |             request_params['started'] = time() | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             request_params['pageno'] = self.pageno | 
					
						
							| 
									
										
										
										
											2015-06-03 17:16:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-08-25 22:02:18 +02:00
										 |  |  |             if hasattr(engine, 'language') and engine.language: | 
					
						
							| 
									
										
										
										
											2015-06-03 17:16:12 +02:00
										 |  |  |                 request_params['language'] = engine.language | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 request_params['language'] = self.lang | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-08 21:53:37 +01:00
										 |  |  |             try: | 
					
						
							|  |  |  |                 # 0 = None, 1 = Moderate, 2 = Strict | 
					
						
							| 
									
										
										
										
											2015-08-02 20:29:19 +02:00
										 |  |  |                 request_params['safesearch'] = int(request.cookies.get('safesearch')) | 
					
						
							| 
									
										
										
										
											2015-08-02 20:41:44 +02:00
										 |  |  |             except Exception: | 
					
						
							| 
									
										
										
										
											2015-08-02 20:29:19 +02:00
										 |  |  |                 request_params['safesearch'] = settings['search']['safe_search'] | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # update request parameters dependent on | 
					
						
							|  |  |  |             # search-engine (contained in engines folder) | 
					
						
							| 
									
										
										
										
											2014-12-18 10:11:56 +01:00
										 |  |  |             engine.request(self.query.encode('utf-8'), request_params) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             if request_params['url'] is None: | 
					
						
							|  |  |  |                 # TODO add support of offline engines | 
					
						
							|  |  |  |                 pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # create a callback wrapper for the search engine results | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             callback = make_callback( | 
					
						
							|  |  |  |                 selected_engine['name'], | 
					
						
							|  |  |  |                 engine.response, | 
					
						
							| 
									
										
										
										
											2015-10-03 17:26:07 +02:00
										 |  |  |                 request_params, | 
					
						
							|  |  |  |                 self.result_container) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # create dictionary which contains all | 
					
						
							|  |  |  |             # information about the request | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             request_args = dict( | 
					
						
							|  |  |  |                 headers=request_params['headers'], | 
					
						
							|  |  |  |                 hooks=dict(response=callback), | 
					
						
							|  |  |  |                 cookies=request_params['cookies'], | 
					
						
							| 
									
										
										
										
											2014-12-15 19:37:58 +01:00
										 |  |  |                 timeout=engine.timeout, | 
					
						
							|  |  |  |                 verify=request_params['verify'] | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # specific type of request (GET or POST) | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             if request_params['method'] == 'GET': | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |                 req = requests_lib.get | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |             else: | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |                 req = requests_lib.post | 
					
						
							| 
									
										
										
										
											2014-07-07 13:59:27 +02:00
										 |  |  |                 request_args['data'] = request_params['data'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             # ignoring empty urls | 
					
						
							|  |  |  |             if not request_params['url']: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |             # append request to list | 
					
						
							| 
									
										
										
										
											2014-12-29 21:31:04 +01:00
										 |  |  |             requests.append((req, request_params['url'], | 
					
						
							|  |  |  |                              request_args, | 
					
						
							|  |  |  |                              selected_engine['name'])) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-19 19:40:40 +01:00
										 |  |  |         if not requests: | 
					
						
							| 
									
										
										
										
											2015-03-08 23:30:31 +01:00
										 |  |  |             return self | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  |         # send all search requests | 
					
						
							| 
									
										
										
										
											2014-12-05 19:24:11 +01:00
										 |  |  |         threaded_requests(requests) | 
					
						
							| 
									
										
										
										
											2014-09-13 18:25:25 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # return results, suggestions, answers and infoboxes | 
					
						
							| 
									
										
										
										
											2015-03-08 23:30:31 +01:00
										 |  |  |         return self |