'''
searx is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

searx is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with searx. If not, see < http://www.gnu.org/licenses/ >.

(C) 2013- by Adam Tauber, <asciimoo@gmail.com>
'''

import threading
import re
import searx.poolrequests as requests_lib
from itertools import izip_longest, chain
from operator import itemgetter
from Queue import Queue
from time import time
from urlparse import urlparse, unquote
from searx.engines import (
    categories, engines
)
from searx.languages import language_codes
from searx.utils import gen_useragent, get_blocked_engines
from searx.query import Query
from searx import logger

logger = logger.getChild('search')

number_of_searches = 0

def search_request_wrapper(fn, url, engine_name, **kwargs):
    try:
        return fn(url, **kwargs)
    except:
        # increase error stats
        engines[engine_name].stats['errors'] += 1

        # print engine name and specific error message
        logger.exception('engine crash: {0}'.format(engine_name))
        return

def threaded_requests(requests):
    timeout_limit = max(r[2]['timeout'] for r in requests)
    search_start = time()
    for fn, url, request_args, engine_name in requests:
        request_args['timeout'] = timeout_limit
        th = threading.Thread(
            target=search_request_wrapper,
            args=(fn, url, engine_name),
            kwargs=request_args,
            name='search_request',
        )
        th._engine_name = engine_name
        th.start()

    for th in threading.enumerate():
        if th.name == 'search_request':
            remaining_time = max(0.0, timeout_limit - (time() - search_start))
            th.join(remaining_time)
            if th.isAlive():
                logger.warning('engine timeout: {0}'.format(th._engine_name))

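# Illustration (not part of the original module): all requests share one
# deadline. With two engines whose timeouts are 2.0s and 5.0s:
#
#     timeout_limit = max(2.0, 5.0)   # == 5.0, used for every HTTP call
#
# The join loop then spends this single budget across the threads: however
# long the first join() blocks is subtracted before joining the next thread,
# so the whole batch never blocks much longer than timeout_limit in total.
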
# get default request parameters
def default_request_params():
    return {
        'method': 'GET',
        'headers': {},
        'data': {},
        'url': '',
        'cookies': {},
        'verify': True
    }

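# Sketch (assumption, for illustration only): each engine module mutates this
# dict in its request() hook; a hypothetical minimal engine could look like:
#
#     def request(query, params):
#         params['url'] = 'https://example.com/search?q=' + query
#         return params
#
# Search.search() below passes exactly such a dict to engine.request().
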
# create a callback wrapper for the search engine results
def make_callback(engine_name, results_queue, callback, params):

    def process_callback(response, **kwargs):
        response.search_params = params

        timeout_overhead = 0.2  # seconds
        search_duration = time() - params['started']
        timeout_limit = engines[engine_name].timeout + timeout_overhead
        if search_duration > timeout_limit:
            engines[engine_name].stats['page_load_time'] += timeout_limit
            engines[engine_name].stats['errors'] += 1
            return

        # callback
        search_results = callback(response)

        # add results
        for result in search_results:
            result['engine'] = engine_name

        results_queue.put_nowait((engine_name, search_results))

        # update stats with current page-load-time
        engines[engine_name].stats['page_load_time'] += search_duration

    return process_callback

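# Sketch (hypothetical values): the wrapper returned above is registered as a
# requests response hook, so it runs on the worker thread once the HTTP reply
# arrives:
#
#     results_queue = Queue()
#     callback = make_callback('somengine', results_queue,
#                              engines['somengine'].response, request_params)
#     requests_lib.get(url, hooks=dict(response=callback), timeout=2.0)
#     engine_name, engine_results = results_queue.get()
#
# 'somengine' and the 2.0s timeout are placeholders, not values from searx.
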
# return the meaningful length of the content for a result
def content_result_len(content):
    if isinstance(content, basestring):
        # note: '-' is placed last in the character class so it is a literal
        # dash; the previous ')-_' formed a range that also swallowed digits
        # and uppercase letters
        content = re.sub('[,;:!?\./\\\\ ()_-]', '', content)
        return len(content)
    else:
        return 0

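# Illustration (not executed): punctuation and whitespace do not count
# towards the "meaningful" length:
#
#     >>> content_result_len('foo, bar!')   # 'foobar' remains -> 6
#     6
#     >>> content_result_len(None)          # non-string input
#     0
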
# score results and remove duplications
def score_results(results):
    # calculate scoring parameters
    flat_res = filter(
        None, chain.from_iterable(izip_longest(*results.values())))
    flat_len = len(flat_res)
    engines_len = len(results)

    results = []

    # pass 1: deduplication + scoring
    for i, res in enumerate(flat_res):

        res['parsed_url'] = urlparse(res['url'])

        res['host'] = res['parsed_url'].netloc

        if res['host'].startswith('www.'):
            res['host'] = res['host'].replace('www.', '', 1)

        res['engines'] = [res['engine']]

        weight = 1.0

        # strip multiple spaces and carriage returns from content
        if res.get('content'):
            res['content'] = re.sub(' +', ' ',
                                    res['content'].strip().replace('\n', ''))

        # get weight of this engine if possible
        if hasattr(engines[res['engine']], 'weight'):
            weight = float(engines[res['engine']].weight)

        # calculate score for that engine
        score = int((flat_len - i) / engines_len) * weight + 1

        # check for duplicates
        duplicated = False
        for new_res in results:
            # remove / from the end of the url if required
            p1 = res['parsed_url'].path[:-1]\
                if res['parsed_url'].path.endswith('/')\
                else res['parsed_url'].path
            p2 = new_res['parsed_url'].path[:-1]\
                if new_res['parsed_url'].path.endswith('/')\
                else new_res['parsed_url'].path

            # check if that result is a duplicate
            if res['host'] == new_res['host'] and\
               unquote(p1) == unquote(p2) and\
               res['parsed_url'].query == new_res['parsed_url'].query and\
               res.get('template') == new_res.get('template'):
                duplicated = new_res
                break

        # merge duplicates together
        if duplicated:
            # use the content with more text
            if content_result_len(res.get('content', '')) >\
                    content_result_len(duplicated.get('content', '')):
                duplicated['content'] = res['content']

            # increase result-score
            duplicated['score'] += score

            # add engine to list of result-engines
            duplicated['engines'].append(res['engine'])

            # use https if possible
            if duplicated['parsed_url'].scheme == 'https':
                continue
            elif res['parsed_url'].scheme == 'https':
                duplicated['url'] = res['parsed_url'].geturl()
                duplicated['parsed_url'] = res['parsed_url']

        # if there is no duplicate found, append result
        else:
            res['score'] = score
            results.append(res)

    results = sorted(results, key=itemgetter('score'), reverse=True)

    # pass 2: group results by category and template
    gresults = []
    categoryPositions = {}

    for i, res in enumerate(results):
        # FIXME: handle more than one category per engine
        # parentheses added: without them the conditional expression bound to
        # the whole concatenation and silently dropped the category prefix
        category = engines[res['engine']].categories[0] + ':' + \
            ('' if 'template' not in res else res['template'])

        current = None if category not in categoryPositions\
            else categoryPositions[category]

        # group with previous results of the same category
        # if the group can accept more results and is not too far
        # from the current position
        if current is not None and (current['count'] > 0)\
                and (len(gresults) - current['index'] < 20):
            # group this result with the previous ones of the same category
            index = current['index']
            gresults.insert(index, res)

            # update every index after the current one
            # (including the current one)
            for k in categoryPositions:
                v = categoryPositions[k]['index']
                if v >= index:
                    categoryPositions[k]['index'] = v + 1

            # update this category
            current['count'] -= 1

        else:
            # start a new group for this category
            gresults.append(res)

            # update categoryPositions
            categoryPositions[category] = {'index': len(gresults), 'count': 8}

    return gresults

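# Worked example (illustrative numbers): with engines_len == 3 engines
# contributing flat_len == 30 interleaved results and weight == 1.0, under
# Python 2 integer division:
#
#     i == 0:  int((30 - 0) / 3) * 1.0 + 1   == 11.0   (best position)
#     i == 29: int((30 - 29) / 3) * 1.0 + 1  == 1.0    (worst position)
#
# Earlier positions score higher, and duplicates add their scores together,
# so a result returned by several engines outranks single-engine results.
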
def merge_two_infoboxes(infobox1, infobox2):
    if 'urls' in infobox2:
        urls1 = infobox1.get('urls', None)
        if urls1 is None:
            urls1 = []
            # item assignment instead of the non-existent dict.set()
            infobox1['urls'] = urls1

        urlSet = set()
        for url in infobox1.get('urls', []):
            urlSet.add(url.get('url', None))

        for url in infobox2.get('urls', []):
            if url.get('url', None) not in urlSet:
                urls1.append(url)

    if 'attributes' in infobox2:
        attributes1 = infobox1.get('attributes', None)
        if attributes1 is None:
            attributes1 = []
            infobox1['attributes'] = attributes1

        attributeSet = set()
        for attribute in infobox1.get('attributes', []):
            if attribute.get('label', None) not in attributeSet:
                attributeSet.add(attribute.get('label', None))

        # only add attributes whose label is not already present
        # (attributeSet was previously built but never consulted)
        for attribute in infobox2.get('attributes', []):
            if attribute.get('label', None) not in attributeSet:
                attributes1.append(attribute)

    if 'content' in infobox2:
        content1 = infobox1.get('content', None)
        content2 = infobox2.get('content', '')
        if content1 is not None:
            if content_result_len(content2) > content_result_len(content1):
                infobox1['content'] = content2
        else:
            infobox1['content'] = content2

def merge_infoboxes(infoboxes):
    results = []
    infoboxes_id = {}
    for infobox in infoboxes:
        add_infobox = True
        infobox_id = infobox.get('id', None)
        if infobox_id is not None:
            existingIndex = infoboxes_id.get(infobox_id, None)
            if existingIndex is not None:
                merge_two_infoboxes(results[existingIndex], infobox)
                add_infobox = False

        if add_infobox:
            results.append(infobox)
            infoboxes_id[infobox_id] = len(results) - 1

    return results

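# Illustration (hypothetical data): infoboxes sharing an 'id' collapse into
# one entry, keeping the longer content:
#
#     boxes = [{'id': 'Q42', 'content': 'short'},
#              {'id': 'Q42', 'content': 'a much longer description'}]
#     merged = merge_infoboxes(boxes)
#     # len(merged) == 1; merged[0]['content'] is the longer description
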
class Search(object):

    """Search information container"""

    def __init__(self, request):
        # init vars
        super(Search, self).__init__()
        self.query = None
        self.engines = []
        self.categories = []
        self.paging = False
        self.pageno = 1
        self.lang = 'all'

        # set blocked engines
        self.blocked_engines = get_blocked_engines(engines, request.cookies)

        self.results = []
        self.suggestions = []
        self.answers = []
        self.infoboxes = []
        self.request_data = {}

        # set specific language if set
        if request.cookies.get('language')\
           and request.cookies['language'] in (x[0] for x in language_codes):
            self.lang = request.cookies['language']

        # set request method
        if request.method == 'POST':
            self.request_data = request.form
        else:
            self.request_data = request.args

        # TODO better exceptions
        if not self.request_data.get('q'):
            raise Exception('noquery')

        # set page number
        pageno_param = self.request_data.get('pageno', '1')
        if not pageno_param.isdigit() or int(pageno_param) < 1:
            raise Exception('wrong pagenumber')

        self.pageno = int(pageno_param)

        # parse the query for special tags that change the
        # search engines or the search language
        query_obj = Query(self.request_data['q'], self.blocked_engines)
        query_obj.parse_query()

        # set query
        self.query = query_obj.getSearchQuery()

        # get the last language selected in the query, if any
        # TODO support searching with multiple languages
        if len(query_obj.languages):
            self.lang = query_obj.languages[-1]

        self.engines = query_obj.engines

        self.categories = []

        # if engines were selected in the query,
        # derive the categories from them
        if self.engines and query_obj.specific:
            self.categories = list(set(engine['category']
                                       for engine in self.engines))

        # otherwise, use the selected categories to
        # calculate which engines should be used
        else:
            # set used categories
            for pd_name, pd in self.request_data.items():
                if pd_name.startswith('category_'):
                    category = pd_name[9:]

                    # if category is not found in list, skip
                    if category not in categories:
                        continue

                    if pd != 'off':
                        # add category to list
                        self.categories.append(category)
                    elif category in self.categories:
                        # remove category from list if property is set to 'off'
                        self.categories.remove(category)

            # if no category is specified for this search,
            # use the user-defined default configuration
            # (stored in a cookie)
            if not self.categories:
                cookie_categories = request.cookies.get('categories', '')
                cookie_categories = cookie_categories.split(',')
                for ccateg in cookie_categories:
                    if ccateg in categories:
                        self.categories.append(ccateg)

            # if still no category is specified, use 'general'
            # as the default category
            if not self.categories:
                self.categories = ['general']

            # use all engines for this search that are
            # declared under the selected categories
            for categ in self.categories:
                self.engines.extend({'category': categ,
                                     'name': engine.name}
                                    for engine in categories[categ]
                                    if (engine.name, categ) not in self.blocked_engines)

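    # Illustration (assuming the tag syntax implemented in searx.query): a raw
    # query such as '!images kittens :fr' would leave self.query == 'kittens',
    # select the engines of the 'images' category and set self.lang to 'fr';
    # a plain query falls back to the cookie/category defaults handled above.
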
    # do the search request
    def search(self, request):
        global number_of_searches

        # init vars
        requests = []
        results_queue = Queue()
        results = {}
        suggestions = set()
        answers = set()
        infoboxes = []

        # increase number of searches
        number_of_searches += 1

        # set default useragent
        # user_agent = request.headers.get('User-Agent', '')
        user_agent = gen_useragent()

        # start the search request for all selected engines
        for selected_engine in self.engines:
            if selected_engine['name'] not in engines:
                continue

            engine = engines[selected_engine['name']]

            # if paging is not supported, skip
            if self.pageno > 1 and not engine.paging:
                continue

            # if a search language is set and the engine does not
            # provide language support, skip
            if self.lang != 'all' and not engine.language_support:
                continue

            # set default request parameters
            request_params = default_request_params()
            request_params['headers']['User-Agent'] = user_agent
            request_params['category'] = selected_engine['category']
            request_params['started'] = time()
            request_params['pageno'] = self.pageno
            request_params['language'] = self.lang

            # update request parameters depending on the
            # search engine (contained in the engines folder)
            engine.request(self.query.encode('utf-8'), request_params)

            if request_params['url'] is None:
                # TODO add support of offline engines
                pass

            # create a callback wrapper for the search engine results
            callback = make_callback(
                selected_engine['name'],
                results_queue,
                engine.response,
                request_params)

            # create a dictionary which contains all
            # information about the request
            request_args = dict(
                headers=request_params['headers'],
                hooks=dict(response=callback),
                cookies=request_params['cookies'],
                timeout=engine.timeout,
                verify=request_params['verify']
            )

            # specific type of request (GET or POST)
            if request_params['method'] == 'GET':
                req = requests_lib.get
            else:
                req = requests_lib.post
                request_args['data'] = request_params['data']

            # ignore empty urls
            if not request_params['url']:
                continue

            # append request to list
            requests.append((req, request_params['url'],
                             request_args,
                             selected_engine['name']))

        if not requests:
            return results, suggestions, answers, infoboxes

        # send all search requests
        threaded_requests(requests)

        while not results_queue.empty():
            engine_name, engine_results = results_queue.get_nowait()

            # TODO type checks
            # split suggestions, answers and infoboxes out of the plain
            # result list; iterate over a copy so items can be removed
            # while looping
            for x in list(engine_results):
                if 'suggestion' in x:
                    suggestions.add(x['suggestion'])
                    engine_results.remove(x)

            for x in list(engine_results):
                if 'answer' in x:
                    answers.add(x['answer'])
                    engine_results.remove(x)

            for x in list(engine_results):
                if 'infobox' in x:
                    infoboxes.append(x)
                    engine_results.remove(x)

            results[engine_name] = engine_results

        # update engine-specific stats
        for engine_name, engine_results in results.items():
            engines[engine_name].stats['search_count'] += 1
            engines[engine_name].stats['result_count'] += len(engine_results)

        # score results and remove duplications
        results = score_results(results)

        # merge infoboxes according to their ids
        infoboxes = merge_infoboxes(infoboxes)

        # update engine stats, using the calculated score
        for result in results:
            for res_engine in result['engines']:
                # credit every engine that produced this result
                # (was engines[result['engine']], which ignored res_engine)
                engines[res_engine]\
                    .stats['score_count'] += result['score']

        # return results, suggestions, answers and infoboxes
        return results, suggestions, answers, infoboxes
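

# Usage sketch (assumption -- the real caller is searx's Flask webapp,
# which passes in its request object):
#
#     search = Search(flask_request)
#     results, suggestions, answers, infoboxes = search.search(flask_request)
#
# Search.__init__ only parses the request; no network traffic happens until
# search() is called.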