Merge branch 'comments' of https://github.com/pointhi/searx
Conflicts: searx/search.py
This commit is contained in:
		
						commit
						bd2db71fa6
					
				| @ -1,3 +1,20 @@ | |||||||
|  | ''' | ||||||
|  | searx is free software: you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU Affero General Public License as published by | ||||||
|  | the Free Software Foundation, either version 3 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | searx is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU Affero General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU Affero General Public License | ||||||
|  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||||
|  | 
 | ||||||
|  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
| from os import environ | from os import environ | ||||||
| from os.path import realpath, dirname, join, abspath | from os.path import realpath, dirname, join, abspath | ||||||
| try: | try: | ||||||
| @ -10,11 +27,14 @@ except: | |||||||
| searx_dir = abspath(dirname(__file__)) | searx_dir = abspath(dirname(__file__)) | ||||||
| engine_dir = dirname(realpath(__file__)) | engine_dir = dirname(realpath(__file__)) | ||||||
| 
 | 
 | ||||||
|  | # if possible set path to settings using the environment variable SEARX_SETTINGS_PATH | ||||||
| if 'SEARX_SETTINGS_PATH' in environ: | if 'SEARX_SETTINGS_PATH' in environ: | ||||||
|     settings_path = environ['SEARX_SETTINGS_PATH'] |     settings_path = environ['SEARX_SETTINGS_PATH'] | ||||||
|  | # otherwise using default path | ||||||
| else: | else: | ||||||
|     settings_path = join(searx_dir, 'settings.yml') |     settings_path = join(searx_dir, 'settings.yml') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # load settings | ||||||
| with open(settings_path) as settings_yaml: | with open(settings_path) as settings_yaml: | ||||||
|     settings = load(settings_yaml) |     settings = load(settings_yaml) | ||||||
|  | |||||||
| @ -1,3 +1,21 @@ | |||||||
|  | ''' | ||||||
|  | searx is free software: you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU Affero General Public License as published by | ||||||
|  | the Free Software Foundation, either version 3 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | searx is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU Affero General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU Affero General Public License | ||||||
|  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||||
|  | 
 | ||||||
|  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
| from lxml import etree | from lxml import etree | ||||||
| from requests import get | from requests import get | ||||||
| from json import loads | from json import loads | ||||||
| @ -22,7 +40,7 @@ def dbpedia(query): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def duckduckgo(query): | def duckduckgo(query): | ||||||
|     # wikipedia autocompleter |     # duckduckgo autocompleter | ||||||
|     url = 'https://ac.duckduckgo.com/ac/?{0}&type=list' |     url = 'https://ac.duckduckgo.com/ac/?{0}&type=list' | ||||||
| 
 | 
 | ||||||
|     resp = loads(get(url.format(urlencode(dict(q=query)))).text) |     resp = loads(get(url.format(urlencode(dict(q=query)))).text) | ||||||
|  | |||||||
| @ -1,3 +1,21 @@ | |||||||
|  | ''' | ||||||
|  | searx is free software: you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU Affero General Public License as published by | ||||||
|  | the Free Software Foundation, either version 3 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | searx is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU Affero General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU Affero General Public License | ||||||
|  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||||
|  | 
 | ||||||
|  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
|  | # list of language codes | ||||||
| language_codes = ( | language_codes = ( | ||||||
|     ("ar_XA", "Arabic", "Arabia"), |     ("ar_XA", "Arabic", "Arabia"), | ||||||
|     ("bg_BG", "Bulgarian", "Bulgaria"), |     ("bg_BG", "Bulgarian", "Bulgaria"), | ||||||
|  | |||||||
							
								
								
									
										116
									
								
								searx/search.py
									
									
									
									
									
								
							
							
						
						
									
										116
									
								
								searx/search.py
									
									
									
									
									
								
							| @ -1,3 +1,20 @@ | |||||||
|  | ''' | ||||||
|  | searx is free software: you can redistribute it and/or modify | ||||||
|  | it under the terms of the GNU Affero General Public License as published by | ||||||
|  | the Free Software Foundation, either version 3 of the License, or | ||||||
|  | (at your option) any later version. | ||||||
|  | 
 | ||||||
|  | searx is distributed in the hope that it will be useful, | ||||||
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||||
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||||
|  | GNU Affero General Public License for more details. | ||||||
|  | 
 | ||||||
|  | You should have received a copy of the GNU Affero General Public License | ||||||
|  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||||
|  | 
 | ||||||
|  | (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||||
|  | ''' | ||||||
|  | 
 | ||||||
| import grequests | import grequests | ||||||
| from itertools import izip_longest, chain | from itertools import izip_longest, chain | ||||||
| from datetime import datetime | from datetime import datetime | ||||||
| @ -9,45 +26,65 @@ from searx.engines import ( | |||||||
| from searx.languages import language_codes | from searx.languages import language_codes | ||||||
| from searx.utils import gen_useragent | from searx.utils import gen_useragent | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
| number_of_searches = 0 | number_of_searches = 0 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # get default request parameters | ||||||
| def default_request_params(): | def default_request_params(): | ||||||
|     return { |     return { | ||||||
|         'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} |         'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}} | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # create a callback wrapper for the search engine results | ||||||
| def make_callback(engine_name, results, suggestions, callback, params): | def make_callback(engine_name, results, suggestions, callback, params): | ||||||
|  | 
 | ||||||
|     # creating a callback wrapper for the search engine results |     # creating a callback wrapper for the search engine results | ||||||
|     def process_callback(response, **kwargs): |     def process_callback(response, **kwargs): | ||||||
|         cb_res = [] |         cb_res = [] | ||||||
|         response.search_params = params |         response.search_params = params | ||||||
|  | 
 | ||||||
|  |         # update stats with current page-load-time | ||||||
|         engines[engine_name].stats['page_load_time'] += \ |         engines[engine_name].stats['page_load_time'] += \ | ||||||
|             (datetime.now() - params['started']).total_seconds() |             (datetime.now() - params['started']).total_seconds() | ||||||
|  | 
 | ||||||
|         try: |         try: | ||||||
|             search_results = callback(response) |             search_results = callback(response) | ||||||
|         except Exception, e: |         except Exception, e: | ||||||
|  |             # increase errors stats | ||||||
|             engines[engine_name].stats['errors'] += 1 |             engines[engine_name].stats['errors'] += 1 | ||||||
|             results[engine_name] = cb_res |             results[engine_name] = cb_res | ||||||
|  | 
 | ||||||
|  |             # print engine name and specific error message | ||||||
|             print '[E] Error with engine "{0}":\n\t{1}'.format( |             print '[E] Error with engine "{0}":\n\t{1}'.format( | ||||||
|                 engine_name, str(e)) |                 engine_name, str(e)) | ||||||
|             return |             return | ||||||
|  | 
 | ||||||
|         for result in search_results: |         for result in search_results: | ||||||
|             result['engine'] = engine_name |             result['engine'] = engine_name | ||||||
|  | 
 | ||||||
|  |             # if it is a suggestion, add it to list of suggestions | ||||||
|             if 'suggestion' in result: |             if 'suggestion' in result: | ||||||
|                 # TODO type checks |                 # TODO type checks | ||||||
|                 suggestions.add(result['suggestion']) |                 suggestions.add(result['suggestion']) | ||||||
|                 continue |                 continue | ||||||
|  | 
 | ||||||
|  |             # append result | ||||||
|             cb_res.append(result) |             cb_res.append(result) | ||||||
|  | 
 | ||||||
|         results[engine_name] = cb_res |         results[engine_name] = cb_res | ||||||
|  | 
 | ||||||
|     return process_callback |     return process_callback | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # score results and remove duplications | ||||||
| def score_results(results): | def score_results(results): | ||||||
|  |     # calculate scoring parameters | ||||||
|     flat_res = filter( |     flat_res = filter( | ||||||
|         None, chain.from_iterable(izip_longest(*results.values()))) |         None, chain.from_iterable(izip_longest(*results.values()))) | ||||||
|     flat_len = len(flat_res) |     flat_len = len(flat_res) | ||||||
|     engines_len = len(results) |     engines_len = len(results) | ||||||
|  | 
 | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     # pass 1: deduplication + scoring |     # pass 1: deduplication + scoring | ||||||
| @ -63,34 +100,53 @@ def score_results(results): | |||||||
|         res['engines'] = [res['engine']] |         res['engines'] = [res['engine']] | ||||||
|         weight = 1.0 |         weight = 1.0 | ||||||
| 
 | 
 | ||||||
|  |         # get weight of this engine if possible | ||||||
|         if hasattr(engines[res['engine']], 'weight'): |         if hasattr(engines[res['engine']], 'weight'): | ||||||
|             weight = float(engines[res['engine']].weight) |             weight = float(engines[res['engine']].weight) | ||||||
| 
 | 
 | ||||||
|  |         # calculate score for that engine | ||||||
|         score = int((flat_len - i) / engines_len) * weight + 1 |         score = int((flat_len - i) / engines_len) * weight + 1 | ||||||
|  | 
 | ||||||
|         duplicated = False |         duplicated = False | ||||||
| 
 | 
 | ||||||
|  |         # check for duplicates | ||||||
|         for new_res in results: |         for new_res in results: | ||||||
|  |             # remove / from the end of the url if required | ||||||
|             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa |             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa | ||||||
|             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa |             p2 = new_res['parsed_url'].path[:-1] if new_res['parsed_url'].path.endswith('/') else new_res['parsed_url'].path  # noqa | ||||||
|  | 
 | ||||||
|  |             # check if that result is a duplicate | ||||||
|             if res['host'] == new_res['host'] and\ |             if res['host'] == new_res['host'] and\ | ||||||
|                unquote(p1) == unquote(p2) and\ |                unquote(p1) == unquote(p2) and\ | ||||||
|                res['parsed_url'].query == new_res['parsed_url'].query and\ |                res['parsed_url'].query == new_res['parsed_url'].query and\ | ||||||
|                res.get('template') == new_res.get('template'): |                res.get('template') == new_res.get('template'): | ||||||
|                 duplicated = new_res |                 duplicated = new_res | ||||||
|                 break |                 break | ||||||
|  | 
 | ||||||
|  |         # merge duplicates together | ||||||
|         if duplicated: |         if duplicated: | ||||||
|  |             # using content with more text | ||||||
|             if res.get('content') > duplicated.get('content'): |             if res.get('content') > duplicated.get('content'): | ||||||
|                 duplicated['content'] = res['content'] |                 duplicated['content'] = res['content'] | ||||||
|  | 
 | ||||||
|  |             # increase result-score | ||||||
|             duplicated['score'] += score |             duplicated['score'] += score | ||||||
|  | 
 | ||||||
|  |             # add engine to list of result-engines | ||||||
|             duplicated['engines'].append(res['engine']) |             duplicated['engines'].append(res['engine']) | ||||||
|  | 
 | ||||||
|  |             # using https if possible | ||||||
|             if duplicated['parsed_url'].scheme == 'https': |             if duplicated['parsed_url'].scheme == 'https': | ||||||
|                 continue |                 continue | ||||||
|             elif res['parsed_url'].scheme == 'https': |             elif res['parsed_url'].scheme == 'https': | ||||||
|                 duplicated['url'] = res['parsed_url'].geturl() |                 duplicated['url'] = res['parsed_url'].geturl() | ||||||
|                 duplicated['parsed_url'] = res['parsed_url'] |                 duplicated['parsed_url'] = res['parsed_url'] | ||||||
|  | 
 | ||||||
|  |         # if there is no duplicate found, append result | ||||||
|         else: |         else: | ||||||
|             res['score'] = score |             res['score'] = score | ||||||
|             results.append(res) |             results.append(res) | ||||||
|  | 
 | ||||||
|     results = sorted(results, key=itemgetter('score'), reverse=True) |     results = sorted(results, key=itemgetter('score'), reverse=True) | ||||||
| 
 | 
 | ||||||
|     # pass 2 : group results by category and template |     # pass 2 : group results by category and template | ||||||
| @ -134,6 +190,7 @@ class Search(object): | |||||||
|     """Search information container""" |     """Search information container""" | ||||||
| 
 | 
 | ||||||
|     def __init__(self, request): |     def __init__(self, request): | ||||||
|  |         # init vars | ||||||
|         super(Search, self).__init__() |         super(Search, self).__init__() | ||||||
|         self.query = None |         self.query = None | ||||||
|         self.engines = [] |         self.engines = [] | ||||||
| @ -141,18 +198,23 @@ class Search(object): | |||||||
|         self.paging = False |         self.paging = False | ||||||
|         self.pageno = 1 |         self.pageno = 1 | ||||||
|         self.lang = 'all' |         self.lang = 'all' | ||||||
|  | 
 | ||||||
|  |         # set blocked engines | ||||||
|         if request.cookies.get('blocked_engines'): |         if request.cookies.get('blocked_engines'): | ||||||
|             self.blocked_engines = request.cookies['blocked_engines'].split(',')  # noqa |             self.blocked_engines = request.cookies['blocked_engines'].split(',')  # noqa | ||||||
|         else: |         else: | ||||||
|             self.blocked_engines = [] |             self.blocked_engines = [] | ||||||
|  | 
 | ||||||
|         self.results = [] |         self.results = [] | ||||||
|         self.suggestions = [] |         self.suggestions = [] | ||||||
|         self.request_data = {} |         self.request_data = {} | ||||||
| 
 | 
 | ||||||
|  |         # set specific language if set | ||||||
|         if request.cookies.get('language')\ |         if request.cookies.get('language')\ | ||||||
|            and request.cookies['language'] in (x[0] for x in language_codes): |            and request.cookies['language'] in (x[0] for x in language_codes): | ||||||
|             self.lang = request.cookies['language'] |             self.lang = request.cookies['language'] | ||||||
| 
 | 
 | ||||||
|  |         # set request method | ||||||
|         if request.method == 'POST': |         if request.method == 'POST': | ||||||
|             self.request_data = request.form |             self.request_data = request.form | ||||||
|         else: |         else: | ||||||
| @ -162,51 +224,72 @@ class Search(object): | |||||||
|         if not self.request_data.get('q'): |         if not self.request_data.get('q'): | ||||||
|             raise Exception('noquery') |             raise Exception('noquery') | ||||||
| 
 | 
 | ||||||
|  |         # set query | ||||||
|         self.query = self.request_data['q'] |         self.query = self.request_data['q'] | ||||||
| 
 | 
 | ||||||
|  |         # set pagenumber | ||||||
|         pageno_param = self.request_data.get('pageno', '1') |         pageno_param = self.request_data.get('pageno', '1') | ||||||
|         if not pageno_param.isdigit() or int(pageno_param) < 1: |         if not pageno_param.isdigit() or int(pageno_param) < 1: | ||||||
|             raise Exception('wrong pagenumber') |             raise Exception('wrong pagenumber') | ||||||
| 
 | 
 | ||||||
|         self.pageno = int(pageno_param) |         self.pageno = int(pageno_param) | ||||||
| 
 | 
 | ||||||
|  |         # parse query, if tags are set, which change the search engine or search-language | ||||||
|         self.parse_query() |         self.parse_query() | ||||||
| 
 | 
 | ||||||
|         self.categories = [] |         self.categories = [] | ||||||
| 
 | 
 | ||||||
|  |         # if engines are calculated from query, set categories by using that information | ||||||
|         if self.engines: |         if self.engines: | ||||||
|             self.categories = list(set(engine['category'] |             self.categories = list(set(engine['category'] | ||||||
|                                        for engine in self.engines)) |                                        for engine in self.engines)) | ||||||
|  | 
 | ||||||
|  |         # otherwise, using defined categories to calculate which engines should be used | ||||||
|         else: |         else: | ||||||
|  |             # set used categories | ||||||
|             for pd_name, pd in self.request_data.items(): |             for pd_name, pd in self.request_data.items(): | ||||||
|                 if pd_name.startswith('category_'): |                 if pd_name.startswith('category_'): | ||||||
|                     category = pd_name[9:] |                     category = pd_name[9:] | ||||||
|  |                     # if category is not found in list, skip | ||||||
|                     if not category in categories: |                     if not category in categories: | ||||||
|                         continue |                         continue | ||||||
|  | 
 | ||||||
|  |                     # add category to list | ||||||
|                     self.categories.append(category) |                     self.categories.append(category) | ||||||
|  | 
 | ||||||
|  |             # if no category is specified for this search, using user-defined default-configuration (which is stored in cookie) | ||||||
|             if not self.categories: |             if not self.categories: | ||||||
|                 cookie_categories = request.cookies.get('categories', '') |                 cookie_categories = request.cookies.get('categories', '') | ||||||
|                 cookie_categories = cookie_categories.split(',') |                 cookie_categories = cookie_categories.split(',') | ||||||
|                 for ccateg in cookie_categories: |                 for ccateg in cookie_categories: | ||||||
|                     if ccateg in categories: |                     if ccateg in categories: | ||||||
|                         self.categories.append(ccateg) |                         self.categories.append(ccateg) | ||||||
|  | 
 | ||||||
|  |             # if still no category is specified, using general as default-category | ||||||
|             if not self.categories: |             if not self.categories: | ||||||
|                 self.categories = ['general'] |                 self.categories = ['general'] | ||||||
| 
 | 
 | ||||||
|  |             # using all engines for that search, which are declared under the specific categories | ||||||
|             for categ in self.categories: |             for categ in self.categories: | ||||||
|                 self.engines.extend({'category': categ, |                 self.engines.extend({'category': categ, | ||||||
|                                      'name': x.name} |                                      'name': x.name} | ||||||
|                                     for x in categories[categ] |                                     for x in categories[categ] | ||||||
|                                     if not x.name in self.blocked_engines) |                                     if not x.name in self.blocked_engines) | ||||||
| 
 | 
 | ||||||
|  |     # parse query, if tags are set, which change the search engine or search-language | ||||||
|     def parse_query(self): |     def parse_query(self): | ||||||
|         query_parts = self.query.split() |         query_parts = self.query.split() | ||||||
|         modified = False |         modified = False | ||||||
|  | 
 | ||||||
|  |         # check if language-prefix is set | ||||||
|         if query_parts[0].startswith(':'): |         if query_parts[0].startswith(':'): | ||||||
|             lang = query_parts[0][1:].lower() |             lang = query_parts[0][1:].lower() | ||||||
| 
 | 
 | ||||||
|  |             # check if any language-code is equal with declared language-codes | ||||||
|             for lc in language_codes: |             for lc in language_codes: | ||||||
|                 lang_id, lang_name, country = map(str.lower, lc) |                 lang_id, lang_name, country = map(str.lower, lc) | ||||||
|  | 
 | ||||||
|  |                 # if correct language-code is found, set it as new search-language | ||||||
|                 if lang == lang_id\ |                 if lang == lang_id\ | ||||||
|                    or lang_id.startswith(lang)\ |                    or lang_id.startswith(lang)\ | ||||||
|                    or lang == lang_name\ |                    or lang == lang_name\ | ||||||
| @ -215,56 +298,78 @@ class Search(object): | |||||||
|                     modified = True |                     modified = True | ||||||
|                     break |                     break | ||||||
| 
 | 
 | ||||||
|  |         # check if category/engine prefix is set | ||||||
|         elif query_parts[0].startswith('!'): |         elif query_parts[0].startswith('!'): | ||||||
|             prefix = query_parts[0][1:].replace('_', ' ') |             prefix = query_parts[0][1:].replace('_', ' ') | ||||||
| 
 | 
 | ||||||
|  |             # check if prefix is equal with engine shortcut | ||||||
|             if prefix in engine_shortcuts\ |             if prefix in engine_shortcuts\ | ||||||
|                and not engine_shortcuts[prefix] in self.blocked_engines: |                and not engine_shortcuts[prefix] in self.blocked_engines: | ||||||
|                 modified = True |                 modified = True | ||||||
|                 self.engines.append({'category': 'none', |                 self.engines.append({'category': 'none', | ||||||
|                                      'name': engine_shortcuts[prefix]}) |                                      'name': engine_shortcuts[prefix]}) | ||||||
|  | 
 | ||||||
|  |             # check if prefix is equal with engine name | ||||||
|             elif prefix in engines\ |             elif prefix in engines\ | ||||||
|                     and not prefix in self.blocked_engines: |                     and not prefix in self.blocked_engines: | ||||||
|                 modified = True |                 modified = True | ||||||
|                 self.engines.append({'category': 'none', |                 self.engines.append({'category': 'none', | ||||||
|                                     'name': prefix}) |                                     'name': prefix}) | ||||||
|  | 
 | ||||||
|  |             # check if prefix is equal with category name | ||||||
|             elif prefix in categories: |             elif prefix in categories: | ||||||
|                 modified = True |                 modified = True | ||||||
|  |                 # using all engines for that search, which are declared under that category name | ||||||
|                 self.engines.extend({'category': prefix, |                 self.engines.extend({'category': prefix, | ||||||
|                                     'name': engine.name} |                                     'name': engine.name} | ||||||
|                                     for engine in categories[prefix] |                                     for engine in categories[prefix] | ||||||
|                                     if not engine in self.blocked_engines) |                                     if not engine in self.blocked_engines) | ||||||
|  | 
 | ||||||
|  |         # if language, category or engine were specified in this query, search for more tags which do the same | ||||||
|         if modified: |         if modified: | ||||||
|             self.query = self.query.replace(query_parts[0], '', 1).strip() |             self.query = self.query.replace(query_parts[0], '', 1).strip() | ||||||
|             self.parse_query() |             self.parse_query() | ||||||
| 
 | 
 | ||||||
|  |     # do search-request | ||||||
|     def search(self, request): |     def search(self, request): | ||||||
|         global number_of_searches |         global number_of_searches | ||||||
|  | 
 | ||||||
|  |         # init vars | ||||||
|         requests = [] |         requests = [] | ||||||
|         results = {} |         results = {} | ||||||
|         suggestions = set() |         suggestions = set() | ||||||
|  | 
 | ||||||
|  |         # increase number of searches | ||||||
|         number_of_searches += 1 |         number_of_searches += 1 | ||||||
|  | 
 | ||||||
|  |         # set default useragent | ||||||
|         #user_agent = request.headers.get('User-Agent', '') |         #user_agent = request.headers.get('User-Agent', '') | ||||||
|         user_agent = gen_useragent() |         user_agent = gen_useragent() | ||||||
| 
 | 
 | ||||||
|  |         # start search-request for all selected engines | ||||||
|         for selected_engine in self.engines: |         for selected_engine in self.engines: | ||||||
|             if selected_engine['name'] not in engines: |             if selected_engine['name'] not in engines: | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|             engine = engines[selected_engine['name']] |             engine = engines[selected_engine['name']] | ||||||
| 
 | 
 | ||||||
|  |             # if paging is not supported, skip | ||||||
|             if self.pageno > 1 and not engine.paging: |             if self.pageno > 1 and not engine.paging: | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|  |             # if search-language is set and engine does not provide language-support, skip | ||||||
|             if self.lang != 'all' and not engine.language_support: |             if self.lang != 'all' and not engine.language_support: | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|  |             # set default request parameters | ||||||
|             request_params = default_request_params() |             request_params = default_request_params() | ||||||
|             request_params['headers']['User-Agent'] = user_agent |             request_params['headers']['User-Agent'] = user_agent | ||||||
|             request_params['category'] = selected_engine['category'] |             request_params['category'] = selected_engine['category'] | ||||||
|             request_params['started'] = datetime.now() |             request_params['started'] = datetime.now() | ||||||
|             request_params['pageno'] = self.pageno |             request_params['pageno'] = self.pageno | ||||||
|             request_params['language'] = self.lang |             request_params['language'] = self.lang | ||||||
|  | 
 | ||||||
|  |             # update request parameters dependent on search-engine (contained in engines folder) | ||||||
|             request_params = engine.request(self.query.encode('utf-8'), |             request_params = engine.request(self.query.encode('utf-8'), | ||||||
|                                             request_params) |                                             request_params) | ||||||
| 
 | 
 | ||||||
| @ -272,6 +377,7 @@ class Search(object): | |||||||
|                 # TODO add support of offline engines |                 # TODO add support of offline engines | ||||||
|                 pass |                 pass | ||||||
| 
 | 
 | ||||||
|  |             # create a callback wrapper for the search engine results | ||||||
|             callback = make_callback( |             callback = make_callback( | ||||||
|                 selected_engine['name'], |                 selected_engine['name'], | ||||||
|                 results, |                 results, | ||||||
| @ -280,6 +386,7 @@ class Search(object): | |||||||
|                 request_params |                 request_params | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|  |             # create dictionary which contains all information about the request | ||||||
|             request_args = dict( |             request_args = dict( | ||||||
|                 headers=request_params['headers'], |                 headers=request_params['headers'], | ||||||
|                 hooks=dict(response=callback), |                 hooks=dict(response=callback), | ||||||
| @ -287,6 +394,7 @@ class Search(object): | |||||||
|                 timeout=engine.timeout |                 timeout=engine.timeout | ||||||
|             ) |             ) | ||||||
| 
 | 
 | ||||||
|  |             # specific type of request (GET or POST) | ||||||
|             if request_params['method'] == 'GET': |             if request_params['method'] == 'GET': | ||||||
|                 req = grequests.get |                 req = grequests.get | ||||||
|             else: |             else: | ||||||
| @ -297,17 +405,25 @@ class Search(object): | |||||||
|             if not request_params['url']: |             if not request_params['url']: | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|  |             # append request to list | ||||||
|             requests.append(req(request_params['url'], **request_args)) |             requests.append(req(request_params['url'], **request_args)) | ||||||
|  | 
 | ||||||
|  |         # send all search-requests | ||||||
|         grequests.map(requests) |         grequests.map(requests) | ||||||
|  | 
 | ||||||
|  |         # update engine-specific stats | ||||||
|         for engine_name, engine_results in results.items(): |         for engine_name, engine_results in results.items(): | ||||||
|             engines[engine_name].stats['search_count'] += 1 |             engines[engine_name].stats['search_count'] += 1 | ||||||
|             engines[engine_name].stats['result_count'] += len(engine_results) |             engines[engine_name].stats['result_count'] += len(engine_results) | ||||||
| 
 | 
 | ||||||
|  |         # score results and remove duplications | ||||||
|         results = score_results(results) |         results = score_results(results) | ||||||
| 
 | 
 | ||||||
|  |         # update engine stats, using calculated score | ||||||
|         for result in results: |         for result in results: | ||||||
|             for res_engine in result['engines']: |             for res_engine in result['engines']: | ||||||
|                 engines[result['engine']]\ |                 engines[result['engine']]\ | ||||||
|                     .stats['score_count'] += result['score'] |                     .stats['score_count'] += result['score'] | ||||||
| 
 | 
 | ||||||
|  |         # return results and suggestions | ||||||
|         return results, suggestions |         return results, suggestions | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user