Merge pull request #106 from pointhi/query_parser
FIX: #101 implement query parser and use it inside autocompletion and search query extraction
This commit is contained in:
		
						commit
						983339bb03
					
				
							
								
								
									
										127
									
								
								searx/query.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										127
									
								
								searx/query.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,127 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| ''' | ||||
| searx is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU Affero General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| searx is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU Affero General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU Affero General Public License | ||||
| along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| 
 | ||||
| (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at> | ||||
| ''' | ||||
| 
 | ||||
| from searx.languages import language_codes | ||||
| from searx.engines import ( | ||||
|     categories, engines, engine_shortcuts | ||||
| ) | ||||
| import string | ||||
| import re | ||||
| 
 | ||||
| 
 | ||||
class Query(object):
    """Split a raw search query into leading tags and the search terms.

    A query is cut at whitespace.  As long as the leading parts are
    recognised tags (``:language`` or ``!engine`` / ``!shortcut`` /
    ``!category``), each one is stored as its own entry in
    ``query_parts``.  The first part that is not a recognised tag, and
    everything after it (whitespace included), is merged into the last
    entry, which is therefore the actual search string.
    """

    def __init__(self, query, blocked_engines):
        """Store the raw query and the engine names that must not be used.

        ``blocked_engines`` may be ``None`` or empty; it is then treated
        as an empty list.
        """
        self.query = query
        self.blocked_engines = blocked_engines if blocked_engines else []

        # raw pieces of the query; joined together they give the query back
        self.query_parts = []
        # dicts of the form {'category': ..., 'name': ...}, one per engine
        self.engines = []
        # language strings exactly as typed after ':' (lower-cased)
        self.languages = []

    def _parse_language_tag(self, tag):
        """Record ``tag`` as a language if it matches a known language code.

        Returns True when the tag was accepted, False otherwise.
        """
        lang = tag.lower()

        # an empty tag (a lone ':') would be a prefix of every language id
        # and would select an empty language, so it is never a valid tag
        if not lang:
            return False

        for lc in language_codes:
            # each entry is unpacked as (id, name, country)
            lang_id, lang_name, country = map(str.lower, lc)

            if lang == lang_id\
               or lang_id.startswith(lang)\
               or lang == lang_name\
               or lang == country:
                self.languages.append(lang)
                return True

        return False

    def _parse_engine_tag(self, tag):
        """Record the engine(s) selected by ``tag``.

        ``tag`` is checked, in this order, against engine shortcuts,
        engine names and category names; underscores stand for spaces.
        Returns True when the tag was accepted, False otherwise.
        """
        prefix = tag.replace('_', ' ')

        # engine shortcut
        if prefix in engine_shortcuts\
           and engine_shortcuts[prefix] not in self.blocked_engines:
            self.engines.append({'category': 'none',
                                 'name': engine_shortcuts[prefix]})
            return True

        # engine name
        if prefix in engines\
           and prefix not in self.blocked_engines:
            self.engines.append({'category': 'none',
                                 'name': prefix})
            return True

        # category name: use every engine declared under that category,
        # except the blocked ones (blocked_engines holds engine *names*)
        if prefix in categories:
            self.engines.extend({'category': prefix,
                                 'name': engine.name}
                                for engine in categories[prefix]
                                if engine.name not in self.blocked_engines)
            return True

        return False

    def parse_query(self):
        """Parse ``self.query`` and fill ``query_parts``, ``engines``
        and ``languages``.

        Results of an earlier call are discarded, so the method can be
        called repeatedly without duplicating entries.
        """
        self.query_parts = []
        self.engines = []
        self.languages = []

        # split query, keeping the whitespace runs as separate parts
        raw_query_parts = re.split(r'(\s+)', self.query)

        # True while every part seen so far was whitespace or a valid tag
        parse_next = True

        for query_part in raw_query_parts:
            # tags are over: glue everything onto the search string
            if not parse_next:
                self.query_parts[-1] += query_part
                continue

            # whitespace (or the empty string produced by leading
            # whitespace) is kept as its own part and does not end the tags
            if not query_part or query_part.isspace():
                self.query_parts.append(query_part)
                continue

            if query_part[0] == ':':
                # this forces a language
                parse_next = self._parse_language_tag(query_part[1:])
            elif query_part[0] == '!':
                # this forces an engine or category
                parse_next = self._parse_engine_tag(query_part[1:])
            else:
                parse_next = False

            # append query part to query_part list
            self.query_parts.append(query_part)

    def changeSearchQuery(self, search_query):
        """Replace the search string (the last query part) with
        ``search_query``, leaving any leading tags untouched."""
        if self.query_parts:
            self.query_parts[-1] = search_query
        else:
            self.query_parts.append(search_query)

    def getSearchQuery(self):
        """Return the last query part, or '' if nothing was parsed yet."""
        if self.query_parts:
            return self.query_parts[-1]
        return ''

    def getFullQuery(self):
        """Return the full query, tags and whitespace included."""
        # str.join instead of string.join, which only exists in Python 2
        return ''.join(self.query_parts)
| 
 | ||||
| @ -25,6 +25,7 @@ from searx.engines import ( | ||||
| ) | ||||
| from searx.languages import language_codes | ||||
| from searx.utils import gen_useragent | ||||
| from searx.query import Query | ||||
| 
 | ||||
| 
 | ||||
| number_of_searches = 0 | ||||
| @ -235,7 +236,15 @@ class Search(object): | ||||
|         self.pageno = int(pageno_param) | ||||
| 
 | ||||
|         # parse query, if tags are set, which change the search engine or search-language | ||||
|         self.parse_query() | ||||
|         query_obj = Query(self.query, self.blocked_engines) | ||||
|         query_obj.parse_query()         | ||||
| 
 | ||||
|         # get last selected language in query, if possible | ||||
|         # TODO support search with multiple languages | ||||
|         if len(query_obj.languages): | ||||
|             self.lang = query_obj.languages[-1] | ||||
| 
 | ||||
|         self.engines = query_obj.engines | ||||
| 
 | ||||
|         self.categories = [] | ||||
| 
 | ||||
| @ -276,60 +285,6 @@ class Search(object): | ||||
|                                     for x in categories[categ] | ||||
|                                     if not x.name in self.blocked_engines) | ||||
| 
 | ||||
|     # parse query, if tags are set, which change the serch engine or search-language | ||||
|     def parse_query(self): | ||||
|         query_parts = self.query.split() | ||||
|         modified = False | ||||
| 
 | ||||
|         # check if language-prefix is set | ||||
|         if query_parts[0].startswith(':'): | ||||
|             lang = query_parts[0][1:].lower() | ||||
| 
 | ||||
|             # check if any language-code is equal with declared language-codes | ||||
|             for lc in language_codes: | ||||
|                 lang_id, lang_name, country = map(str.lower, lc) | ||||
| 
 | ||||
|                 # if correct language-code is found, set it as new search-language | ||||
|                 if lang == lang_id\ | ||||
|                    or lang_id.startswith(lang)\ | ||||
|                    or lang == lang_name\ | ||||
|                    or lang == country: | ||||
|                     self.lang = lang | ||||
|                     modified = True | ||||
|                     break | ||||
| 
 | ||||
|         # check if category/engine prefix is set | ||||
|         elif query_parts[0].startswith('!'): | ||||
|             prefix = query_parts[0][1:].replace('_', ' ') | ||||
| 
 | ||||
|             # check if prefix is equal with engine shortcut | ||||
|             if prefix in engine_shortcuts\ | ||||
|                and not engine_shortcuts[prefix] in self.blocked_engines: | ||||
|                 modified = True | ||||
|                 self.engines.append({'category': 'none', | ||||
|                                      'name': engine_shortcuts[prefix]}) | ||||
| 
 | ||||
|             # check if prefix is equal with engine name | ||||
|             elif prefix in engines\ | ||||
|                     and not prefix in self.blocked_engines: | ||||
|                 modified = True | ||||
|                 self.engines.append({'category': 'none', | ||||
|                                     'name': prefix}) | ||||
| 
 | ||||
|             # check if prefix is equal with categorie name | ||||
|             elif prefix in categories: | ||||
|                 modified = True | ||||
|                 # using all engines for that search, which are declared under that categorie name | ||||
|                 self.engines.extend({'category': prefix, | ||||
|                                     'name': engine.name} | ||||
|                                     for engine in categories[prefix] | ||||
|                                     if not engine in self.blocked_engines) | ||||
| 
 | ||||
|         # if language, category or engine were specified in this query, search for more tags which do the same | ||||
|         if modified: | ||||
|             self.query = self.query.replace(query_parts[0], '', 1).strip() | ||||
|             self.parse_query() | ||||
| 
 | ||||
|     # do search-request | ||||
|     def search(self, request): | ||||
|         global number_of_searches | ||||
|  | ||||
| @ -47,6 +47,7 @@ from searx.utils import ( | ||||
| from searx.https_rewrite import https_rules | ||||
| from searx.languages import language_codes | ||||
| from searx.search import Search | ||||
| from searx.query import Query | ||||
| from searx.autocomplete import backends as autocomplete_backends | ||||
| 
 | ||||
| 
 | ||||
| @ -308,23 +309,46 @@ def autocompleter(): | ||||
|     """Return autocompleter results""" | ||||
|     request_data = {} | ||||
| 
 | ||||
|     # select request method | ||||
|     if request.method == 'POST': | ||||
|         request_data = request.form | ||||
|     else: | ||||
|         request_data = request.args | ||||
| 
 | ||||
|     query = request_data.get('q', '').encode('utf-8') | ||||
|     # set blocked engines | ||||
|     if request.cookies.get('blocked_engines'): | ||||
|         blocked_engines = request.cookies['blocked_engines'].split(',')  # noqa | ||||
|     else: | ||||
|         blocked_engines = [] | ||||
| 
 | ||||
|     if not query: | ||||
|     # parse query | ||||
|     query = Query(request_data.get('q', '').encode('utf-8'), blocked_engines) | ||||
|     query.parse_query() | ||||
| 
 | ||||
|     # check if search query is set | ||||
|     if not query.getSearchQuery(): | ||||
|         return | ||||
| 
 | ||||
|     # run autocompleter | ||||
|     completer = autocomplete_backends.get(request.cookies.get('autocomplete')) | ||||
| 
 | ||||
|     # check if valid autocompleter is selected | ||||
|     if not completer: | ||||
|         return | ||||
| 
 | ||||
|     results = completer(query) | ||||
|     # run autocompletion | ||||
|     raw_results = completer(query.getSearchQuery()) | ||||
| 
 | ||||
|     # parse results (write :language and !engine back to result string) | ||||
|     results = [] | ||||
|     for result in raw_results: | ||||
|         result_query = query | ||||
|         result_query.changeSearchQuery(result) | ||||
| 
 | ||||
|         # add parsed result | ||||
|         results.append(result_query.getFullQuery()) | ||||
| 
 | ||||
|     # return autocompleter results | ||||
|     if request_data.get('format') == 'x-suggestions': | ||||
|         return Response(json.dumps([query, results]), | ||||
|                         mimetype='application/json') | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user