| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | searx is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | searx is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at> | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from searx.languages import language_codes | 
					
						
							|  |  |  | from searx.engines import ( | 
					
						
							|  |  |  |     categories, engines, engine_shortcuts | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | import sys | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if sys.version_info[0] == 3: | 
					
						
							|  |  |  |     unicode = str | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-03-02 00:11:51 +01:00
										 |  |  | VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') | 
					
						
							| 
									
										
										
										
											2016-12-29 06:24:56 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | class RawTextQuery(object): | 
					
						
							|  |  |  |     """parse raw text query (the value from the html input)""" | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |     def __init__(self, query, disabled_engines): | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.query = query | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |         self.disabled_engines = [] | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |         if disabled_engines: | 
					
						
							|  |  |  |             self.disabled_engines = disabled_engines | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.query_parts = [] | 
					
						
							|  |  |  |         self.engines = [] | 
					
						
							|  |  |  |         self.languages = [] | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         self.timeout_limit = None | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |         self.specific = False | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # parse query, if tags are set, which | 
					
						
							|  |  |  |     # change the serch engine or search-language | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |     def parse_query(self): | 
					
						
							|  |  |  |         self.query_parts = [] | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         # split query, including whitespaces | 
					
						
							| 
									
										
										
										
											2017-11-23 22:04:00 +01:00
										 |  |  |         raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else b'(\s+)', self.query) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         parse_next = True | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         for query_part in raw_query_parts: | 
					
						
							|  |  |  |             if not parse_next: | 
					
						
							|  |  |  |                 self.query_parts[-1] += query_part | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             parse_next = False | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             # part does only contain spaces, skip | 
					
						
							| 
									
										
										
										
											2014-10-01 17:57:53 +02:00
										 |  |  |             if query_part.isspace()\ | 
					
						
							|  |  |  |                or query_part == '': | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 parse_next = True | 
					
						
							|  |  |  |                 self.query_parts.append(query_part) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |             # this force the timeout | 
					
						
							|  |  |  |             if query_part[0] == '<': | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     raw_timeout_limit = int(query_part[1:]) | 
					
						
							|  |  |  |                     if raw_timeout_limit < 100: | 
					
						
							|  |  |  |                         # below 100, the unit is the second ( <3 = 3 seconds timeout ) | 
					
						
							|  |  |  |                         self.timeout_limit = float(raw_timeout_limit) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         # 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout ) | 
					
						
							|  |  |  |                         self.timeout_limit = raw_timeout_limit / 1000.0 | 
					
						
							|  |  |  |                     parse_next = True | 
					
						
							|  |  |  |                 except ValueError: | 
					
						
							|  |  |  |                     # error not reported to the user | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # this force a language | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             if query_part[0] == ':': | 
					
						
							| 
									
										
										
										
											2017-03-02 00:11:51 +01:00
										 |  |  |                 lang = query_part[1:].lower().replace('_', '-') | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                 # check if any language-code is equal with | 
					
						
							|  |  |  |                 # declared language-codes | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 for lc in language_codes: | 
					
						
							| 
									
										
										
										
											2016-08-06 06:34:56 +02:00
										 |  |  |                     lang_id, lang_name, country, english_name = map(unicode.lower, lc) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                     # if correct language-code is found | 
					
						
							|  |  |  |                     # set it as new search-language | 
					
						
							| 
									
										
										
										
											2017-07-20 22:47:20 +02:00
										 |  |  |                     if (lang == lang_id | 
					
						
							|  |  |  |                         or lang == lang_name | 
					
						
							|  |  |  |                         or lang == english_name | 
					
						
							|  |  |  |                         or lang.replace('-', ' ') == country)\ | 
					
						
							|  |  |  |                        and lang not in self.languages: | 
					
						
							|  |  |  |                             parse_next = True | 
					
						
							|  |  |  |                             lang_parts = lang_id.split('-') | 
					
						
							|  |  |  |                             if len(lang_parts) == 2: | 
					
						
							|  |  |  |                                 self.languages.append(lang_parts[0] + '-' + lang_parts[1].upper()) | 
					
						
							|  |  |  |                             else: | 
					
						
							|  |  |  |                                 self.languages.append(lang_id) | 
					
						
							|  |  |  |                             # to ensure best match (first match is not necessarily the best one) | 
					
						
							|  |  |  |                             if lang == lang_id: | 
					
						
							|  |  |  |                                 break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # user may set a valid, yet not selectable language | 
					
						
							| 
									
										
										
										
											2018-03-01 05:30:48 +01:00
										 |  |  |                 if VALID_LANGUAGE_CODE.match(lang): | 
					
						
							|  |  |  |                     lang_parts = lang.split('-') | 
					
						
							|  |  |  |                     if len(lang_parts) > 1: | 
					
						
							|  |  |  |                         lang = lang_parts[0].lower() + '-' + lang_parts[1].upper() | 
					
						
							|  |  |  |                     if lang not in self.languages: | 
					
						
							|  |  |  |                         self.languages.append(lang) | 
					
						
							|  |  |  |                         parse_next = True | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             # this force a engine or category | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |             if query_part[0] == '!' or query_part[0] == '?': | 
					
						
							| 
									
										
										
										
											2017-01-12 16:17:29 +01:00
										 |  |  |                 prefix = query_part[1:].replace('-', ' ').replace('_', ' ') | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # check if prefix is equal with engine shortcut | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:48 +01:00
										 |  |  |                 if prefix in engine_shortcuts: | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                     parse_next = True | 
					
						
							| 
									
										
										
										
											2018-03-22 11:02:24 +01:00
										 |  |  |                     engine_name = engine_shortcuts[prefix] | 
					
						
							|  |  |  |                     if engine_name in engines: | 
					
						
							| 
									
										
										
										
											2019-03-29 02:25:20 +01:00
										 |  |  |                         self.engines.append({'category': 'none', | 
					
						
							|  |  |  |                                              'name': engine_name, | 
					
						
							|  |  |  |                                              'from_bang': True}) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 # check if prefix is equal with engine name | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:48 +01:00
										 |  |  |                 elif prefix in engines: | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                     parse_next = True | 
					
						
							| 
									
										
										
										
											2019-03-29 02:25:20 +01:00
										 |  |  |                     self.engines.append({'category': 'none', | 
					
						
							|  |  |  |                                          'name': prefix, | 
					
						
							|  |  |  |                                          'from_bang': True}) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # check if prefix is equal with categorie name | 
					
						
							|  |  |  |                 elif prefix in categories: | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                     # using all engines for that search, which | 
					
						
							|  |  |  |                     # are declared under that categorie name | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                     parse_next = True | 
					
						
							|  |  |  |                     self.engines.extend({'category': prefix, | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:48 +01:00
										 |  |  |                                          'name': engine.name} | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                                         for engine in categories[prefix] | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |                                         if (engine.name, prefix) not in self.disabled_engines) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |             if query_part[0] == '!': | 
					
						
							|  |  |  |                 self.specific = True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             # append query part to query_part list | 
					
						
							|  |  |  |             self.query_parts.append(query_part) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def changeSearchQuery(self, search_query): | 
					
						
							|  |  |  |         if len(self.query_parts): | 
					
						
							|  |  |  |             self.query_parts[-1] = search_query | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.query_parts.append(search_query) | 
					
						
							| 
									
										
										
										
											2019-07-16 16:27:29 +02:00
										 |  |  |         return self | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |     def getSearchQuery(self): | 
					
						
							|  |  |  |         if len(self.query_parts): | 
					
						
							|  |  |  |             return self.query_parts[-1] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return '' | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |     def getFullQuery(self): | 
					
						
							|  |  |  |         # get full querry including whitespaces | 
					
						
							| 
									
										
										
										
											2017-11-23 17:40:26 +01:00
										 |  |  |         return u''.join(self.query_parts) | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class SearchQuery(object): | 
					
						
							|  |  |  |     """container for all the search parameters (query, language, etc...)""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, timeout_limit=None): | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  |         self.query = query.encode('utf-8') | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |         self.engines = engines | 
					
						
							|  |  |  |         self.categories = categories | 
					
						
							|  |  |  |         self.lang = lang | 
					
						
							|  |  |  |         self.safesearch = safesearch | 
					
						
							|  |  |  |         self.pageno = pageno | 
					
						
							| 
									
										
										
										
											2019-10-14 14:18:02 +02:00
										 |  |  |         self.time_range = None if time_range in ('', 'None', None) else time_range | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         self.timeout_limit = timeout_limit | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def __str__(self): | 
					
						
							|  |  |  |         return str(self.query) + ";" + str(self.engines) |