| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | searx is free software: you can redistribute it and/or modify | 
					
						
							|  |  |  | it under the terms of the GNU Affero General Public License as published by | 
					
						
							|  |  |  | the Free Software Foundation, either version 3 of the License, or | 
					
						
							|  |  |  | (at your option) any later version. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | searx is distributed in the hope that it will be useful, | 
					
						
							|  |  |  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
					
						
							|  |  |  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
					
						
							|  |  |  | GNU Affero General Public License for more details. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | You should have received a copy of the GNU Affero General Public License | 
					
						
							|  |  |  | along with searx. If not, see < http://www.gnu.org/licenses/ >. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at> | 
					
						
							|  |  |  | '''
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | from searx.languages import language_codes | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  | from searx.engines import categories, engines, engine_shortcuts | 
					
						
							|  |  |  | from searx.search import EngineRef | 
					
						
							| 
									
										
										
										
											2020-10-01 11:29:31 +02:00
										 |  |  | from searx.webutils import VALID_LANGUAGE_CODE | 
					
						
							| 
									
										
										
										
											2016-12-29 06:24:56 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-12 09:42:27 +02:00
										 |  |  | class RawTextQuery: | 
					
						
							| 
									
										
										
										
											2016-10-22 13:10:31 +02:00
										 |  |  |     """parse raw text query (the value from the html input)""" | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |     def __init__(self, query, disabled_engines): | 
					
						
							| 
									
										
										
										
											2020-08-11 16:25:03 +02:00
										 |  |  |         assert isinstance(query, str) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.query = query | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |         self.disabled_engines = [] | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-09 18:26:29 +02:00
										 |  |  |         if disabled_engines: | 
					
						
							|  |  |  |             self.disabled_engines = disabled_engines | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.query_parts = [] | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |         self.user_query_parts = [] | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |         self.enginerefs = [] | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.languages = [] | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |         self.timeout_limit = None | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |         self.external_bang = None | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |         self.specific = False | 
					
						
							| 
									
										
										
										
											2020-09-09 12:17:58 +02:00
										 |  |  |         self._parse_query() | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # parse query, if tags are set, which | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |     # change the search engine or search-language | 
					
						
							| 
									
										
										
										
											2020-09-09 12:17:58 +02:00
										 |  |  |     def _parse_query(self): | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         self.query_parts = [] | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         # split query, including whitespaces | 
					
						
							| 
									
										
										
										
											2020-08-11 16:25:03 +02:00
										 |  |  |         raw_query_parts = re.split(r'(\s+)', self.query) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |         for query_part in raw_query_parts: | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |             searx_query_part = False | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             # part does only contain spaces, skip | 
					
						
							| 
									
										
										
										
											2014-10-01 17:57:53 +02:00
										 |  |  |             if query_part.isspace()\ | 
					
						
							|  |  |  |                or query_part == '': | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |             # this force the timeout | 
					
						
							|  |  |  |             if query_part[0] == '<': | 
					
						
							|  |  |  |                 try: | 
					
						
							|  |  |  |                     raw_timeout_limit = int(query_part[1:]) | 
					
						
							|  |  |  |                     if raw_timeout_limit < 100: | 
					
						
							|  |  |  |                         # below 100, the unit is the second ( <3 = 3 seconds timeout ) | 
					
						
							|  |  |  |                         self.timeout_limit = float(raw_timeout_limit) | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         # 100 or above, the unit is the millisecond ( <850 = 850 milliseconds timeout ) | 
					
						
							|  |  |  |                         self.timeout_limit = raw_timeout_limit / 1000.0 | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                     searx_query_part = True | 
					
						
							| 
									
										
										
										
											2019-08-02 13:50:51 +02:00
										 |  |  |                 except ValueError: | 
					
						
							|  |  |  |                     # error not reported to the user | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |             # this force a language | 
					
						
							| 
									
										
										
										
											2021-01-10 06:11:41 +01:00
										 |  |  |             if query_part[0] == ':' and len(query_part) > 1: | 
					
						
							| 
									
										
										
										
											2017-03-02 00:11:51 +01:00
										 |  |  |                 lang = query_part[1:].lower().replace('_', '-') | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                 # check if any language-code is equal with | 
					
						
							|  |  |  |                 # declared language-codes | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 for lc in language_codes: | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  |                     lang_id, lang_name, country, english_name = map(str.lower, lc) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                     # if correct language-code is found | 
					
						
							|  |  |  |                     # set it as new search-language | 
					
						
							| 
									
										
										
										
											2017-07-20 22:47:20 +02:00
										 |  |  |                     if (lang == lang_id | 
					
						
							|  |  |  |                         or lang == lang_name | 
					
						
							|  |  |  |                         or lang == english_name | 
					
						
							|  |  |  |                         or lang.replace('-', ' ') == country)\ | 
					
						
							|  |  |  |                        and lang not in self.languages: | 
					
						
							| 
									
										
										
										
											2020-11-16 09:43:23 +01:00
										 |  |  |                         searx_query_part = True | 
					
						
							|  |  |  |                         lang_parts = lang_id.split('-') | 
					
						
							|  |  |  |                         if len(lang_parts) == 2: | 
					
						
							|  |  |  |                             self.languages.append(lang_parts[0] + '-' + lang_parts[1].upper()) | 
					
						
							|  |  |  |                         else: | 
					
						
							|  |  |  |                             self.languages.append(lang_id) | 
					
						
							|  |  |  |                         # to ensure best match (first match is not necessarily the best one) | 
					
						
							|  |  |  |                         if lang == lang_id: | 
					
						
							|  |  |  |                             break | 
					
						
							| 
									
										
										
										
											2017-07-20 22:47:20 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # user may set a valid, yet not selectable language | 
					
						
							| 
									
										
										
										
											2018-03-01 05:30:48 +01:00
										 |  |  |                 if VALID_LANGUAGE_CODE.match(lang): | 
					
						
							|  |  |  |                     lang_parts = lang.split('-') | 
					
						
							|  |  |  |                     if len(lang_parts) > 1: | 
					
						
							|  |  |  |                         lang = lang_parts[0].lower() + '-' + lang_parts[1].upper() | 
					
						
							|  |  |  |                     if lang not in self.languages: | 
					
						
							|  |  |  |                         self.languages.append(lang) | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                         searx_query_part = True | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |             # external bang | 
					
						
							|  |  |  |             if query_part[0:2] == "!!": | 
					
						
							|  |  |  |                 self.external_bang = query_part[2:] | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                 searx_query_part = True | 
					
						
							| 
									
										
										
										
											2020-07-03 15:25:04 +02:00
										 |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             # this force a engine or category | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |             if query_part[0] == '!' or query_part[0] == '?': | 
					
						
							| 
									
										
										
										
											2017-01-12 16:17:29 +01:00
										 |  |  |                 prefix = query_part[1:].replace('-', ' ').replace('_', ' ') | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # check if prefix is equal with engine shortcut | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:48 +01:00
										 |  |  |                 if prefix in engine_shortcuts: | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                     searx_query_part = True | 
					
						
							| 
									
										
										
										
											2018-03-22 11:02:24 +01:00
										 |  |  |                     engine_name = engine_shortcuts[prefix] | 
					
						
							|  |  |  |                     if engine_name in engines: | 
					
						
							| 
									
										
										
										
											2020-12-18 12:19:14 +01:00
										 |  |  |                         self.enginerefs.append(EngineRef(engine_name, 'none')) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |                 # check if prefix is equal with engine name | 
					
						
							| 
									
										
										
										
											2015-01-31 23:11:48 +01:00
										 |  |  |                 elif prefix in engines: | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                     searx_query_part = True | 
					
						
							| 
									
										
										
										
											2020-12-18 12:19:14 +01:00
										 |  |  |                     self.enginerefs.append(EngineRef(prefix, 'none')) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # check if prefix is equal with categorie name | 
					
						
							|  |  |  |                 elif prefix in categories: | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  |                     # using all engines for that search, which | 
					
						
							|  |  |  |                     # are declared under that categorie name | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                     searx_query_part = True | 
					
						
							| 
									
										
										
										
											2020-09-22 16:22:22 +02:00
										 |  |  |                     self.enginerefs.extend(EngineRef(engine.name, prefix) | 
					
						
							|  |  |  |                                            for engine in categories[prefix] | 
					
						
							|  |  |  |                                            if (engine.name, prefix) not in self.disabled_engines) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-03 02:31:23 +01:00
										 |  |  |             if query_part[0] == '!': | 
					
						
							|  |  |  |                 self.specific = True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |             # append query part to query_part list | 
					
						
							| 
									
										
										
										
											2020-10-26 13:33:40 +01:00
										 |  |  |             if searx_query_part: | 
					
						
							|  |  |  |                 self.query_parts.append(query_part) | 
					
						
							|  |  |  |             else: | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |                 self.user_query_parts.append(query_part) | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 10:11:49 +02:00
										 |  |  |     def changeQuery(self, query): | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |         self.user_query_parts = query.strip().split() | 
					
						
							| 
									
										
										
										
											2019-07-16 16:27:29 +02:00
										 |  |  |         return self | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-14 10:11:49 +02:00
										 |  |  |     def getQuery(self): | 
					
						
							| 
									
										
										
										
											2020-10-25 17:40:16 +01:00
										 |  |  |         return ' '.join(self.user_query_parts) | 
					
						
							| 
									
										
										
										
											2014-10-19 12:41:04 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-10-01 17:18:18 +02:00
										 |  |  |     def getFullQuery(self): | 
					
						
							|  |  |  |         # get full querry including whitespaces | 
					
						
							| 
									
										
										
										
											2020-10-26 13:33:40 +01:00
										 |  |  |         return '{0} {1}'.format(''.join(self.query_parts), self.getQuery()).strip() |