| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | #!/usr/bin/env python | 
					
						
							| 
									
										
										
										
											2021-10-03 15:12:09 +02:00
										 |  |  | # lint: pylint | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | # (C) Copyright Contributors to the SearXNG project. | 
					
						
							|  |  |  | # (C) Copyright Contributors to the searx project (2014 - 2021) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """Script to run SearXNG from terminal.
 | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | Getting the categories without initializing the engines will only return `['general']` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | >>> import searx.engines | 
					
						
							|  |  |  | ... list(searx.engines.categories.keys()) | 
					
						
							|  |  |  | ['general'] | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | >>> import searx.search | 
					
						
							|  |  |  | ... searx.search.initialize() | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ... list(searx.engines.categories.keys()) | 
					
						
							|  |  |  | ['general', 'it', 'science', 'images', 'news', 'videos', 'music', 'files', 'social media', 'map'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Example to use this script: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .. code::  bash | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-10-02 17:30:39 +02:00
										 |  |  |     $ python3 searxng_extra/standalone_searx.py rain | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | Example to run it from python: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | >>> import importlib | 
					
						
							|  |  |  | ... import json | 
					
						
							|  |  |  | ... import sys | 
					
						
							|  |  |  | ... import searx.engines | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | ... import searx.search | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ... search_query = 'rain' | 
					
						
							|  |  |  | ... # initialize engines | 
					
						
							| 
									
										
										
										
											2020-12-16 13:41:32 +01:00
										 |  |  | ... searx.search.initialize() | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ... # load the engine categories once instead of each time the function is called | 
					
						
							|  |  |  | ... engine_cs = list(searx.engines.categories.keys()) | 
					
						
							|  |  |  | ... # load module | 
					
						
							|  |  |  | ... spec = importlib.util.spec_from_file_location( | 
					
						
							| 
									
										
										
										
											2021-10-02 17:30:39 +02:00
										 |  |  | ...     'utils.standalone_searx', 'searxng_extra/standalone_searx.py') | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ... sas = importlib.util.module_from_spec(spec) | 
					
						
							|  |  |  | ... spec.loader.exec_module(sas) | 
					
						
							|  |  |  | ... # use function from module | 
					
						
							|  |  |  | ... prog_args = sas.parse_argument([search_query], category_choices=engine_cs) | 
					
						
							|  |  |  | ... search_q = sas.get_search_query(prog_args, engine_categories=engine_cs) | 
					
						
							|  |  |  | ... res_dict = sas.to_dict(search_q) | 
					
						
							|  |  |  | ... sys.stdout.write(json.dumps( | 
					
						
							|  |  |  | ...     res_dict, sort_keys=True, indent=4, ensure_ascii=False, | 
					
						
							|  |  |  | ...     default=sas.json_serial)) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     "answers": [], | 
					
						
							|  |  |  |     "infoboxes": [ {...} ], | 
					
						
							|  |  |  |     "paging": true, | 
					
						
							|  |  |  |     "results": [... ], | 
					
						
							|  |  |  |     "results_number": 820000000.0, | 
					
						
							|  |  |  |     "search": { | 
					
						
							|  |  |  |         "lang": "all", | 
					
						
							|  |  |  |         "pageno": 1, | 
					
						
							|  |  |  |         "q": "rain", | 
					
						
							|  |  |  |         "safesearch": 0, | 
					
						
							|  |  |  |         "timerange": null | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "suggestions": [...] | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | """  # pylint: disable=line-too-long
 | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | import argparse | 
					
						
							| 
									
										
										
										
											2017-07-25 14:39:11 +02:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | from datetime import datetime | 
					
						
							|  |  |  | from json import dumps | 
					
						
							|  |  |  | from typing import Any, Dict, List, Optional | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import searx | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | import searx.preferences | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | import searx.query | 
					
						
							|  |  |  | import searx.search | 
					
						
							| 
									
										
										
										
											2020-09-22 13:59:27 +02:00
										 |  |  | import searx.webadapter | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
# Type of the optional "engine categories" arguments accepted by
# get_search_query() and parse_argument(): a list of category names, or
# None to make those functions fall back to the keys of
# ``searx.engines.categories``.
EngineCategoriesVar = Optional[List[str]]
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | def get_search_query( | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     args: argparse.Namespace, engine_categories: EngineCategoriesVar = None | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ) -> searx.search.SearchQuery: | 
					
						
							|  |  |  |     """Get  search results for the query""" | 
					
						
							|  |  |  |     if engine_categories is None: | 
					
						
							|  |  |  |         engine_categories = list(searx.engines.categories.keys()) | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         category = args.category.decode('utf-8') | 
					
						
							|  |  |  |     except AttributeError: | 
					
						
							|  |  |  |         category = args.category | 
					
						
							|  |  |  |     form = { | 
					
						
							|  |  |  |         "q": args.query, | 
					
						
							|  |  |  |         "categories": category, | 
					
						
							|  |  |  |         "pageno": str(args.pageno), | 
					
						
							|  |  |  |         "language": args.lang, | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         "time_range": args.timerange, | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     preferences = searx.preferences.Preferences(['oscar'], engine_categories, searx.engines.engines, []) | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     preferences.key_value_settings['safesearch'].parse(args.safesearch) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     search_query = searx.webadapter.get_search_query_from_webapp(preferences, form)[0] | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     return search_query | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def no_parsed_url(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]: | 
					
						
							|  |  |  |     """Remove parsed url from dict.""" | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  |     for result in results: | 
					
						
							|  |  |  |         del result['parsed_url'] | 
					
						
							|  |  |  |     return results | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def json_serial(obj: Any) -> Any: | 
					
						
							|  |  |  |     """JSON serializer for objects not serializable by default json code.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     :raise TypeError: raised when **obj** is not serializable | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  |     if isinstance(obj, datetime): | 
					
						
							|  |  |  |         serial = obj.isoformat() | 
					
						
							|  |  |  |         return serial | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     if isinstance(obj, bytes): | 
					
						
							|  |  |  |         return obj.decode('utf8') | 
					
						
							|  |  |  |     if isinstance(obj, set): | 
					
						
							|  |  |  |         return list(obj) | 
					
						
							|  |  |  |     raise TypeError("Type ({}) not serializable".format(type(obj))) | 
					
						
							| 
									
										
										
										
											2017-01-04 14:01:29 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def to_dict(search_query: searx.search.SearchQuery) -> Dict[str, Any]: | 
					
						
							|  |  |  |     """Get result from parsed arguments.""" | 
					
						
							|  |  |  |     result_container = searx.search.Search(search_query).search() | 
					
						
							|  |  |  |     result_container_json = { | 
					
						
							|  |  |  |         "search": { | 
					
						
							|  |  |  |             "q": search_query.query, | 
					
						
							|  |  |  |             "pageno": search_query.pageno, | 
					
						
							|  |  |  |             "lang": search_query.lang, | 
					
						
							|  |  |  |             "safesearch": search_query.safesearch, | 
					
						
							|  |  |  |             "timerange": search_query.time_range, | 
					
						
							|  |  |  |         }, | 
					
						
							|  |  |  |         "results": no_parsed_url(result_container.get_ordered_results()), | 
					
						
							|  |  |  |         "infoboxes": result_container.infoboxes, | 
					
						
							|  |  |  |         "suggestions": list(result_container.suggestions), | 
					
						
							|  |  |  |         "answers": list(result_container.answers), | 
					
						
							|  |  |  |         "paging": result_container.paging, | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         "results_number": result_container.results_number(), | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     } | 
					
						
							|  |  |  |     return result_container_json | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_argument( | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     args: Optional[List[str]] = None, category_choices: EngineCategoriesVar = None | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  | ) -> argparse.Namespace: | 
					
						
							|  |  |  |     """Parse command line.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     :raise SystemExit: Query argument required on `args` | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Examples: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     >>> import importlib | 
					
						
							|  |  |  |     ... # load module | 
					
						
							|  |  |  |     ... spec = importlib.util.spec_from_file_location( | 
					
						
							|  |  |  |     ...     'utils.standalone_searx', 'utils/standalone_searx.py') | 
					
						
							|  |  |  |     ... sas = importlib.util.module_from_spec(spec) | 
					
						
							|  |  |  |     ... spec.loader.exec_module(sas) | 
					
						
							|  |  |  |     ... sas.parse_argument() | 
					
						
							|  |  |  |     usage: ptipython [-h] [--category [{general}]] [--lang [LANG]] [--pageno [PAGENO]] [--safesearch [{0,1,2}]] [--timerange [{day,week,month,year}]] | 
					
						
							|  |  |  |                      query | 
					
						
							|  |  |  |     SystemExit: 2 | 
					
						
							|  |  |  |     >>> sas.parse_argument(['rain']) | 
					
						
							|  |  |  |     Namespace(category='general', lang='all', pageno=1, query='rain', safesearch='0', timerange=None) | 
					
						
							|  |  |  |     """  # noqa: E501
 | 
					
						
							|  |  |  |     if not category_choices: | 
					
						
							|  |  |  |         category_choices = list(searx.engines.categories.keys()) | 
					
						
							|  |  |  |     parser = argparse.ArgumentParser(description='Standalone searx.') | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     parser.add_argument('query', type=str, help='Text query') | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     parser.add_argument( | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         '--category', type=str, nargs='?', choices=category_choices, default='general', help='Search category' | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument('--lang', type=str, nargs='?', default='all', help='Search language') | 
					
						
							|  |  |  |     parser.add_argument('--pageno', type=int, nargs='?', default=1, help='Page number starting from 1') | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     parser.add_argument( | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         '--safesearch', | 
					
						
							|  |  |  |         type=str, | 
					
						
							|  |  |  |         nargs='?', | 
					
						
							|  |  |  |         choices=['0', '1', '2'], | 
					
						
							|  |  |  |         default='0', | 
					
						
							|  |  |  |         help='Safe content filter from none to strict', | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         '--timerange', type=str, nargs='?', choices=['day', 'week', 'month', 'year'], help='Filter by time range' | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     return parser.parse_args(args) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if __name__ == '__main__': | 
					
						
							| 
									
										
										
										
											2021-12-26 23:09:10 +01:00
										 |  |  |     settings_engines = searx.settings['engines'] | 
					
						
							|  |  |  |     searx.search.load_engines(settings_engines) | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     engine_cs = list(searx.engines.categories.keys()) | 
					
						
							|  |  |  |     prog_args = parse_argument(category_choices=engine_cs) | 
					
						
							| 
									
										
										
										
											2021-12-26 23:09:10 +01:00
										 |  |  |     searx.search.initialize_network(settings_engines, searx.settings['outgoing']) | 
					
						
							|  |  |  |     searx.search.check_network_configuration() | 
					
						
							|  |  |  |     searx.search.initialize_metrics([engine['name'] for engine in settings_engines]) | 
					
						
							|  |  |  |     searx.search.initialize_processors(settings_engines) | 
					
						
							| 
									
										
										
										
											2020-11-04 13:38:54 +01:00
										 |  |  |     search_q = get_search_query(prog_args, engine_categories=engine_cs) | 
					
						
							|  |  |  |     res_dict = to_dict(search_q) | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     sys.stdout.write(dumps(res_dict, sort_keys=True, indent=4, ensure_ascii=False, default=json_serial)) |