| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |  Qwant (Web, Images, News, Social) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							|  |  |  | from searx.utils import html_to_text, match_language | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  | from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException | 
					
						
							| 
									
										
										
											
												[httpx] replace searx.poolrequests by searx.network
settings.yml:
* outgoing.networks:
   * can contains network definition
   * propertiers: enable_http, verify, http2, max_connections, max_keepalive_connections,
     keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries
   * retries: 0 by default, number of times searx retries to send the HTTP request (using different IP & proxy each time)
   * local_addresses can be "192.168.0.1/24" (it supports IPv6)
   * support_ipv4 & support_ipv6: both True by default
     see https://github.com/searx/searx/pull/1034
* each engine can define a "network" section:
   * either a full network description
   * either reference an existing network
* all HTTP requests of engine use the same HTTP configuration (it was not the case before, see proxy configuration in master)
											
										 
											2021-04-05 10:43:33 +02:00
										 |  |  | from searx.network import raise_for_httperror | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
# about: engine metadata shown in searx's engine overview / preferences
about = {
    "website": 'https://www.qwant.com/',
    "wikidata_id": 'Q14657870',
    "official_api_documentation": None,
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
# engine dependent config
categories = []  # presumably populated from settings by the engine loader — TODO confirm
paging = True  # the API url template below takes an 'offset' parameter
supported_languages_url = about['website']  # language list is scraped from the homepage HTML
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
# map a searx category name to the Qwant API endpoint keyword used in the url
category_to_keyword = {'general': 'web',
                       'images': 'images',
                       'news': 'news'}
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
# search-url template: {keyword} selects the API endpoint, {query} is the
# urlencoded 'q=...' pair, {offset} is the zero-based result offset
url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}&t={keyword}&uiv=4'
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     offset = (params['pageno'] - 1) * 10 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |     if categories[0] and categories[0] in category_to_keyword: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         params['url'] = url.format(keyword=category_to_keyword[categories[0]], | 
					
						
							|  |  |  |                                    query=urlencode({'q': query}), | 
					
						
							|  |  |  |                                    offset=offset) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         params['url'] = url.format(keyword='web', | 
					
						
							|  |  |  |                                    query=urlencode({'q': query}), | 
					
						
							|  |  |  |                                    offset=offset) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-20 22:47:20 +02:00
										 |  |  |     # add language tag | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         language = match_language(params['language'], supported_languages, language_aliases) | 
					
						
							|  |  |  |         params['url'] += '&locale=' + language.replace('-', '_').lower() | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-21 20:25:39 +01:00
										 |  |  |     params['headers']['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64; rv:69.0) Gecko/20100101 Firefox/69.0' | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     params['raise_for_httperror'] = False | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     # According to https://www.qwant.com/js/app.js | 
					
						
							|  |  |  |     if resp.status_code == 429: | 
					
						
							|  |  |  |         raise SearxEngineCaptchaException() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # raise for other errors | 
					
						
							|  |  |  |     raise_for_httperror(resp) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # load JSON result | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  |     search_results = loads(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-12-09 21:23:20 +01:00
										 |  |  |     # check for an API error | 
					
						
							|  |  |  |     if search_results.get('status') != 'success': | 
					
						
							|  |  |  |         raise SearxEngineAPIException('API error ' + str(search_results.get('error', ''))) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  |     # return empty array if there are no results | 
					
						
							|  |  |  |     if 'data' not in search_results: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     data = search_results.get('data', {}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     res = data.get('result', {}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							|  |  |  |     for result in res.get('items', {}): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  |         title = html_to_text(result['title']) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  |         res_url = result['url'] | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  |         content = html_to_text(result['desc']) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |         if category_to_keyword.get(categories[0], '') == 'web': | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             results.append({'title': title, | 
					
						
							|  |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'url': res_url}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |         elif category_to_keyword.get(categories[0], '') == 'images': | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             thumbnail_src = result['thumbnail'] | 
					
						
							|  |  |  |             img_src = result['media'] | 
					
						
							|  |  |  |             results.append({'template': 'images.html', | 
					
						
							|  |  |  |                             'url': res_url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'content': '', | 
					
						
							|  |  |  |                             'thumbnail_src': thumbnail_src, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |         elif category_to_keyword.get(categories[0], '') == 'news': | 
					
						
							|  |  |  |             published_date = datetime.fromtimestamp(result['date'], None) | 
					
						
							|  |  |  |             media = result.get('media', []) | 
					
						
							|  |  |  |             if len(media) > 0: | 
					
						
							|  |  |  |                 img_src = media[0].get('pict', {}).get('url', None) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 img_src = None | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             results.append({'url': res_url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'publishedDate': published_date, | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get supported languages from their site | 
					
						
							|  |  |  | def _fetch_supported_languages(resp): | 
					
						
							|  |  |  |     # list of regions is embedded in page as a js object | 
					
						
							|  |  |  |     response_text = resp.text | 
					
						
							| 
									
										
										
										
											2021-05-03 02:24:28 +02:00
										 |  |  |     response_text = response_text[response_text.find('INITIAL_PROPS'):] | 
					
						
							|  |  |  |     response_text = response_text[response_text.find('{'):response_text.find('</script>')] | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     regions_json = loads(response_text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-05-03 02:24:28 +02:00
										 |  |  |     supported_languages = [] | 
					
						
							|  |  |  |     for country, langs in regions_json['locales'].items(): | 
					
						
							|  |  |  |         for lang in langs['langs']: | 
					
						
							|  |  |  |             lang_code = "{lang}-{country}".format(lang=lang, country=country) | 
					
						
							|  |  |  |             supported_languages.append(lang_code) | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return supported_languages |