| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |  Qwant (Web, Images, News, Social) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |  @website     https://qwant.com/ | 
					
						
							|  |  |  |  @provide-api not officially (https://api.qwant.com/api/search/) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   yes | 
					
						
							|  |  |  |  @results     JSON | 
					
						
							|  |  |  |  @stable      yes | 
					
						
							|  |  |  |  @parse       url, title, content | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  | from json import loads | 
					
						
							|  |  |  | from searx.utils import html_to_text | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | from searx.url_utils import urlencode | 
					
						
							| 
									
										
										
										
											2018-03-01 05:30:48 +01:00
										 |  |  | from searx.utils import match_language | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | categories = None | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | paging = True | 
					
						
							|  |  |  | language_support = True | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  | supported_languages_url = 'https://qwant.com/region' | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  | category_to_keyword = {'general': 'web', | 
					
						
							|  |  |  |                        'images': 'images', | 
					
						
							|  |  |  |                        'news': 'news', | 
					
						
							|  |  |  |                        'social media': 'social'} | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | # search-url | 
					
						
							| 
									
										
										
										
											2018-08-13 12:03:27 +02:00
										 |  |  | url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}&t={keyword}&uiv=4' | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     offset = (params['pageno'] - 1) * 10 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |     if categories[0] and categories[0] in category_to_keyword: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         params['url'] = url.format(keyword=category_to_keyword[categories[0]], | 
					
						
							|  |  |  |                                    query=urlencode({'q': query}), | 
					
						
							|  |  |  |                                    offset=offset) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         params['url'] = url.format(keyword='web', | 
					
						
							|  |  |  |                                    query=urlencode({'q': query}), | 
					
						
							|  |  |  |                                    offset=offset) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-20 22:47:20 +02:00
										 |  |  |     # add language tag | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         language = match_language(params['language'], supported_languages, language_aliases) | 
					
						
							|  |  |  |         params['url'] += '&locale=' + language.replace('-', '_').lower() | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     search_results = loads(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # return empty array if there are no results | 
					
						
							|  |  |  |     if 'data' not in search_results: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     data = search_results.get('data', {}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     res = data.get('result', {}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							|  |  |  |     for result in res.get('items', {}): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  |         title = html_to_text(result['title']) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  |         res_url = result['url'] | 
					
						
							| 
									
										
										
										
											2016-12-10 21:27:47 +01:00
										 |  |  |         content = html_to_text(result['desc']) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |         if category_to_keyword.get(categories[0], '') == 'web': | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             results.append({'title': title, | 
					
						
							|  |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'url': res_url}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-02 22:11:47 +02:00
										 |  |  |         elif category_to_keyword.get(categories[0], '') == 'images': | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             thumbnail_src = result['thumbnail'] | 
					
						
							|  |  |  |             img_src = result['media'] | 
					
						
							|  |  |  |             results.append({'template': 'images.html', | 
					
						
							|  |  |  |                             'url': res_url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'content': '', | 
					
						
							|  |  |  |                             'thumbnail_src': thumbnail_src, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |         elif category_to_keyword.get(categories[0], '') == 'social': | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             published_date = datetime.fromtimestamp(result['date'], None) | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |             img_src = result.get('img', None) | 
					
						
							|  |  |  |             results.append({'url': res_url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'publishedDate': published_date, | 
					
						
							|  |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |         elif category_to_keyword.get(categories[0], '') == 'news': | 
					
						
							|  |  |  |             published_date = datetime.fromtimestamp(result['date'], None) | 
					
						
							|  |  |  |             media = result.get('media', []) | 
					
						
							|  |  |  |             if len(media) > 0: | 
					
						
							|  |  |  |                 img_src = media[0].get('pict', {}).get('url', None) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 img_src = None | 
					
						
							| 
									
										
										
										
											2015-06-02 20:36:58 +02:00
										 |  |  |             results.append({'url': res_url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'publishedDate': published_date, | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							| 
									
										
										
										
											2015-06-01 00:00:32 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get supported languages from their site | 
					
						
							|  |  |  | def _fetch_supported_languages(resp): | 
					
						
							|  |  |  |     # list of regions is embedded in page as a js object | 
					
						
							|  |  |  |     response_text = resp.text | 
					
						
							|  |  |  |     response_text = response_text[response_text.find('regionalisation'):] | 
					
						
							|  |  |  |     response_text = response_text[response_text.find('{'):response_text.find(');')] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     regions_json = loads(response_text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     supported_languages = [] | 
					
						
							|  |  |  |     for lang in regions_json['languages'].values(): | 
					
						
							| 
									
										
										
										
											2017-03-02 00:11:51 +01:00
										 |  |  |         if lang['code'] == 'nb': | 
					
						
							|  |  |  |             lang['code'] = 'no' | 
					
						
							| 
									
										
										
										
											2017-02-25 03:21:48 +01:00
										 |  |  |         for country in lang['countries']: | 
					
						
							|  |  |  |             supported_languages.append(lang['code'] + '-' + country) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return supported_languages |