| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  DuckDuckGo (Images) | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2020-11-26 17:22:54 +01:00
										 |  |  | from searx.exceptions import SearxEngineAPIException | 
					
						
							| 
									
										
										
										
											2020-11-02 11:19:53 +01:00
										 |  |  | from searx.engines.duckduckgo import get_region_code | 
					
						
							| 
									
										
										
										
											2020-11-16 09:43:23 +01:00
										 |  |  | from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | 
					
						
							| 
									
										
										
										
											2017-07-21 16:23:20 +02:00
										 |  |  | from searx.poolrequests import get | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # about | 
					
						
							|  |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://duckduckgo.com/', | 
					
						
							|  |  |  |     "wikidata_id": 'Q12805', | 
					
						
							|  |  |  |     "official_api_documentation": { | 
					
						
							|  |  |  |         'url': 'https://duckduckgo.com/api', | 
					
						
							|  |  |  |         'comment': 'but images are not supported', | 
					
						
							|  |  |  |     }, | 
					
						
							|  |  |  |     "use_official_api": False, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'JSON (site requires js to get images)', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  | # engine dependent config | 
					
						
# Presumably the result category this engine is registered under -- confirm
# against the searx engine loader.
categories = ['images']
# request() derives the result offset from params['pageno'].
paging = True
# request() forwards params['safesearch'], shifted by -1, as the `p` URL parameter.
safesearch = True

# search-url
# JSON endpoint for image results; request() fills in every placeholder.
images_url = 'https://duckduckgo.com/i.js?{query}&s={offset}&p={safesearch}&o=json&vqd={vqd}'
# Regular search page; get_vqd() fetches it to read the vqd token out of the body.
site_url = 'https://duckduckgo.com/?{query}&iar=images&iax=1&ia=images'
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # run query in site to get vqd number needed for requesting images | 
					
						
							|  |  |  | # TODO: find a way to get this number without an extra request (is it a hash of the query?) | 
					
						
							| 
									
										
										
										
											2019-04-13 07:17:32 +02:00
										 |  |  | def get_vqd(query, headers): | 
					
						
							|  |  |  |     query_url = site_url.format(query=urlencode({'q': query})) | 
					
						
							|  |  |  |     res = get(query_url, headers=headers) | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  |     content = res.text | 
					
						
							| 
									
										
										
										
											2019-04-13 07:17:32 +02:00
										 |  |  |     if content.find('vqd=\'') == -1: | 
					
						
							| 
									
										
										
										
											2020-11-26 17:22:54 +01:00
										 |  |  |         raise SearxEngineAPIException('Request failed') | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  |     vqd = content[content.find('vqd=\'') + 5:] | 
					
						
							|  |  |  |     vqd = vqd[:vqd.find('\'')] | 
					
						
							|  |  |  |     return vqd | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     # to avoid running actual external requests when testing | 
					
						
							|  |  |  |     if 'is_test' not in params: | 
					
						
							| 
									
										
										
										
											2019-04-13 07:17:32 +02:00
										 |  |  |         vqd = get_vqd(query, params['headers']) | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  |     else: | 
					
						
							|  |  |  |         vqd = '12345' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     offset = (params['pageno'] - 1) * 50 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     safesearch = params['safesearch'] - 1 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-07-05 05:29:06 +02:00
										 |  |  |     region_code = get_region_code(params['language'], lang_list=supported_languages) | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  |     if region_code: | 
					
						
							|  |  |  |         params['url'] = images_url.format( | 
					
						
							|  |  |  |             query=urlencode({'q': query, 'l': region_code}), offset=offset, safesearch=safesearch, vqd=vqd) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         params['url'] = images_url.format( | 
					
						
							|  |  |  |             query=urlencode({'q': query}), offset=offset, safesearch=safesearch, vqd=vqd) | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     content = resp.text | 
					
						
							| 
									
										
										
										
											2020-11-26 17:22:54 +01:00
										 |  |  |     res_json = loads(content) | 
					
						
							| 
									
										
										
										
											2017-05-21 05:33:08 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							|  |  |  |     for result in res_json['results']: | 
					
						
							|  |  |  |         title = result['title'] | 
					
						
							|  |  |  |         url = result['url'] | 
					
						
							|  |  |  |         thumbnail = result['thumbnail'] | 
					
						
							|  |  |  |         image = result['image'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # append result | 
					
						
							|  |  |  |         results.append({'template': 'images.html', | 
					
						
							|  |  |  |                         'title': title, | 
					
						
							|  |  |  |                         'content': '', | 
					
						
							|  |  |  |                         'thumbnail_src': thumbnail, | 
					
						
							|  |  |  |                         'img_src': image, | 
					
						
							|  |  |  |                         'url': url}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |