| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  | import json | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  | from urllib import urlencode | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2015-01-01 14:14:56 +01:00
										 |  |  | from searx.utils import html_to_text | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | from searx.engines.xpath import extract_text | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  | url = 'https://api.duckduckgo.com/'\ | 
					
						
							|  |  |  |     + '?{query}&format=json&pretty=0&no_redirect=1&d=1' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | def result_to_text(url, text, htmlResult): | 
					
						
							|  |  |  |     # TODO : remove result ending with "Meaning" or "Category" | 
					
						
							|  |  |  |     dom = html.fromstring(htmlResult) | 
					
						
							|  |  |  |     a = dom.xpath('//a') | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |     if len(a) >= 1: | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         return extract_text(a[0]) | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         return text | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |     # TODO add kl={locale} | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  |     params['url'] = url.format(query=urlencode({'q': query})) | 
					
						
							| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 18:28:08 +01:00
										 |  |  |     search_res = json.loads(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |     content = '' | 
					
						
							|  |  |  |     heading = search_res.get('Heading', '') | 
					
						
							|  |  |  |     attributes = [] | 
					
						
							|  |  |  |     urls = [] | 
					
						
							|  |  |  |     infobox_id = None | 
					
						
							|  |  |  |     relatedTopics = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # add answer if there is one | 
					
						
							|  |  |  |     answer = search_res.get('Answer', '') | 
					
						
							|  |  |  |     if answer != '': | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         results.append({'answer': html_to_text(answer)}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # add infobox | 
					
						
							| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  |     if 'Definition' in search_res: | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         content = content + search_res.get('Definition', '') | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if 'Abstract' in search_res: | 
					
						
							|  |  |  |         content = content + search_res.get('Abstract', '') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # image | 
					
						
							|  |  |  |     image = search_res.get('Image', '') | 
					
						
							|  |  |  |     image = None if image == '' else image | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # attributes | 
					
						
							|  |  |  |     if 'Infobox' in search_res: | 
					
						
							|  |  |  |         infobox = search_res.get('Infobox', None) | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         if 'content' in infobox: | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |             for info in infobox.get('content'): | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |                 attributes.append({'label': info.get('label'), | 
					
						
							|  |  |  |                                   'value': info.get('value')}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # urls | 
					
						
							|  |  |  |     for ddg_result in search_res.get('Results', []): | 
					
						
							|  |  |  |         if 'FirstURL' in ddg_result: | 
					
						
							|  |  |  |             firstURL = ddg_result.get('FirstURL', '') | 
					
						
							|  |  |  |             text = ddg_result.get('Text', '') | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |             urls.append({'title': text, 'url': firstURL}) | 
					
						
							|  |  |  |             results.append({'title': heading, 'url': firstURL}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # related topics | 
					
						
							| 
									
										
										
										
											2015-02-09 18:28:08 +01:00
										 |  |  |     for ddg_result in search_res.get('RelatedTopics', []): | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         if 'FirstURL' in ddg_result: | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |             suggestion = result_to_text(ddg_result.get('FirstURL', None), | 
					
						
							|  |  |  |                                         ddg_result.get('Text', None), | 
					
						
							|  |  |  |                                         ddg_result.get('Result', None)) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |             if suggestion != heading: | 
					
						
							|  |  |  |                 results.append({'suggestion': suggestion}) | 
					
						
							|  |  |  |         elif 'Topics' in ddg_result: | 
					
						
							|  |  |  |             suggestions = [] | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |             relatedTopics.append({'name': ddg_result.get('Name', ''), | 
					
						
							|  |  |  |                                  'suggestions': suggestions}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |             for topic_result in ddg_result.get('Topics', []): | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |                 suggestion = result_to_text(topic_result.get('FirstURL', None), | 
					
						
							|  |  |  |                                             topic_result.get('Text', None), | 
					
						
							|  |  |  |                                             topic_result.get('Result', None)) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |                 if suggestion != heading: | 
					
						
							|  |  |  |                     suggestions.append(suggestion) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # abstract | 
					
						
							|  |  |  |     abstractURL = search_res.get('AbstractURL', '') | 
					
						
							|  |  |  |     if abstractURL != '': | 
					
						
							|  |  |  |         # add as result ? problem always in english | 
					
						
							|  |  |  |         infobox_id = abstractURL | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         urls.append({'title': search_res.get('AbstractSource'), | 
					
						
							|  |  |  |                     'url': abstractURL}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # definition | 
					
						
							|  |  |  |     definitionURL = search_res.get('DefinitionURL', '') | 
					
						
							|  |  |  |     if definitionURL != '': | 
					
						
							|  |  |  |         # add as result ? as answer ? problem always in english | 
					
						
							|  |  |  |         infobox_id = definitionURL | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         urls.append({'title': search_res.get('DefinitionSource'), | 
					
						
							|  |  |  |                     'url': definitionURL}) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # entity | 
					
						
							|  |  |  |     entity = search_res.get('Entity', None) | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |     # TODO continent / country / department / location / waterfall / | 
					
						
							|  |  |  |     #      mountain range : | 
					
						
							|  |  |  |     #      link to map search, get weather, near by locations | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |     # TODO musician : link to music search | 
					
						
							|  |  |  |     # TODO concert tour : ?? | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |     # TODO film / actor / television  / media franchise : | 
					
						
							|  |  |  |     #      links to IMDB / rottentomatoes (or scrap result) | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |     # TODO music : link tu musicbrainz / last.fm | 
					
						
							|  |  |  |     # TODO book : ?? | 
					
						
							|  |  |  |     # TODO artist / playwright : ?? | 
					
						
							|  |  |  |     # TODO compagny : ?? | 
					
						
							|  |  |  |     # TODO software / os : ?? | 
					
						
							|  |  |  |     # TODO software engineer : ?? | 
					
						
							|  |  |  |     # TODO prepared food : ?? | 
					
						
							|  |  |  |     # TODO website : ?? | 
					
						
							|  |  |  |     # TODO performing art : ?? | 
					
						
							|  |  |  |     # TODO prepared food : ?? | 
					
						
							|  |  |  |     # TODO programming language : ?? | 
					
						
							|  |  |  |     # TODO file format : ?? | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |     if len(heading) > 0: | 
					
						
							| 
									
										
										
										
											2014-09-28 16:51:41 +02:00
										 |  |  |         # TODO get infobox.meta.value where .label='article_title' | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |         if image is None and len(attributes) == 0 and len(urls) == 1 and\ | 
					
						
							|  |  |  |            len(relatedTopics) == 0 and len(content) == 0: | 
					
						
							| 
									
										
										
										
											2014-10-11 15:49:50 +02:00
										 |  |  |             results.append({ | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |                            'url': urls[0]['url'], | 
					
						
							|  |  |  |                            'title': heading, | 
					
						
							|  |  |  |                            'content': content | 
					
						
							|  |  |  |                            }) | 
					
						
							| 
									
										
										
										
											2014-10-11 15:49:50 +02:00
										 |  |  |         else: | 
					
						
							|  |  |  |             results.append({ | 
					
						
							| 
									
										
										
										
											2014-12-07 16:36:20 +01:00
										 |  |  |                            'infobox': heading, | 
					
						
							|  |  |  |                            'id': infobox_id, | 
					
						
							|  |  |  |                            'entity': entity, | 
					
						
							|  |  |  |                            'content': content, | 
					
						
							|  |  |  |                            'img_src': image, | 
					
						
							|  |  |  |                            'attributes': attributes, | 
					
						
							|  |  |  |                            'urls': urls, | 
					
						
							|  |  |  |                            'relatedTopics': relatedTopics | 
					
						
							|  |  |  |                            }) | 
					
						
							| 
									
										
										
										
											2013-10-14 23:54:33 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |