rewrite duckduckgo engine and add comments
This commit is contained in:
		
							parent
							
								
									3d61d9b930
								
							
						
					
					
						commit
						e6e4de8ba0
					
				| @ -1,24 +1,48 @@ | |||||||
|  | ## DuckDuckGo (Web) | ||||||
|  | #  | ||||||
|  | # @website     https://duckduckgo.com/ | ||||||
|  | # @provide-api yes (https://duckduckgo.com/api), but not all results from search-site | ||||||
|  | #  | ||||||
|  | # @using-api   no | ||||||
|  | # @results     HTML (using search portal) | ||||||
|  | # @stable      no (HTML can change) | ||||||
|  | # @parse       url, title, content | ||||||
|  | # | ||||||
|  | # @todo        rewrite to api | ||||||
|  | # @todo        language support | ||||||
|  | 
 | ||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from lxml.html import fromstring | from lxml.html import fromstring | ||||||
| from searx.utils import html_to_text | from searx.utils import html_to_text | ||||||
| 
 | 
 | ||||||
| url = 'https://duckduckgo.com/html?{query}&s={offset}' | # engine dependent config | ||||||
|  | categories = ['general'] | ||||||
|  | paging = True | ||||||
| locale = 'us-en' | locale = 'us-en' | ||||||
| 
 | 
 | ||||||
|  | # search-url | ||||||
|  | url = 'https://duckduckgo.com/html?{query}&s={offset}' | ||||||
| 
 | 
 | ||||||
|  | # specific xpath variables | ||||||
|  | result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa | ||||||
|  | url_xpath = './/a[@class="large"]/@href' | ||||||
|  | title_xpath = './/a[@class="large"]//text()' | ||||||
|  | content_xpath = './/div[@class="snippet"]//text()' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     offset = (params['pageno'] - 1) * 30 |     offset = (params['pageno'] - 1) * 30 | ||||||
|     q = urlencode({'q': query, | 
 | ||||||
|                    'l': locale}) |     params['url'] = url.format( | ||||||
|     params['url'] = url.format(query=q, offset=offset) |         query=urlencode({'q': query, 'l': locale}), | ||||||
|  |         offset=offset) | ||||||
|  | 
 | ||||||
|     return params |     return params | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | # get response from search-request | ||||||
| def response(resp): | def response(resp): | ||||||
|     result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa |  | ||||||
|     url_xpath = './/a[@class="large"]/@href' |  | ||||||
|     title_xpath = './/a[@class="large"]//text()' |  | ||||||
|     content_xpath = './/div[@class="snippet"]//text()' |  | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     doc = fromstring(resp.text) |     doc = fromstring(resp.text) | ||||||
| @ -28,38 +52,17 @@ def response(resp): | |||||||
|             res_url = r.xpath(url_xpath)[-1] |             res_url = r.xpath(url_xpath)[-1] | ||||||
|         except: |         except: | ||||||
|             continue |             continue | ||||||
|  | 
 | ||||||
|         if not res_url: |         if not res_url: | ||||||
|             continue |             continue | ||||||
|  | 
 | ||||||
|         title = html_to_text(''.join(r.xpath(title_xpath))) |         title = html_to_text(''.join(r.xpath(title_xpath))) | ||||||
|         content = html_to_text(''.join(r.xpath(content_xpath))) |         content = html_to_text(''.join(r.xpath(content_xpath))) | ||||||
|  | 
 | ||||||
|  |         # append result | ||||||
|         results.append({'title': title, |         results.append({'title': title, | ||||||
|                         'content': content, |                         'content': content, | ||||||
|                         'url': res_url}) |                         'url': res_url}) | ||||||
| 
 | 
 | ||||||
|  |     # return results | ||||||
|     return results |     return results | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| #from json import loads |  | ||||||
| #search_url = url + 'd.js?{query}&p=1&s={offset}' |  | ||||||
| # |  | ||||||
| #paging = True |  | ||||||
| # |  | ||||||
| # |  | ||||||
| #def request(query, params): |  | ||||||
| #    offset = (params['pageno'] - 1) * 30 |  | ||||||
| #    q = urlencode({'q': query, |  | ||||||
| #                   'l': locale}) |  | ||||||
| #    params['url'] = search_url.format(query=q, offset=offset) |  | ||||||
| #    return params |  | ||||||
| # |  | ||||||
| # |  | ||||||
| #def response(resp): |  | ||||||
| #    results = [] |  | ||||||
| #    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] |  | ||||||
| #    for r in search_res: |  | ||||||
| #        if not r.get('t'): |  | ||||||
| #            continue |  | ||||||
| #        results.append({'title': r['t'], |  | ||||||
| #                       'content': html_to_text(r['a']), |  | ||||||
| #                       'url': r['u']}) |  | ||||||
| #    return results |  | ||||||
|  | |||||||
| @ -37,7 +37,6 @@ engines: | |||||||
| 
 | 
 | ||||||
|   - name : deviantart |   - name : deviantart | ||||||
|     engine : deviantart |     engine : deviantart | ||||||
|     categories : images |  | ||||||
|     shortcut : da |     shortcut : da | ||||||
|     timeout: 3.0 |     timeout: 3.0 | ||||||
| 
 | 
 | ||||||
| @ -47,7 +46,6 @@ engines: | |||||||
| 
 | 
 | ||||||
|   - name : duckduckgo |   - name : duckduckgo | ||||||
|     engine : duckduckgo |     engine : duckduckgo | ||||||
|     locale : en-us |  | ||||||
|     shortcut : ddg |     shortcut : ddg | ||||||
| 
 | 
 | ||||||
| # down - website is under criminal investigation by the UK | # down - website is under criminal investigation by the UK | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user