rewrite duckduckgo engine and add comments
This commit is contained in:
		
							parent
							
								
									3d61d9b930
								
							
						
					
					
						commit
						e6e4de8ba0
					
				| @ -1,24 +1,48 @@ | ||||
| ## DuckDuckGo (Web) | ||||
| #  | ||||
| # @website     https://duckduckgo.com/ | ||||
| # @provide-api yes (https://duckduckgo.com/api), but it does not return all results of the search site | ||||
| #  | ||||
| # @using-api   no | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content | ||||
| # | ||||
| # @todo        rewrite to use the API | ||||
| # @todo        language support | ||||
| 
 | ||||
| from urllib import urlencode | ||||
| from lxml.html import fromstring | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| url = 'https://duckduckgo.com/html?{query}&s={offset}' | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
| locale = 'us-en' | ||||
| 
 | ||||
| # search-url | ||||
| url = 'https://duckduckgo.com/html?{query}&s={offset}' | ||||
| 
 | ||||
| # specific xpath variables | ||||
| result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa | ||||
| url_xpath = './/a[@class="large"]/@href' | ||||
| title_xpath = './/a[@class="large"]//text()' | ||||
| content_xpath = './/div[@class="snippet"]//text()' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|     offset = (params['pageno'] - 1) * 30 | ||||
|     q = urlencode({'q': query, | ||||
|                    'l': locale}) | ||||
|     params['url'] = url.format(query=q, offset=offset) | ||||
| 
 | ||||
|     params['url'] = url.format( | ||||
|         query=urlencode({'q': query, 'l': locale}), | ||||
|         offset=offset) | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
| def response(resp): | ||||
|     result_xpath = '//div[@class="results_links results_links_deep web-result"]'  # noqa | ||||
|     url_xpath = './/a[@class="large"]/@href' | ||||
|     title_xpath = './/a[@class="large"]//text()' | ||||
|     content_xpath = './/div[@class="snippet"]//text()' | ||||
|     results = [] | ||||
| 
 | ||||
|     doc = fromstring(resp.text) | ||||
| @ -28,38 +52,17 @@ def response(resp): | ||||
|             res_url = r.xpath(url_xpath)[-1] | ||||
|         except: | ||||
|             continue | ||||
| 
 | ||||
|         if not res_url: | ||||
|             continue | ||||
| 
 | ||||
|         title = html_to_text(''.join(r.xpath(title_xpath))) | ||||
|         content = html_to_text(''.join(r.xpath(content_xpath))) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'title': title, | ||||
|                         'content': content, | ||||
|                         'url': res_url}) | ||||
| 
 | ||||
|     # return results | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| #from json import loads | ||||
| #search_url = url + 'd.js?{query}&p=1&s={offset}' | ||||
| # | ||||
| #paging = True | ||||
| # | ||||
| # | ||||
| #def request(query, params): | ||||
| #    offset = (params['pageno'] - 1) * 30 | ||||
| #    q = urlencode({'q': query, | ||||
| #                   'l': locale}) | ||||
| #    params['url'] = search_url.format(query=q, offset=offset) | ||||
| #    return params | ||||
| # | ||||
| # | ||||
| #def response(resp): | ||||
| #    results = [] | ||||
| #    search_res = loads(resp.text[resp.text.find('[{'):-2])[:-1] | ||||
| #    for r in search_res: | ||||
| #        if not r.get('t'): | ||||
| #            continue | ||||
| #        results.append({'title': r['t'], | ||||
| #                       'content': html_to_text(r['a']), | ||||
| #                       'url': r['u']}) | ||||
| #    return results | ||||
|  | ||||
| @ -37,7 +37,6 @@ engines: | ||||
| 
 | ||||
|   - name : deviantart | ||||
|     engine : deviantart | ||||
|     categories : images | ||||
|     shortcut : da | ||||
|     timeout: 3.0 | ||||
| 
 | ||||
| @ -47,7 +46,6 @@ engines: | ||||
| 
 | ||||
|   - name : duckduckgo | ||||
|     engine : duckduckgo | ||||
|     locale : en-us | ||||
|     shortcut : ddg | ||||
| 
 | ||||
| # down - website is under criminal investigation by the UK | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user