| 
									
										
										
										
											2021-04-08 01:58:00 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
"""Wordnik (general)

Requests the HTML word page ``https://www.wordnik.com/words/<query>`` and
turns the definitions found on it into a single infobox result, grouped by
the source heading each definition list appears under.
"""
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from lxml.html import fromstring | 
					
						
							|  |  |  | from searx.utils import extract_text | 
					
						
							| 
									
										
										
											
												[httpx] replace searx.poolrequests by searx.network
settings.yml:
* outgoing.networks:
   * can contain network definitions
   * properties: enable_http, verify, http2, max_connections, max_keepalive_connections,
     keepalive_expiry, local_addresses, support_ipv4, support_ipv6, proxies, max_redirects, retries
   * retries: 0 by default, number of times searx retries to send the HTTP request (using different IP & proxy each time)
   * local_addresses can be "192.168.0.1/24" (it supports IPv6)
   * support_ipv4 & support_ipv6: both True by default
     see https://github.com/searx/searx/pull/1034
* each engine can define a "network" section:
   * either a full network description
   * or a reference to an existing network
* all HTTP requests of engine use the same HTTP configuration (it was not the case before, see proxy configuration in master)
											
										 
											2021-04-05 10:43:33 +02:00
										 |  |  | from searx.network import raise_for_httperror | 
					
						
							| 
									
										
										
										
											2021-04-08 01:58:00 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | # about | 
					
						
							|  |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://www.wordnik.com', | 
					
						
							|  |  |  |     "wikidata_id": 'Q8034401', | 
					
						
							|  |  |  |     "official_api_documentation": None, | 
					
						
							|  |  |  |     "use_official_api": False, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'HTML', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | categories = ['general'] | 
					
						
							|  |  |  | paging = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | URL = 'https://www.wordnik.com' | 
					
						
							|  |  |  | SEARCH_URL = URL + '/words/{query}' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     params['url'] = SEARCH_URL.format(query=query) | 
					
						
							|  |  |  |     logger.debug(f"query_url --> {params['url']}") | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     raise_for_httperror(resp) | 
					
						
							|  |  |  |     dom = fromstring(resp.text) | 
					
						
							|  |  |  |     word = extract_text(dom.xpath('//*[@id="headword"]/text()')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     definitions = [] | 
					
						
							|  |  |  |     for src in dom.xpath('//*[@id="define"]//h3[@class="source"]'): | 
					
						
							|  |  |  |         src_text = extract_text(src).strip() | 
					
						
							|  |  |  |         if src_text.startswith('from '): | 
					
						
							|  |  |  |             src_text = src_text[5:] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         src_defs = [] | 
					
						
							|  |  |  |         for def_item in src.xpath('following-sibling::ul[1]/li'): | 
					
						
							|  |  |  |             def_abbr = extract_text(def_item.xpath('.//abbr')).strip() | 
					
						
							|  |  |  |             def_text = extract_text(def_item).strip() | 
					
						
							|  |  |  |             if def_abbr: | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |                 def_text = def_text[len(def_abbr) :].strip() | 
					
						
							| 
									
										
										
										
											2021-04-08 01:58:00 +02:00
										 |  |  |             src_defs.append((def_abbr, def_text)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         definitions.append((src_text, src_defs)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not definitions: | 
					
						
							|  |  |  |         return results | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     infobox = '' | 
					
						
							|  |  |  |     for src_text, src_defs in definitions: | 
					
						
							|  |  |  |         infobox += f"<small>{src_text}</small>" | 
					
						
							|  |  |  |         infobox += "<ul>" | 
					
						
							|  |  |  |         for def_abbr, def_text in src_defs: | 
					
						
							|  |  |  |             if def_abbr: | 
					
						
							|  |  |  |                 def_abbr += ": " | 
					
						
							|  |  |  |             infobox += f"<li><i>{def_abbr}</i> {def_text}</li>" | 
					
						
							|  |  |  |         infobox += "</ul>" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |     results.append( | 
					
						
							|  |  |  |         { | 
					
						
							|  |  |  |             'infobox': word, | 
					
						
							|  |  |  |             'content': infobox, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-04-08 01:58:00 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |