| 
									
										
										
										
											2016-09-06 16:36:04 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Dictzone | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://dictzone.com/ | 
					
						
							|  |  |  |  @provide-api no | 
					
						
							|  |  |  |  @using-api   no | 
					
						
							|  |  |  |  @results     HTML (using search portal) | 
					
						
							|  |  |  |  @stable      no (HTML can change) | 
					
						
							|  |  |  |  @parse       url, title, content | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urljoin | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  | from searx.utils import is_valid_lang, eval_xpath | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 12:34:20 +02:00
										 |  |  | categories = ['general'] | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | weight = 100 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-08-11 16:25:03 +02:00
										 |  |  | parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | results_xpath = './/table[@id="r"]/tr' | 
					
						
							| 
									
										
										
										
											2020-12-09 17:33:18 +01:00
										 |  |  | https_support = True | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  |     m = parser_re.match(query) | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  |     if not m: | 
					
						
							|  |  |  |         return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     from_lang, to_lang, query = m.groups() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 14:12:46 +02:00
										 |  |  |     from_lang = is_valid_lang(from_lang) | 
					
						
							|  |  |  |     to_lang = is_valid_lang(to_lang) | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 14:12:46 +02:00
										 |  |  |     if not from_lang or not to_lang: | 
					
						
							| 
									
										
										
										
											2016-09-06 12:46:18 +02:00
										 |  |  |         return params | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 16:43:48 +02:00
										 |  |  |     params['url'] = url.format(from_lang=from_lang[2], | 
					
						
							|  |  |  |                                to_lang=to_lang[2], | 
					
						
							| 
									
										
										
										
											2020-08-11 16:25:03 +02:00
										 |  |  |                                query=query) | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 14:24:08 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  |     for k, result in enumerate(eval_xpath(dom, results_xpath)[1:]): | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  |         try: | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  |             from_result, to_results_raw = eval_xpath(result, './td') | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  |         except: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         to_results = [] | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  |         for to_result in eval_xpath(to_results_raw, './p/a'): | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  |             t = to_result.text_content() | 
					
						
							|  |  |  |             if t.strip(): | 
					
						
							|  |  |  |                 to_results.append(to_result.text_content()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results.append({ | 
					
						
							| 
									
										
										
										
											2016-09-06 12:37:26 +02:00
										 |  |  |             'url': urljoin(resp.url, '?%d' % k), | 
					
						
							| 
									
										
										
										
											2016-12-09 11:44:24 +01:00
										 |  |  |             'title': from_result.text_content(), | 
					
						
							|  |  |  |             'content': '; '.join(to_results) | 
					
						
							| 
									
										
										
										
											2016-09-06 11:47:27 +02:00
										 |  |  |         }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |