# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Seznam
"""

from urllib.parse import urlencode, urlparse
from lxml import html
from searx.poolrequests import get
from searx.exceptions import SearxEngineAccessDeniedException
from searx.utils import (
    extract_text,
    eval_xpath_list,
    eval_xpath_getindex,
    eval_xpath,
)

# about
about = {
    "website": "https://www.seznam.cz/",
    "wikidata_id": "Q3490485",
    "official_api_documentation": "https://api.sklik.cz/",
    "use_official_api": False,
    "require_api_key": False,
    "results": "HTML",
}

base_url = 'https://search.seznam.cz/'


def request(query, params):
    # Seznam's search form carries hidden input fields; fetch the start page
    # first so those fields (and the returned cookies) can be copied into the
    # actual search request.
    response_index = get(base_url, headers=params['headers'], raise_for_httperror=True)
    dom = html.fromstring(response_index.text)

    url_params = {
        'q': query,
        'oq': query,
    }
    for e in eval_xpath_list(dom, '//input[@type="hidden"]'):
        name = e.get('name')
        value = e.get('value')
        url_params[name] = value

    params['url'] = base_url + '?' + urlencode(url_params)
    params['cookies'] = response_index.cookies
    return params


def response(resp):
    # A redirect to /verify means Seznam wants to verify the client;
    # report this as access denied instead of returning empty results.
    resp_url = urlparse(resp.url)
    if resp_url.path.startswith('/verify'):
        raise SearxEngineAccessDeniedException()

    results = []

    dom = html.fromstring(resp.content.decode())
    for result_element in eval_xpath_list(dom, '//div[@data-dot="results"]/div'):
        result_data = eval_xpath_getindex(result_element, './/div[contains(@class, "Result")]', 0, default=None)
        if result_data is None:
            continue
        title_element = eval_xpath_getindex(result_element, './/h3/a', 0)
        results.append({
            'url': title_element.get('href'),
            'title': extract_text(title_element),
            'content': extract_text(eval_xpath(result_data, './/p[@class="Result-description"]')),
        })

    return results
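

# Hedged usage sketch (illustrative only, not part of the engine): in searx
# the core pre-fills ``params`` (headers, cookies, method, ...), lets
# ``request()`` set the final URL and cookies, performs the HTTP call itself
# and hands the raw response to ``response()``.  The minimal ``params`` dict
# and the plain ``get()`` call below are assumptions made for this example,
# not the real searx request pipeline.
if __name__ == '__main__':
    example_params = {'headers': {'User-Agent': 'Mozilla/5.0'}, 'cookies': {}}
    example_params = request('praha', example_params)

    example_resp = get(
        example_params['url'],
        headers=example_params['headers'],
        cookies=example_params['cookies'],
    )
    for item in response(example_resp):
        print(item['title'], '->', item['url'])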