| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Yandex (Web) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://yandex.ru/ | 
					
						
							|  |  |  |  @provide-api ? | 
					
						
							|  |  |  |  @using-api   no | 
					
						
							|  |  |  |  @results     HTML (using search portal) | 
					
						
							|  |  |  |  @stable      no (HTML can change) | 
					
						
							|  |  |  |  @parse       url, title, content | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | from searx import logger | 
					
						
							|  |  |  | from searx.url_utils import urlencode | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | logger = logger.getChild('yandex engine') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							|  |  |  | categories = ['general'] | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | language_support = True  # TODO | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-31 23:05:07 +01:00
										 |  |  | default_tld = 'com' | 
					
						
							|  |  |  | language_map = {'ru': 'ru', | 
					
						
							| 
									
										
										
										
											2016-10-30 03:04:01 +01:00
										 |  |  |                 'ua': 'ua', | 
					
						
							|  |  |  |                 'be': 'by', | 
					
						
							|  |  |  |                 'kk': 'kz', | 
					
						
							| 
									
										
										
										
											2015-10-31 23:05:07 +01:00
										 |  |  |                 'tr': 'com.tr'} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | # search-url | 
					
						
							| 
									
										
										
										
											2015-10-31 23:05:07 +01:00
										 |  |  | base_url = 'https://yandex.{tld}/' | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | search_url = 'search/?{query}&p={page}' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-11 04:57:42 +01:00
										 |  |  | results_xpath = '//li[@class="serp-item"]' | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | url_xpath = './/h2/a/@href' | 
					
						
							|  |  |  | title_xpath = './/h2/a//text()' | 
					
						
							| 
									
										
										
										
											2016-12-11 04:57:42 +01:00
										 |  |  | content_xpath = './/div[@class="text-container typo typo_text_m typo_line_m organic__text"]//text()' | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2016-08-06 06:34:56 +02:00
										 |  |  |     lang = params['language'].split('-')[0] | 
					
						
							| 
									
										
										
										
											2015-10-31 23:05:07 +01:00
										 |  |  |     host = base_url.format(tld=language_map.get(lang) or default_tld) | 
					
						
							| 
									
										
										
										
											2016-01-18 12:47:31 +01:00
										 |  |  |     params['url'] = host + search_url.format(page=params['pageno'] - 1, | 
					
						
							| 
									
										
										
										
											2015-10-31 23:05:07 +01:00
										 |  |  |                                              query=urlencode({'text': query})) | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in dom.xpath(results_xpath): | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             res = {'url': result.xpath(url_xpath)[0], | 
					
						
							| 
									
										
										
										
											2016-12-09 11:44:24 +01:00
										 |  |  |                    'title': ''.join(result.xpath(title_xpath)), | 
					
						
							|  |  |  |                    'content': ''.join(result.xpath(content_xpath))} | 
					
						
							| 
									
										
										
										
											2015-10-31 15:27:23 +01:00
										 |  |  |         except: | 
					
						
							|  |  |  |             logger.exception('yandex parse crash') | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results.append(res) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |