[mod] yahoo: fetch engine traits (data_type: traits_v1)
Implements a fetch_traits function for the Yahoo engine. .. note:: Includes migration of the request method from 'supported_languages' to the 'traits' (EngineTraits) object! Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									c1ae2ef57c
								
							
						
					
					
						commit
						ef143729a0
					
				| @ -6234,43 +6234,43 @@ | ||||
|     } | ||||
|   }, | ||||
|   "yahoo": { | ||||
|     "all_locale": null, | ||||
|     "data_type": "supported_languages", | ||||
|     "languages": {}, | ||||
|     "all_locale": "any", | ||||
|     "data_type": "traits_v1", | ||||
|     "languages": { | ||||
|       "ar": "ar", | ||||
|       "bg": "bg", | ||||
|       "cs": "cs", | ||||
|       "da": "da", | ||||
|       "de": "de", | ||||
|       "el": "el", | ||||
|       "en": "en", | ||||
|       "es": "es", | ||||
|       "et": "et", | ||||
|       "fi": "fi", | ||||
|       "fr": "fr", | ||||
|       "he": "he", | ||||
|       "hr": "hr", | ||||
|       "hu": "hu", | ||||
|       "it": "it", | ||||
|       "ja": "ja", | ||||
|       "ko": "ko", | ||||
|       "lt": "lt", | ||||
|       "lv": "lv", | ||||
|       "nl": "nl", | ||||
|       "no": "no", | ||||
|       "pl": "pl", | ||||
|       "pt": "pt", | ||||
|       "ro": "ro", | ||||
|       "ru": "ru", | ||||
|       "sk": "sk", | ||||
|       "sl": "sl", | ||||
|       "sv": "sv", | ||||
|       "th": "th", | ||||
|       "tr": "tr", | ||||
|       "zh_Hans": "zh_chs", | ||||
|       "zh_Hant": "zh_cht" | ||||
|     }, | ||||
|     "regions": {}, | ||||
|     "supported_languages": [ | ||||
|       "ar", | ||||
|       "bg", | ||||
|       "cs", | ||||
|       "da", | ||||
|       "de", | ||||
|       "el", | ||||
|       "en", | ||||
|       "es", | ||||
|       "et", | ||||
|       "fi", | ||||
|       "fr", | ||||
|       "he", | ||||
|       "hr", | ||||
|       "hu", | ||||
|       "it", | ||||
|       "ja", | ||||
|       "ko", | ||||
|       "lt", | ||||
|       "lv", | ||||
|       "nl", | ||||
|       "no", | ||||
|       "pl", | ||||
|       "pt", | ||||
|       "ro", | ||||
|       "ru", | ||||
|       "sk", | ||||
|       "sl", | ||||
|       "sv", | ||||
|       "th", | ||||
|       "tr", | ||||
|       "zh_chs", | ||||
|       "zh_cht" | ||||
|     ] | ||||
|     "supported_languages": {} | ||||
|   } | ||||
| } | ||||
|  | ||||
| @ -17,8 +17,10 @@ from searx.utils import ( | ||||
|     eval_xpath_getindex, | ||||
|     eval_xpath_list, | ||||
|     extract_text, | ||||
|     match_language, | ||||
| ) | ||||
| from searx.enginelib.traits import EngineTraits | ||||
| 
 | ||||
| traits: EngineTraits | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -34,8 +36,7 @@ about = { | ||||
| categories = ['general', 'web'] | ||||
| paging = True | ||||
| time_range_support = True | ||||
| supported_languages_url = 'https://search.yahoo.com/preferences/languages' | ||||
| """Supported languages are read from Yahoo preference page.""" | ||||
| # send_accept_language_header = True | ||||
| 
 | ||||
| time_range_dict = { | ||||
|     'day': ('1d', 'd'), | ||||
| @ -43,15 +44,10 @@ time_range_dict = { | ||||
|     'month': ('1m', 'm'), | ||||
| } | ||||
| 
 | ||||
| language_aliases = { | ||||
|     'zh-HK': 'zh_chs', | ||||
|     'zh-CN': 'zh_chs',  # dead since 2015 / routed to hk.search.yahoo.com | ||||
|     'zh-TW': 'zh_cht', | ||||
| } | ||||
| 
 | ||||
| lang2domain = { | ||||
|     'zh_chs': 'hk.search.yahoo.com', | ||||
|     'zh_cht': 'tw.search.yahoo.com', | ||||
|     'any': 'search.yahoo.com', | ||||
|     'en': 'search.yahoo.com', | ||||
|     'bg': 'search.yahoo.com', | ||||
|     'cs': 'search.yahoo.com', | ||||
| @ -67,21 +63,23 @@ lang2domain = { | ||||
| } | ||||
| """Map language to domain""" | ||||
| 
 | ||||
| 
 | ||||
| def _get_language(params): | ||||
| 
 | ||||
|     lang = language_aliases.get(params['language']) | ||||
|     if lang is None: | ||||
|         lang = match_language(params['language'], supported_languages, language_aliases) | ||||
|     lang = lang.split('-')[0] | ||||
|     logger.debug("params['language']: %s --> %s", params['language'], lang) | ||||
|     return lang | ||||
| locale_aliases = { | ||||
|     'zh': 'zh_Hans', | ||||
|     'zh-HK': 'zh_Hans', | ||||
|     'zh-CN': 'zh_Hans',  # dead since 2015 / routed to hk.search.yahoo.com | ||||
|     'zh-TW': 'zh_Hant', | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     """build request""" | ||||
| 
 | ||||
|     lang = locale_aliases.get(params['language'], None) | ||||
|     if not lang: | ||||
|         lang = params['language'].split('-')[0] | ||||
|     lang = traits.get_language(lang, traits.all_locale) | ||||
| 
 | ||||
|     offset = (params['pageno'] - 1) * 7 + 1 | ||||
|     lang = _get_language(params) | ||||
|     age, btf = time_range_dict.get(params['time_range'], ('', '')) | ||||
| 
 | ||||
|     args = urlencode( | ||||
| @ -154,13 +152,37 @@ def response(resp): | ||||
|     return results | ||||
| 
 | ||||
| 
 | ||||
| # get supported languages from their site | ||||
| def _fetch_supported_languages(resp): | ||||
|     supported_languages = [] | ||||
| def fetch_traits(engine_traits: EngineTraits): | ||||
|     """Fetch languages from yahoo""" | ||||
| 
 | ||||
|     # pylint: disable=import-outside-toplevel | ||||
|     import babel | ||||
|     from searx import network | ||||
|     from searx.locales import language_tag | ||||
| 
 | ||||
|     engine_traits.all_locale = 'any' | ||||
| 
 | ||||
|     resp = network.get('https://search.yahoo.com/preferences/languages') | ||||
|     if not resp.ok: | ||||
|         print("ERROR: response from peertube is not OK.") | ||||
| 
 | ||||
|     dom = html.fromstring(resp.text) | ||||
|     offset = len('lang_') | ||||
| 
 | ||||
|     for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): | ||||
|         supported_languages.append(val[offset:]) | ||||
|     eng2sxng = {'zh_chs': 'zh_Hans', 'zh_cht': 'zh_Hant'} | ||||
| 
 | ||||
|     return supported_languages | ||||
|     for val in eval_xpath_list(dom, '//div[contains(@class, "lang-item")]/input/@value'): | ||||
|         eng_tag = val[offset:] | ||||
| 
 | ||||
|         try: | ||||
|             sxng_tag = language_tag(babel.Locale.parse(eng2sxng.get(eng_tag, eng_tag))) | ||||
|         except babel.UnknownLocaleError: | ||||
|             print('ERROR: unknown language --> %s' % eng_tag) | ||||
|             continue | ||||
| 
 | ||||
|         conflict = engine_traits.languages.get(sxng_tag) | ||||
|         if conflict: | ||||
|             if conflict != eng_tag: | ||||
|                 print("CONFLICT: babel %s --> %s, %s" % (sxng_tag, conflict, eng_tag)) | ||||
|             continue | ||||
|         engine_traits.languages[sxng_tag] = eng_tag | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user