[fix] update_engine_traits.py: annas archive, bing-* and zlibrary engines
The GitHub action "Update data - update_engine_traits" [1] had issues with the
annas archive, bing-* and zlibrary engines:
    ./manage pyenv.cmd python ./searxng_extra/update/update_engine_traits.py
[1] https://github.com/searxng/searxng/actions/runs/12530827768/job/34953392587
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
			
			
This commit is contained in:
		
							parent
							
								
									9e32cd2047
								
							
						
					
					
						commit
						e52e3f15ca
					
				| @ -169,7 +169,7 @@ def fetch_traits(engine_traits: EngineTraits): | |||||||
|     lang_map = {} |     lang_map = {} | ||||||
|     for x in eval_xpath_list(dom, "//form//input[@name='lang']"): |     for x in eval_xpath_list(dom, "//form//input[@name='lang']"): | ||||||
|         eng_lang = x.get("value") |         eng_lang = x.get("value") | ||||||
|         if eng_lang in ('', '_empty', 'nl-BE', 'und'): |         if eng_lang in ('', '_empty', 'nl-BE', 'und') or eng_lang.startswith('anti__'): | ||||||
|             continue |             continue | ||||||
|         try: |         try: | ||||||
|             locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') |             locale = babel.Locale.parse(lang_map.get(eng_lang, eng_lang), sep='-') | ||||||
| @ -186,10 +186,12 @@ def fetch_traits(engine_traits: EngineTraits): | |||||||
|         engine_traits.languages[sxng_lang] = eng_lang |         engine_traits.languages[sxng_lang] = eng_lang | ||||||
| 
 | 
 | ||||||
|     for x in eval_xpath_list(dom, "//form//input[@name='content']"): |     for x in eval_xpath_list(dom, "//form//input[@name='content']"): | ||||||
|         engine_traits.custom['content'].append(x.get("value")) |         if not x.get("value").startswith("anti__"): | ||||||
|  |             engine_traits.custom['content'].append(x.get("value")) | ||||||
| 
 | 
 | ||||||
|     for x in eval_xpath_list(dom, "//form//input[@name='ext']"): |     for x in eval_xpath_list(dom, "//form//input[@name='ext']"): | ||||||
|         engine_traits.custom['ext'].append(x.get("value")) |         if not x.get("value").startswith("anti__"): | ||||||
|  |             engine_traits.custom['ext'].append(x.get("value")) | ||||||
| 
 | 
 | ||||||
|     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"): |     for x in eval_xpath_list(dom, "//form//select[@name='sort']//option"): | ||||||
|         engine_traits.custom['sort'].append(x.get("value")) |         engine_traits.custom['sort'].append(x.get("value")) | ||||||
|  | |||||||
| @ -192,8 +192,21 @@ def fetch_traits(engine_traits: EngineTraits): | |||||||
|     # pylint: disable=import-outside-toplevel |     # pylint: disable=import-outside-toplevel | ||||||
| 
 | 
 | ||||||
|     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 |     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 | ||||||
|  |     from searx.utils import gen_useragent | ||||||
| 
 | 
 | ||||||
|     resp = get("https://www.bing.com/account/general") |     headers = { | ||||||
|  |         "User-Agent": gen_useragent(), | ||||||
|  |         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", | ||||||
|  |         "Accept-Language": "en-US;q=0.5,en;q=0.3", | ||||||
|  |         "Accept-Encoding": "gzip, deflate, br", | ||||||
|  |         "DNT": "1", | ||||||
|  |         "Connection": "keep-alive", | ||||||
|  |         "Upgrade-Insecure-Requests": "1", | ||||||
|  |         "Sec-GPC": "1", | ||||||
|  |         "Cache-Control": "max-age=0", | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     resp = get("https://www.bing.com/account/general", headers=headers) | ||||||
|     if not resp.ok:  # type: ignore |     if not resp.ok:  # type: ignore | ||||||
|         print("ERROR: response from bing is not OK.") |         print("ERROR: response from bing is not OK.") | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -183,17 +183,27 @@ def fetch_traits(engine_traits: EngineTraits) -> None: | |||||||
|     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 |     from searx.network import get  # see https://github.com/searxng/searxng/issues/762 | ||||||
|     from searx.locales import language_tag |     from searx.locales import language_tag | ||||||
| 
 | 
 | ||||||
|     resp = get(base_url, verify=False) |     def _use_old_values(): | ||||||
|  |         # don't change anything, re-use the existing values | ||||||
|  |         engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"] | ||||||
|  |         engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"] | ||||||
|  |         engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"] | ||||||
|  | 
 | ||||||
|  |     try: | ||||||
|  |         resp = get(base_url, verify=False) | ||||||
|  |     except SearxException as exc: | ||||||
|  |         print(f"ERROR: zlibrary domain '{base_url}' is seized?") | ||||||
|  |         print(f"  --> {exc}") | ||||||
|  |         _use_old_values() | ||||||
|  |         return | ||||||
|  | 
 | ||||||
|     if not resp.ok:  # type: ignore |     if not resp.ok:  # type: ignore | ||||||
|         raise RuntimeError("Response from zlibrary's search page is not OK.") |         raise RuntimeError("Response from zlibrary's search page is not OK.") | ||||||
|     dom = html.fromstring(resp.text)  # type: ignore |     dom = html.fromstring(resp.text)  # type: ignore | ||||||
| 
 | 
 | ||||||
|     if domain_is_seized(dom): |     if domain_is_seized(dom): | ||||||
|         print(f"ERROR: zlibrary domain is seized: {base_url}") |         print(f"ERROR: zlibrary domain is seized: {base_url}") | ||||||
|         # don't change anything, re-use the existing values |         _use_old_values() | ||||||
|         engine_traits.all_locale = ENGINE_TRAITS["z-library"]["all_locale"] |  | ||||||
|         engine_traits.custom = ENGINE_TRAITS["z-library"]["custom"] |  | ||||||
|         engine_traits.languages = ENGINE_TRAITS["z-library"]["languages"] |  | ||||||
|         return |         return | ||||||
| 
 | 
 | ||||||
|     engine_traits.all_locale = "" |     engine_traits.all_locale = "" | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user