| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | # lint: pylint | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | Arch Linux Wiki | 
					
						
							|  |  |  | ~~~~~~~~~~~~~~~ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | This implementation does not use an official API: MediaWiki provides an API, but | 
					
						
							|  |  |  | Arch Wiki blocks access to it. | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | from typing import TYPE_CHECKING | 
					
						
							|  |  |  | from urllib.parse import urlencode, urljoin, urlparse | 
					
						
							|  |  |  | import lxml | 
					
						
							|  |  |  | import babel | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from searx import network | 
					
						
							| 
									
										
										
										
											2020-11-26 15:49:33 +01:00
										 |  |  | from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | from searx.enginelib.traits import EngineTraits | 
					
						
							|  |  |  | from searx.locales import language_tag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | if TYPE_CHECKING: | 
					
						
							|  |  |  |     import logging | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     logger: logging.Logger | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | traits: EngineTraits | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://wiki.archlinux.org/', | 
					
						
							|  |  |  |     "wikidata_id": 'Q101445877', | 
					
						
							|  |  |  |     "official_api_documentation": None, | 
					
						
							|  |  |  |     "use_official_api": False, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'HTML', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2021-12-22 16:58:52 +01:00
										 |  |  | categories = ['it', 'software wikis'] | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | paging = True | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | main_wiki = 'wiki.archlinux.org' | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     sxng_lang = params['searxng_locale'].split('-')[0] | 
					
						
							|  |  |  |     netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) | 
					
						
							|  |  |  |     title = traits.custom['title'].get(sxng_lang, 'Special:Search') | 
					
						
							|  |  |  |     base_url = 'https://' + netloc + '/index.php?' | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  |     offset = (params['pageno'] - 1) * 20 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     if netloc == main_wiki: | 
					
						
							|  |  |  |         eng_lang: str = traits.get_language(sxng_lang, 'English') | 
					
						
							|  |  |  |         query += ' (' + eng_lang + ')' | 
					
						
							|  |  |  |     elif netloc == 'wiki.archlinuxcn.org': | 
					
						
							|  |  |  |         base_url = 'https://' + netloc + '/wzh/index.php?' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = { | 
					
						
							|  |  |  |         'search': query, | 
					
						
							|  |  |  |         'title': title, | 
					
						
							|  |  |  |         'limit': 20, | 
					
						
							|  |  |  |         'offset': offset, | 
					
						
							|  |  |  |         'profile': 'default', | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     params['url'] = base_url + urlencode(args) | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     dom = lxml.html.fromstring(resp.text) | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     # get the base URL for the language in which request was made | 
					
						
							|  |  |  |     sxng_lang = resp.search_params['searxng_locale'].split('-')[0] | 
					
						
							|  |  |  |     netloc = traits.custom['wiki_netloc'].get(sxng_lang, main_wiki) | 
					
						
							|  |  |  |     base_url = 'https://' + netloc + '/index.php?' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in eval_xpath_list(dom, '//ul[@class="mw-search-results"]/li'): | 
					
						
							|  |  |  |         link = eval_xpath_getindex(result, './/div[@class="mw-search-result-heading"]/a', 0) | 
					
						
							|  |  |  |         content = extract_text(result.xpath('.//div[@class="searchresult"]')) | 
					
						
							|  |  |  |         results.append( | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'url': urljoin(base_url, link.get('href')), | 
					
						
							|  |  |  |                 'title': extract_text(link), | 
					
						
							|  |  |  |                 'content': content, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  |     return results | 
					
						
							| 
									
										
										
										
											2016-03-23 20:57:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-01-07 16:04:19 +01:00
										 |  |  | def fetch_traits(engine_traits: EngineTraits): | 
					
						
							|  |  |  |     """Fetch languages from Archlinix-Wiki.  The location of the Wiki address of a
 | 
					
						
							|  |  |  |     language is mapped in a :py:obj:`custom field | 
					
						
							|  |  |  |     <searx.enginelib.traits.EngineTraits.custom>` (``wiki_netloc``).  Depending | 
					
						
							|  |  |  |     on the location, the ``title`` argument in the request is translated. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     .. code:: python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |        "custom": { | 
					
						
							|  |  |  |          "wiki_netloc": { | 
					
						
							|  |  |  |            "de": "wiki.archlinux.de", | 
					
						
							|  |  |  |             # ... | 
					
						
							|  |  |  |            "zh": "wiki.archlinuxcn.org" | 
					
						
							|  |  |  |          } | 
					
						
							|  |  |  |          "title": { | 
					
						
							|  |  |  |            "de": "Spezial:Suche", | 
					
						
							|  |  |  |             # ... | 
					
						
							|  |  |  |            "zh": "Special:\u641c\u7d22" | 
					
						
							|  |  |  |          }, | 
					
						
							|  |  |  |        }, | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     engine_traits.custom['wiki_netloc'] = {} | 
					
						
							|  |  |  |     engine_traits.custom['title'] = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     title_map = { | 
					
						
							|  |  |  |         'de': 'Spezial:Suche', | 
					
						
							|  |  |  |         'fa': 'ویژه:جستجو', | 
					
						
							|  |  |  |         'ja': '特別:検索', | 
					
						
							|  |  |  |         'zh': 'Special:搜索', | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     resp = network.get('https://wiki.archlinux.org/') | 
					
						
							|  |  |  |     if not resp.ok: | 
					
						
							|  |  |  |         print("ERROR: response from wiki.archlinix.org is not OK.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dom = lxml.html.fromstring(resp.text) | 
					
						
							|  |  |  |     for a in eval_xpath_list(dom, "//a[@class='interlanguage-link-target']"): | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         sxng_tag = language_tag(babel.Locale.parse(a.get('lang'), sep='-')) | 
					
						
							|  |  |  |         # zh_Hans --> zh | 
					
						
							|  |  |  |         sxng_tag = sxng_tag.split('_')[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         netloc = urlparse(a.get('href')).netloc | 
					
						
							|  |  |  |         if netloc != 'wiki.archlinux.org': | 
					
						
							|  |  |  |             title = title_map.get(sxng_tag) | 
					
						
							|  |  |  |             if not title: | 
					
						
							|  |  |  |                 print("ERROR: title tag from %s (%s) is unknown" % (netloc, sxng_tag)) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             engine_traits.custom['wiki_netloc'][sxng_tag] = netloc | 
					
						
							|  |  |  |             engine_traits.custom['title'][sxng_tag] = title | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         eng_tag = extract_text(eval_xpath_list(a, ".//span")) | 
					
						
							|  |  |  |         engine_traits.languages[sxng_tag] = eng_tag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     engine_traits.languages['en'] = 'English' |