| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2023-08-03 19:52:52 +02:00
										 |  |  | """YaCy_ is a free distributed search engine, built on the principles of
 | 
					
						
							|  |  |  | peer-to-peer (P2P) networks. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | API: Dev:APIyacysearch_ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Releases: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | - https://github.com/yacy/yacy_search_server/tags | 
					
						
							|  |  |  | - https://download.yacy.net/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .. _Yacy: https://yacy.net/ | 
					
						
							|  |  |  | .. _Dev:APIyacysearch: https://wiki.yacy.net/index.php/Dev:APIyacysearch | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Configuration | 
					
						
							|  |  |  | ============= | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | The engine has the following (additional) settings: | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  | - :py:obj:`http_digest_auth_user` | 
					
						
							|  |  |  | - :py:obj:`http_digest_auth_pass` | 
					
						
							|  |  |  | - :py:obj:`search_mode` | 
					
						
							|  |  |  | - :py:obj:`search_type` | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  | The :py:obj:`base_url` has to be set in the engine named `yacy` and is used by | 
					
						
							|  |  |  | all yacy engines. | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 19:52:52 +02:00
										 |  |  | .. code:: yaml | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  |   - name: yacy | 
					
						
							|  |  |  |     engine: yacy | 
					
						
							|  |  |  |     categories: general | 
					
						
							|  |  |  |     search_type: text | 
					
						
							|  |  |  |     shortcut: ya | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  |     base_url: | 
					
						
							|  |  |  |       - https://yacy.searchlab.eu | 
					
						
							|  |  |  |       - https://search.lomig.me | 
					
						
							|  |  |  |       - https://yacy.ecosys.eu | 
					
						
							|  |  |  |       - https://search.webproject.link | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |   - name: yacy images | 
					
						
							|  |  |  |     engine: yacy | 
					
						
							|  |  |  |     categories: images | 
					
						
							|  |  |  |     search_type: image | 
					
						
							|  |  |  |     shortcut: yai | 
					
						
							|  |  |  |     disabled: true | 
					
						
							| 
									
										
										
										
											2023-08-03 19:52:52 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Implementations | 
					
						
							|  |  |  | =============== | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2023-08-03 19:52:52 +02:00
										 |  |  | # pylint: disable=fixme | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  | from __future__ import annotations | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import random | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2023-08-03 19:52:52 +02:00
										 |  |  | from dateutil import parser | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  | from httpx import DigestAuth | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-11 14:05:07 +01:00
										 |  |  | from searx.utils import html_to_text | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
# about: descriptive metadata of this engine -- upstream website, Wikidata
# item, location of the API documentation and the format of the API results
# (JSON, no API key needed).
about = {
    "website": 'https://yacy.net/',
    "wikidata_id": 'Q1759675',
    "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API',
    "use_official_api": True,
    "require_api_key": False,
    "results": 'JSON',
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
# engine dependent config
#
# These module-level names are the defaults; the example in the module
# docstring shows them being set per engine entry (e.g. ``categories: images``
# together with ``search_type: image``).

# default category of the engine
categories = ['general']
# request() derives the record offset from ``params['pageno']``
# NOTE(review): presumably this flag announces paging support to the
# framework -- confirm against the engine loader.
paging = True
# page size: sent as ``maximumRecords`` and used to compute ``startRecord``
number_of_results = 10
# request() attaches HTTP digest auth only if user AND password are non-empty
http_digest_auth_user = ""
"""HTTP digest user for the local YACY instance"""
http_digest_auth_pass = ""
"""HTTP digest password for the local YACY instance"""

# sent unchanged as the ``resource`` query argument
search_mode = 'global'
"""Yacy search mode ``global`` or ``local``.  By default, Yacy operates in ``global``
mode.

``global``
  Peer-to-Peer search

``local``
  Privacy or Stealth mode, restricts the search to local yacy instance.
"""
# validated by init(), sent as the ``contentdom`` query argument and used by
# response() to choose between image and text result parsing
search_type = 'text'
"""One of ``text``, ``image`` / The search-types ``app``, ``audio`` and
``video`` are not yet implemented (Pull-Requests are welcome).
"""

# _base_url() reads this value from the engine named ``yacy`` (not from the
# current module) and picks a random entry when it is a list
base_url: list | str = 'https://yacy.searchlab.eu'
"""The value is an URL or a list of URLs.  In the latter case instance will be
selected randomly.
"""
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | def init(_): | 
					
						
							|  |  |  |     valid_types = [ | 
					
						
							|  |  |  |         'text', | 
					
						
							|  |  |  |         'image', | 
					
						
							|  |  |  |         # 'app', 'audio', 'video', | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  |     if search_type not in valid_types: | 
					
						
							|  |  |  |         raise ValueError('search_type "%s" is  not one of %s' % (search_type, valid_types)) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  | def _base_url() -> str: | 
					
						
							|  |  |  |     from searx.engines import engines  # pylint: disable=import-outside-toplevel | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     url = engines['yacy'].base_url  # type: ignore | 
					
						
							|  |  |  |     if isinstance(url, list): | 
					
						
							|  |  |  |         url = random.choice(url) | 
					
						
							|  |  |  |     return url | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     offset = (params['pageno'] - 1) * number_of_results | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  |     args = { | 
					
						
							|  |  |  |         'query': query, | 
					
						
							|  |  |  |         'startRecord': offset, | 
					
						
							|  |  |  |         'maximumRecords': number_of_results, | 
					
						
							|  |  |  |         'contentdom': search_type, | 
					
						
							|  |  |  |         'resource': search_mode, | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-09 10:40:55 +02:00
										 |  |  |     # add language tag if specified | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         args['lr'] = 'lang_' + params['language'].split('-')[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params["url"] = f"{_base_url()}/yacysearch.json?{urlencode(args)}" | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  |     if http_digest_auth_user and http_digest_auth_pass: | 
					
						
							| 
									
										
										
										
											2021-03-18 19:59:01 +01:00
										 |  |  |         params['auth'] = DigestAuth(http_digest_auth_user, http_digest_auth_pass) | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def response(resp): | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     raw_search_results = loads(resp.text) | 
					
						
							| 
									
										
										
										
											2014-01-05 00:46:42 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     # return empty array if there are no results | 
					
						
							| 
									
										
										
										
											2014-02-11 13:13:51 +01:00
										 |  |  |     if not raw_search_results: | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |     search_results = raw_search_results.get('channels', []) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |     if len(search_results) == 0: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in search_results[0].get('items', []): | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |         # parse image results | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  |         if search_type == 'image': | 
					
						
							| 
									
										
										
										
											2018-01-06 14:52:14 +01:00
										 |  |  |             result_url = '' | 
					
						
							|  |  |  |             if 'url' in result: | 
					
						
							|  |  |  |                 result_url = result['url'] | 
					
						
							|  |  |  |             elif 'link' in result: | 
					
						
							|  |  |  |                 result_url = result['link'] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |             # append result | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |             results.append( | 
					
						
							|  |  |  |                 { | 
					
						
							|  |  |  |                     'url': result_url, | 
					
						
							|  |  |  |                     'title': result['title'], | 
					
						
							|  |  |  |                     'content': '', | 
					
						
							|  |  |  |                     'img_src': result['image'], | 
					
						
							|  |  |  |                     'template': 'images.html', | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |         # parse general results | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  |             publishedDate = None | 
					
						
							|  |  |  |             if 'pubDate' in result: | 
					
						
							|  |  |  |                 publishedDate = parser.parse(result['pubDate']) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |             # append result | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |             results.append( | 
					
						
							|  |  |  |                 { | 
					
						
							| 
									
										
										
										
											2023-10-08 19:27:48 +02:00
										 |  |  |                     'url': result['link'] or '', | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |                     'title': result['title'], | 
					
						
							|  |  |  |                     'content': html_to_text(result['description']), | 
					
						
							|  |  |  |                     'publishedDate': publishedDate, | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |         # TODO parse video, audio and file results | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |