| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  |  Yacy (Web, Images, Videos, Music, Files) | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | from dateutil import parser | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  | from requests.auth import HTTPDigestAuth | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-12-11 14:05:07 +01:00
										 |  |  | from searx.utils import html_to_text | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-01-13 11:31:25 +01:00
										 |  |  | # about | 
					
						
							|  |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://yacy.net/', | 
					
						
							|  |  |  |     "wikidata_id": 'Q1759675', | 
					
						
							|  |  |  |     "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API', | 
					
						
							|  |  |  |     "use_official_api": True, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'JSON', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2014-12-07 16:37:56 +01:00
										 |  |  | categories = ['general', 'images']  # TODO , 'music', 'videos', 'files' | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | paging = True | 
					
						
							|  |  |  | number_of_results = 5 | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  | http_digest_auth_user = "" | 
					
						
							|  |  |  | http_digest_auth_pass = "" | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | # search-url | 
					
						
							|  |  |  | base_url = 'http://localhost:8090' | 
					
						
							| 
									
										
										
										
											2014-12-16 17:10:20 +01:00
										 |  |  | search_url = '/yacysearch.json?{query}'\ | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |              '&startRecord={offset}'\ | 
					
						
							|  |  |  |              '&maximumRecords={limit}'\ | 
					
						
							|  |  |  |              '&contentdom={search_type}'\ | 
					
						
							|  |  |  |              '&resource=global' | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # yacy specific type-definitions | 
					
						
							|  |  |  | search_types = {'general': 'text', | 
					
						
							|  |  |  |                 'images': 'image', | 
					
						
							| 
									
										
										
										
											2014-12-07 16:37:56 +01:00
										 |  |  |                 'files': 'app', | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |                 'music': 'audio', | 
					
						
							|  |  |  |                 'videos': 'video'} | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # do search-request | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     offset = (params['pageno'] - 1) * number_of_results | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |     search_type = search_types.get(params.get('category'), '0') | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-16 17:10:20 +01:00
										 |  |  |     params['url'] = base_url +\ | 
					
						
							|  |  |  |         search_url.format(query=urlencode({'query': query}), | 
					
						
							|  |  |  |                           offset=offset, | 
					
						
							|  |  |  |                           limit=number_of_results, | 
					
						
							|  |  |  |                           search_type=search_type) | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  |     if http_digest_auth_user and http_digest_auth_pass: | 
					
						
							|  |  |  |         params['auth'] = HTTPDigestAuth(http_digest_auth_user, http_digest_auth_pass) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  |     # add language tag if specified | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         params['url'] += '&lr=lang_' + params['language'].split('-')[0] | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # get response from search-request | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def response(resp): | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     raw_search_results = loads(resp.text) | 
					
						
							| 
									
										
										
										
											2014-01-05 00:46:42 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     # return empty array if there are no results | 
					
						
							| 
									
										
										
										
											2014-02-11 13:13:51 +01:00
										 |  |  |     if not raw_search_results: | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |     search_results = raw_search_results.get('channels', []) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |     if len(search_results) == 0: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in search_results[0].get('items', []): | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |         # parse image results | 
					
						
							| 
									
										
										
										
											2020-10-09 15:05:13 +02:00
										 |  |  |         if resp.search_params.get('category') == 'images': | 
					
						
							| 
									
										
										
										
											2018-01-06 14:52:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |             result_url = '' | 
					
						
							|  |  |  |             if 'url' in result: | 
					
						
							|  |  |  |                 result_url = result['url'] | 
					
						
							|  |  |  |             elif 'link' in result: | 
					
						
							|  |  |  |                 result_url = result['link'] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |             # append result | 
					
						
							| 
									
										
										
										
											2018-01-06 14:52:14 +01:00
										 |  |  |             results.append({'url': result_url, | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |                             'title': result['title'], | 
					
						
							|  |  |  |                             'content': '', | 
					
						
							|  |  |  |                             'img_src': result['image'], | 
					
						
							|  |  |  |                             'template': 'images.html'}) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |         # parse general results | 
					
						
							| 
									
										
										
										
											2015-02-01 11:48:15 +01:00
										 |  |  |         else: | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |             publishedDate = parser.parse(result['pubDate']) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |             # append result | 
					
						
							|  |  |  |             results.append({'url': result['link'], | 
					
						
							| 
									
										
										
										
											2014-12-16 17:10:20 +01:00
										 |  |  |                             'title': result['title'], | 
					
						
							| 
									
										
										
										
											2016-12-11 14:05:07 +01:00
										 |  |  |                             'content': html_to_text(result['description']), | 
					
						
							| 
									
										
										
										
											2014-12-16 17:10:20 +01:00
										 |  |  |                             'publishedDate': publishedDate}) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-09 16:55:01 +01:00
										 |  |  |         # TODO parse video, audio and file results | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |