Merge branch 'master' into conditional-sigusr1

commit 4a27dabcf7
Makefile
@@ -192,6 +192,7 @@ PYLINT_FILES=\
	searx/engines/google_images.py \
	searx/engines/mediathekviewweb.py \
	searx/engines/solidtorrents.py \
	searx/engines/solr.py \
	searx/engines/google_scholar.py \
	searx/engines/yahoo_news.py \
	searx/engines/apkmirror.py \

searx/engines/acgsou.py (deleted)

@@ -1,74 +0,0 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
"""

from urllib.parse import urlencode
from lxml import html
from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex

# about
about = {
    "website": 'https://www.acgsou.com/',
    "wikidata_id": None,
    "official_api_documentation": None,
    "use_official_api": False,
    "require_api_key": False,
    "results": 'HTML',
}

# engine dependent config
categories = ['files', 'images', 'videos', 'music']
paging = True

# search-url
base_url = 'https://www.acgsou.com/'
search_url = base_url + 'search.php?{query}&page={offset}'
# xpath queries
xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
xpath_category = './/td[2]/a[1]'
xpath_title = './/td[3]/a[last()]'
xpath_torrent_links = './/td[3]/a'
xpath_filesize = './/td[4]/text()'


def request(query, params):
    query = urlencode({'keyword': query})
    params['url'] = search_url.format(query=query, offset=params['pageno'])
    return params


def response(resp):
    results = []
    dom = html.fromstring(resp.text)
    for result in eval_xpath_list(dom, xpath_results):
        # defaults
        filesize = 0
        magnet_link = "magnet:?xt=urn:btih:{}&tr=https://tracker.acgsou.com:2710/announce"

        category = extract_text(eval_xpath_getindex(result, xpath_category, 0, default=[]))
        page_a = eval_xpath_getindex(result, xpath_title, 0)
        title = extract_text(page_a)
        href = base_url + page_a.attrib.get('href')

        magnet_link = magnet_link.format(page_a.attrib.get('href')[5:-5])

        filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, default=None)
        if filesize_info:
            try:
                filesize = filesize_info[:-2]
                filesize_multiplier = filesize_info[-2:]
                filesize = get_torrent_size(filesize, filesize_multiplier)
            except:
                pass
        # download/seed/leech counts are omitted: they appear to be generated randomly on every page load
        content = 'Category: "{category}".'
        content = content.format(category=category)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})
    return results

searx/engines/microsoft_academic.py

@@ -3,10 +3,7 @@
 Microsoft Academic (Science)
"""

from datetime import datetime
from json import loads
from uuid import uuid4
from urllib.parse import urlencode
from json import dumps, loads
from searx.utils import html_to_text

# about
@@ -21,26 +18,25 @@ about = {

categories = ['images']
paging = True
result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}'
search_url = 'https://academic.microsoft.com/api/search'
_paper_url = 'https://academic.microsoft.com/paper/{id}/reference'


def request(query, params):
    correlation_id = uuid4()
    msacademic = uuid4()
    time_now = datetime.now()

    params['url'] = result_url.format(query=urlencode({'correlationId': correlation_id}))
    params['cookies']['msacademic'] = str(msacademic)
    params['cookies']['ai_user'] = 'vhd0H|{now}'.format(now=str(time_now))
    params['url'] = search_url
    params['method'] = 'POST'
    params['data'] = {
        'Query': '@{query}@'.format(query=query),
        'Limit': 10,
        'Offset': params['pageno'] - 1,
        'Filters': '',
        'OrderBy': '',
        'SortAscending': False,
    }
    params['headers']['content-type'] = 'application/json; charset=utf-8'
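    # the new endpoint takes a JSON body; 'skip' and 'take' page through results ten at a time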
    params['data'] = dumps({
        'query': query,
        'queryExpression': '',
        'filters': [],
        'orderBy': 0,
        'skip': (params['pageno'] - 1) * 10,
        'sortAscending': True,
        'take': 10,
        'includeCitationContexts': False,
        'profileId': '',
    })

    return params

@@ -51,10 +47,13 @@ def response(resp):
    if not response_data:
        return results

    for result in response_data['results']:
        url = _get_url(result)
        title = result['e']['dn']
        content = _get_content(result)
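    # new response format: 'pr' holds the paper records; 'dn' appears to be the display name/title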
    for result in response_data['pr']:
        if 'dn' not in result['paper']:
            continue

        title = result['paper']['dn']
        content = _get_content(result['paper'])
        url = _paper_url.format(id=result['paper']['id'])
        results.append({
            'url': url,
            'title': html_to_text(title),
@@ -64,15 +63,9 @@ def response(resp):
    return results


def _get_url(result):
    if 's' in result['e']:
        return result['e']['s'][0]['u']
    return 'https://academic.microsoft.com/#/detail/{pid}'.format(pid=result['id'])


def _get_content(result):
    if 'd' in result['e']:
        content = result['e']['d']
    if 'd' in result:
        content = result['d']
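        # truncate content longer than 300 characters for display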
        if len(content) > 300:
            return content[:300] + '...'
        return content

searx/engines/solr.py (new file, 74 lines)

@@ -0,0 +1,74 @@
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
 Solr
"""

# pylint: disable=global-statement, missing-function-docstring

from json import loads
from urllib.parse import urlencode
from searx.exceptions import SearxEngineAPIException


base_url = 'http://localhost:8983'
collection = ''
rows = 10
sort = '' # sorting: asc or desc
field_list = 'name' # list of field names to display on the UI
default_fields = '' # default field to query
query_fields = '' # query fields
_search_url = ''
paging = True


def init(_):
    if collection == '':
        raise ValueError('collection cannot be empty')

    global _search_url
    _search_url = base_url + '/solr/' + collection + '/select?{params}'


def request(query, params):
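    # q, rows, fl, qf, df, sort and start are all standard parameters of Solr's /select handler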
    query_params = {'q': query, 'rows': rows}
    if field_list != '':
        query_params['fl'] = field_list
    if query_fields != '':
        query_params['qf'] = query_fields
    if default_fields != '':
        query_params['df'] = default_fields
    if sort != '':
        query_params['sort'] = sort

    if 'pageno' in params:
        query_params['start'] = rows * (params['pageno'] - 1)

    params['url'] = _search_url.format(params=urlencode(query_params))

    return params


def response(resp):
    resp_json = __get_response(resp)

    results = []
    for result in resp_json['response']['docs']:
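        # stringify every field so the generic key-value template can render arbitrary documents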
        r = {key: str(value) for key, value in result.items()}
        if len(r) == 0:
            continue
        r['template'] = 'key-value.html'
        results.append(r)

    return results


def __get_response(resp):
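    # Solr reports request errors inside the JSON body; surface them as engine exceptions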
    try:
        resp_json = loads(resp.text)
    except Exception as e:
        raise SearxEngineAPIException("failed to parse response") from e

    if 'error' in resp_json:
        raise SearxEngineAPIException(resp_json['error']['msg'])

    return resp_json

searx/engines/yahoo_news.py

@@ -22,13 +22,6 @@ from searx.utils import (

from searx.engines.yahoo import parse_url

# pylint: disable=unused-import
from searx.engines.yahoo import (
    _fetch_supported_languages,
    supported_languages_url,
)
# pylint: enable=unused-import

logger = logger.getChild('yahoo_news engine')

# about

searx/engines/youtube_noapi.py

@@ -4,7 +4,7 @@
"""

from functools import reduce
from json import loads
from json import loads, dumps
from urllib.parse import quote_plus

# about
@@ -20,12 +20,15 @@ about = {
# engine dependent config
categories = ['videos', 'music']
paging = True
language_support = False
time_range_support = True

# search-url
base_url = 'https://www.youtube.com/results'
search_url = base_url + '?search_query={query}&page={page}'
time_range_url = '&sp=EgII{time_range}%253D%253D'
# the key seems to be constant
next_page_url = 'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
time_range_dict = {'day': 'Ag',
                   'week': 'Aw',
                   'month': 'BA',
@@ -40,21 +43,73 @@ base_youtube_url = 'https://www.youtube.com/watch?v='

# do search-request
def request(query, params):
    params['url'] = search_url.format(query=quote_plus(query),
                                      page=params['pageno'])
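    # first page: fetch the plain HTML results page; subsequent pages: POST to the
    # youtubei continuation endpoint with the token taken from the previous response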
    if not params['engine_data'].get('next_page_token'):
        params['url'] = search_url.format(query=quote_plus(query), page=params['pageno'])
        if params['time_range'] in time_range_dict:
            params['url'] += time_range_url.format(time_range=time_range_dict[params['time_range']])
    else:
        params['url'] = next_page_url
        params['method'] = 'POST'
        params['data'] = dumps({
            'context': {"client": {"clientName": "WEB", "clientVersion": "2.20210310.12.01"}},
            'continuation': params['engine_data']['next_page_token'],
        })
        params['headers']['Content-Type'] = 'application/json'

    return params


# get response from search-request
def response(resp):
    if resp.search_params.get('engine_data'):
        return parse_next_page_response(resp.text)
    return parse_first_page_response(resp.text)

def parse_next_page_response(response_text):
    results = []
    result_json = loads(response_text)
    for section in (result_json['onResponseReceivedCommands'][0]
                    .get('appendContinuationItemsAction')['continuationItems'][0]
                    .get('itemSectionRenderer')['contents']):
        if 'videoRenderer' not in section:
            continue
        section = section['videoRenderer']
        content = "-"
        if 'descriptionSnippet' in section:
            content = ' '.join(x['text'] for x in section['descriptionSnippet']['runs'])
        results.append({
            'url': base_youtube_url + section['videoId'],
            'title': ' '.join(x['text'] for x in section['title']['runs']),
            'content': content,
            'author': section['ownerText']['runs'][0]['text'],
            'length': section['lengthText']['simpleText'],
            'template': 'videos.html',
            'embedded': embedded_url.format(videoid=section['videoId']),
            'thumbnail': section['thumbnail']['thumbnails'][-1]['url'],
        })
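    # re-emit the continuation token so the next request can page further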
    try:
        token = result_json['onResponseReceivedCommands'][0]\
            .get('appendContinuationItemsAction')['continuationItems'][1]\
            .get('continuationItemRenderer')['continuationEndpoint']\
            .get('continuationCommand')['token']
        results.append({
            "engine_data": token,
            "key": "next_page_token",
        })
    except Exception:
        # no continuation token on the last results page
        pass

    return results


def parse_first_page_response(response_text):
    results = []
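    # results are embedded in the page as a JSON blob assigned to ytInitialData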
    results_data = response_text[response_text.find('ytInitialData'):]
    results_data = results_data[results_data.find('{'):results_data.find(';</script>')]

    results_json = loads(results_data) if results_data else {}
    sections = results_json.get('contents', {})\
                           .get('twoColumnSearchResultsRenderer', {})\
@@ -63,6 +118,16 @@ def response(resp):
                           .get('contents', [])

    for section in sections:
        if "continuationItemRenderer" in section:
            next_page_token = section["continuationItemRenderer"]\
                .get("continuationEndpoint", {})\
                .get("continuationCommand", {})\
                .get("token", "")
            if next_page_token:
                results.append({
                    "engine_data": next_page_token,
                    "key": "next_page_token",
                })
        for video_container in section.get('itemSectionRenderer', {}).get('contents', []):
            video = video_container.get('videoRenderer', {})
            videoid = video.get('videoId')

searx/settings.yml

@@ -82,6 +82,8 @@ outgoing: # communication with search engines
#        https:
#            - http://proxy1:8080
#            - http://proxy2:8080
#    using_tor_proxy : True
#    extra_proxy_timeout : 10.0 # Extra seconds to add in order to account for the time taken by the proxy
# uncomment below section only if you have more than one network interface
# which can be the source of outgoing search requests
#    source_ips:
@@ -159,6 +161,7 @@ engines:
  - name : ahmia
    engine : ahmia
    categories : onions
    enable_http : True
    shortcut : ah

  - name : arch linux wiki
@@ -730,6 +733,8 @@ engines:
# Requires Tor
  - name : not evil
    engine : not_evil
    categories : onions
    enable_http : True
    shortcut : ne

  - name : nyaa
@@ -737,12 +742,6 @@ engines:
    shortcut : nt
    disabled : True

  - name : acgsou
    engine : acgsou
    shortcut : acg
    disabled : True
    timeout: 5.0

  - name : openairedatasets
    engine : json_engine
    paging : True
@@ -943,6 +942,17 @@ engines:
#    api_client_id : *******
#    api_client_secret : *******

#  - name : solr
#    engine : solr
#    shortcut : slr
#    base_url : http://localhost:8983
#    collection : collection_name
#    sort : '' # sorting: asc or desc
#    field_list : '' # comma separated list of field names to display on the UI
#    default_fields : '' # default field to query
#    query_fields : '' # query fields
#    enable_http : True
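#    # enable_http is needed here since the example base_url is plain http://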

  - name : startpage
    engine : startpage
    shortcut : sp
@@ -979,6 +989,7 @@ engines:
    title_xpath : ./td[2]/b
    content_xpath : ./td[2]/small
    categories : onions
    enable_http : True
    shortcut : tch

# maybe in a fun category