commit 2370234d09

Makefile
							| @ -213,10 +213,6 @@ gecko.driver: | ||||
| PHONY += test test.sh test.pylint test.pep8 test.unit test.coverage test.robot | ||||
| test: buildenv test.pylint test.pep8 test.unit gecko.driver test.robot | ||||
| 
 | ||||
| ifeq ($(PY),2) | ||||
| test.pylint: | ||||
| 	@echo "LINT      skip liniting py2" | ||||
| else | ||||
| # TODO: balance linting with pylint
 | ||||
| 
 | ||||
| test.pylint: pyenvinstall | ||||
| @ -225,7 +221,6 @@ test.pylint: pyenvinstall | ||||
| 		searx/testing.py \
 | ||||
| 		searx/engines/gigablast.py \
 | ||||
| 	) | ||||
| endif | ||||
| 
 | ||||
| # ignored rules:
 | ||||
| #  E402 module level import not at top of file
 | ||||
|  | ||||
| @ -39,7 +39,7 @@ install_geckodriver() { | ||||
|         return | ||||
|     fi | ||||
|     GECKODRIVER_VERSION="v0.24.0" | ||||
|     PLATFORM="`python -c "import six; import platform; six.print_(platform.system().lower(), platform.architecture()[0])"`" | ||||
|     PLATFORM="`python3 -c "import platform; print(platform.system().lower(), platform.architecture()[0])"`" | ||||
|     case "$PLATFORM" in | ||||
|         "linux 32bit" | "linux2 32bit") ARCH="linux32";; | ||||
|         "linux 64bit" | "linux2 64bit") ARCH="linux64";; | ||||
| @ -136,7 +136,7 @@ docker_build() { | ||||
|     # Check consistency between the git tag and the searx/version.py file | ||||
|     # /!\ HACK : parse Python file with bash /!\ | ||||
|     # otherwise it is not possible build the docker image without all Python dependencies ( version.py loads __init__.py ) | ||||
|     # SEARX_PYTHON_VERSION=$(python -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)") | ||||
|     # SEARX_PYTHON_VERSION=$(python3 -c "import six; import searx.version; six.print_(searx.version.VERSION_STRING)") | ||||
|     SEARX_PYTHON_VERSION=$(cat searx/version.py | grep "\(VERSION_MAJOR\|VERSION_MINOR\|VERSION_BUILD\) =" | cut -d\= -f2 | sed -e 's/^[[:space:]]*//' | paste -sd "." -) | ||||
|     if [ $(echo "$SEARX_GIT_VERSION" | cut -d- -f1) != "$SEARX_PYTHON_VERSION" ]; then | ||||
| 	echo "Inconsistency between the last git tag and the searx/version.py file" | ||||
|  | ||||
| @ -21,12 +21,8 @@ from os import environ | ||||
| from os.path import realpath, dirname, join, abspath, isfile | ||||
| from io import open | ||||
| from ssl import OPENSSL_VERSION_INFO, OPENSSL_VERSION | ||||
| try: | ||||
|     from yaml import safe_load | ||||
| except: | ||||
|     from sys import exit, stderr | ||||
|     stderr.write('[E] install pyyaml\n') | ||||
|     exit(2) | ||||
| from yaml import safe_load | ||||
| 
 | ||||
| 
 | ||||
| searx_dir = abspath(dirname(__file__)) | ||||
| engine_dir = dirname(realpath(__file__)) | ||||
|  | ||||
| @ -1,12 +1,8 @@ | ||||
| from os import listdir | ||||
| from os.path import realpath, dirname, join, isdir | ||||
| from sys import version_info | ||||
| from searx.utils import load_module | ||||
| from collections import defaultdict | ||||
| 
 | ||||
| if version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| 
 | ||||
| answerers_dir = dirname(realpath(__file__)) | ||||
| 
 | ||||
| @ -36,10 +32,10 @@ def ask(query): | ||||
|     results = [] | ||||
|     query_parts = list(filter(None, query.query.split())) | ||||
| 
 | ||||
|     if query_parts[0].decode('utf-8') not in answerers_by_keywords: | ||||
|     if query_parts[0] not in answerers_by_keywords: | ||||
|         return results | ||||
| 
 | ||||
|     for answerer in answerers_by_keywords[query_parts[0].decode('utf-8')]: | ||||
|     for answerer in answerers_by_keywords[query_parts[0]]: | ||||
|         result = answerer(query) | ||||
|         if result: | ||||
|             results.append(result) | ||||
|  | ||||
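Note on the hunk above: under Python 3 the split query parts are already str, so the keyword lookup needs no decode step. A minimal sketch of the pattern, with a made-up answerers_by_keywords mapping standing in for the real registry:

    # illustrative sketch only; the real registry is built from the answerer modules
    answerers_by_keywords = {'random': [], 'min': []}

    def ask(raw_query):
        query_parts = [p for p in raw_query.split() if p]   # already str, no .decode('utf-8')
        if not query_parts or query_parts[0] not in answerers_by_keywords:
            return []
        return [answerer(raw_query) for answerer in answerers_by_keywords[query_parts[0]]]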
| @ -1,7 +1,6 @@ | ||||
| import hashlib | ||||
| import random | ||||
| import string | ||||
| import sys | ||||
| import uuid | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| @ -10,12 +9,7 @@ from flask_babel import gettext | ||||
| keywords = ('random',) | ||||
| 
 | ||||
| random_int_max = 2**31 | ||||
| 
 | ||||
| if sys.version_info[0] == 2: | ||||
|     random_string_letters = string.lowercase + string.digits + string.uppercase | ||||
| else: | ||||
|     unicode = str | ||||
|     random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase | ||||
| random_string_letters = string.ascii_lowercase + string.digits + string.ascii_uppercase | ||||
| 
 | ||||
| 
 | ||||
| def random_characters(): | ||||
| @ -24,32 +18,32 @@ def random_characters(): | ||||
| 
 | ||||
| 
 | ||||
| def random_string(): | ||||
|     return u''.join(random_characters()) | ||||
|     return ''.join(random_characters()) | ||||
| 
 | ||||
| 
 | ||||
| def random_float(): | ||||
|     return unicode(random.random()) | ||||
|     return str(random.random()) | ||||
| 
 | ||||
| 
 | ||||
| def random_int(): | ||||
|     return unicode(random.randint(-random_int_max, random_int_max)) | ||||
|     return str(random.randint(-random_int_max, random_int_max)) | ||||
| 
 | ||||
| 
 | ||||
| def random_sha256(): | ||||
|     m = hashlib.sha256() | ||||
|     m.update(''.join(random_characters()).encode()) | ||||
|     return unicode(m.hexdigest()) | ||||
|     return str(m.hexdigest()) | ||||
| 
 | ||||
| 
 | ||||
| def random_uuid(): | ||||
|     return unicode(uuid.uuid4()) | ||||
|     return str(uuid.uuid4()) | ||||
| 
 | ||||
| 
 | ||||
| random_types = {b'string': random_string, | ||||
|                 b'int': random_int, | ||||
|                 b'float': random_float, | ||||
|                 b'sha256': random_sha256, | ||||
|                 b'uuid': random_uuid} | ||||
| random_types = {'string': random_string, | ||||
|                 'int': random_int, | ||||
|                 'float': random_float, | ||||
|                 'sha256': random_sha256, | ||||
|                 'uuid': random_uuid} | ||||
| 
 | ||||
| 
 | ||||
| # required answerer function | ||||
| @ -70,4 +64,4 @@ def answer(query): | ||||
| def self_info(): | ||||
|     return {'name': gettext('Random value generator'), | ||||
|             'description': gettext('Generate different random values'), | ||||
|             'examples': [u'random {}'.format(x.decode('utf-8')) for x in random_types]} | ||||
|             'examples': ['random {}'.format(x) for x in random_types]} | ||||
|  | ||||
| @ -1,11 +1,8 @@ | ||||
| from sys import version_info | ||||
| from functools import reduce | ||||
| from operator import mul | ||||
| 
 | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| if version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| keywords = ('min', | ||||
|             'max', | ||||
| @ -30,21 +27,21 @@ def answer(query): | ||||
|     func = parts[0] | ||||
|     answer = None | ||||
| 
 | ||||
|     if func == b'min': | ||||
|     if func == 'min': | ||||
|         answer = min(args) | ||||
|     elif func == b'max': | ||||
|     elif func == 'max': | ||||
|         answer = max(args) | ||||
|     elif func == b'avg': | ||||
|     elif func == 'avg': | ||||
|         answer = sum(args) / len(args) | ||||
|     elif func == b'sum': | ||||
|     elif func == 'sum': | ||||
|         answer = sum(args) | ||||
|     elif func == b'prod': | ||||
|     elif func == 'prod': | ||||
|         answer = reduce(mul, args, 1) | ||||
| 
 | ||||
|     if answer is None: | ||||
|         return [] | ||||
| 
 | ||||
|     return [{'answer': unicode(answer)}] | ||||
|     return [{'answer': str(answer)}] | ||||
| 
 | ||||
| 
 | ||||
| # required answerer function | ||||
|  | ||||
| @ -16,19 +16,16 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| ''' | ||||
| 
 | ||||
| 
 | ||||
| import sys | ||||
| from lxml import etree | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| from searx import settings | ||||
| from searx.languages import language_codes | ||||
| from searx.engines import ( | ||||
|     categories, engines, engine_shortcuts | ||||
| ) | ||||
| from searx.poolrequests import get as http_get | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| 
 | ||||
| def get(*args, **kwargs): | ||||
| @ -85,22 +82,22 @@ def searx_bang(full_query): | ||||
|             engine_query = full_query.getSearchQuery()[1:] | ||||
| 
 | ||||
|             for lc in language_codes: | ||||
|                 lang_id, lang_name, country, english_name = map(unicode.lower, lc) | ||||
|                 lang_id, lang_name, country, english_name = map(str.lower, lc) | ||||
| 
 | ||||
|                 # check if query starts with language-id | ||||
|                 if lang_id.startswith(engine_query): | ||||
|                     if len(engine_query) <= 2: | ||||
|                         results.append(u':{lang_id}'.format(lang_id=lang_id.split('-')[0])) | ||||
|                         results.append(':{lang_id}'.format(lang_id=lang_id.split('-')[0])) | ||||
|                     else: | ||||
|                         results.append(u':{lang_id}'.format(lang_id=lang_id)) | ||||
|                         results.append(':{lang_id}'.format(lang_id=lang_id)) | ||||
| 
 | ||||
|                 # check if query starts with language name | ||||
|                 if lang_name.startswith(engine_query) or english_name.startswith(engine_query): | ||||
|                     results.append(u':{lang_name}'.format(lang_name=lang_name)) | ||||
|                     results.append(':{lang_name}'.format(lang_name=lang_name)) | ||||
| 
 | ||||
|                 # check if query starts with country | ||||
|                 if country.startswith(engine_query.replace('_', ' ')): | ||||
|                     results.append(u':{country}'.format(country=country.replace(' ', '_'))) | ||||
|                     results.append(':{country}'.format(country=country.replace(' ', '_'))) | ||||
| 
 | ||||
|     # remove duplicates | ||||
|     result_set = set(results) | ||||
|  | ||||
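Note on the hunk above: with a single string type, str.lower can be mapped directly over a language_codes entry; no separate unicode type is involved. A small illustration (the tuple below is a sample value, not taken from searx.languages):

    lc = ('de-AT', 'Deutsch', 'Österreich', 'German')            # sample entry
    lang_id, lang_name, country, english_name = map(str.lower, lc)
    # -> 'de-at', 'deutsch', 'österreich', 'german'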
| @ -1,7 +1,8 @@ | ||||
| from urllib.parse import quote, urljoin | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import get_torrent_size | ||||
| from searx.url_utils import quote, urljoin | ||||
| 
 | ||||
| 
 | ||||
| url = 'https://1337x.to/' | ||||
| search_url = url + 'search/{search_term}/{pageno}/' | ||||
|  | ||||
| @ -9,9 +9,9 @@ | ||||
|  @parse        url, title, content, seed, leech, torrentfile | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # engine dependent config | ||||
| @ -63,7 +63,7 @@ def response(resp): | ||||
|         except: | ||||
|             pass | ||||
|         # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime | ||||
|         content = u'Category: "{category}".' | ||||
|         content = 'Category: "{category}".' | ||||
|         content = content.format(category=category) | ||||
| 
 | ||||
|         results.append({'url': href, | ||||
|  | ||||
| @ -9,9 +9,10 @@ | ||||
|  @parse       url, title, thumbnail_src | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -11,9 +11,9 @@ | ||||
|  @parse        url, title | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode, urljoin | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| @ -105,7 +105,7 @@ def request(query, params): | ||||
|     # if our language is hosted on the main site, we need to add its name | ||||
|     # to the query in order to narrow the results to that language | ||||
|     if language in main_langs: | ||||
|         query += b' (' + main_langs[language] + b')' | ||||
|         query += ' (' + main_langs[language] + ')' | ||||
| 
 | ||||
|     # prepare the request parameters | ||||
|     query = urlencode({'search': query}) | ||||
|  | ||||
| @ -11,9 +11,9 @@ | ||||
|  More info on api: https://arxiv.org/help/api/user-manual | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| 
 | ||||
| categories = ['science'] | ||||
| @ -30,7 +30,7 @@ def request(query, params): | ||||
|     # basic search | ||||
|     offset = (params['pageno'] - 1) * number_of_results | ||||
| 
 | ||||
|     string_args = dict(query=query.decode('utf-8'), | ||||
|     string_args = dict(query=query, | ||||
|                        offset=offset, | ||||
|                        number_of_results=number_of_results) | ||||
| 
 | ||||
|  | ||||
| @ -13,10 +13,10 @@ | ||||
|  More info on api: http://base-search.net/about/download/base_interface.pdf | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import etree | ||||
| from datetime import datetime | ||||
| import re | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import searx_useragent | ||||
| 
 | ||||
| 
 | ||||
| @ -55,7 +55,7 @@ shorcut_dict = { | ||||
| def request(query, params): | ||||
|     # replace shortcuts with API advanced search keywords | ||||
|     for key in shorcut_dict.keys(): | ||||
|         query = re.sub(key, shorcut_dict[key], str(query)) | ||||
|         query = re.sub(key, shorcut_dict[key], query) | ||||
| 
 | ||||
|     # basic search | ||||
|     offset = (params['pageno'] - 1) * number_of_results | ||||
|  | ||||
| @ -14,10 +14,10 @@ | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx import logger, utils | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language, gen_useragent, eval_xpath | ||||
| 
 | ||||
| logger = logger.getChild('bing engine') | ||||
| @ -47,7 +47,7 @@ def request(query, params): | ||||
|     else: | ||||
|         lang = match_language(params['language'], supported_languages, language_aliases) | ||||
| 
 | ||||
|     query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8') | ||||
|     query = 'language:{} {}'.format(lang.split('-')[0].upper(), query) | ||||
| 
 | ||||
|     search_path = search_string.format( | ||||
|         query=urlencode({'q': query}), | ||||
|  | ||||
| @ -12,10 +12,10 @@ | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from json import loads | ||||
| import re | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language | ||||
| 
 | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases | ||||
| @ -91,7 +91,7 @@ def response(resp): | ||||
| 
 | ||||
|             # strip 'Unicode private use area' highlighting, they render to Tux | ||||
|             # the Linux penguin and a standing diamond on my machine... | ||||
|             title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '') | ||||
|             title = m.get('t', '').replace('\ue000', '').replace('\ue001', '') | ||||
|             results.append({'template': 'images.html', | ||||
|                             'url': m['purl'], | ||||
|                             'thumbnail_src': m['turl'], | ||||
|  | ||||
| @ -13,10 +13,9 @@ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| from dateutil import parser | ||||
| from urllib.parse import urlencode, urlparse, parse_qsl | ||||
| from lxml import etree | ||||
| from searx.utils import list_get, match_language | ||||
| from searx.url_utils import urlencode, urlparse, parse_qsl | ||||
| 
 | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -12,7 +12,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from lxml import html | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import match_language | ||||
| 
 | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases | ||||
|  | ||||
| @ -12,8 +12,8 @@ | ||||
| 
 | ||||
| from lxml import html | ||||
| from operator import itemgetter | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import quote, urljoin | ||||
| from searx.utils import get_torrent_size | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -1,26 +1,23 @@ | ||||
| import json | ||||
| import re | ||||
| import os | ||||
| import sys | ||||
| import unicodedata | ||||
| 
 | ||||
| from io import open | ||||
| from datetime import datetime | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| categories = [] | ||||
| url = 'https://duckduckgo.com/js/spice/currency/1/{0}/{1}' | ||||
| weight = 100 | ||||
| 
 | ||||
| parser_re = re.compile(b'.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) | ||||
| parser_re = re.compile('.*?(\\d+(?:\\.\\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I) | ||||
| 
 | ||||
| db = 1 | ||||
| 
 | ||||
| 
 | ||||
| def normalize_name(name): | ||||
|     name = name.decode('utf-8').lower().replace('-', ' ').rstrip('s') | ||||
|     name = name.lower().replace('-', ' ').rstrip('s') | ||||
|     name = re.sub(' +', ' ', name) | ||||
|     return unicodedata.normalize('NFKD', name).lower() | ||||
| 
 | ||||
|  | ||||
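Note on the hunks above: the converter's regex is now compiled from a str pattern and matched against the str query directly. A rough usage sketch under that assumption (the query text is an invented example):

    import re

    # same pattern as above, written as a raw string
    parser_re = re.compile(r'.*?(\d+(?:\.\d+)?) ([^.0-9]+) (?:in|to) ([^.0-9]+)', re.I)

    m = parser_re.match('100 usd in eur')
    if m:
        amount, from_currency, to_currency = m.groups()          # ('100', 'usd', 'eur')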
| @ -14,7 +14,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import match_language, html_to_text | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| @ -50,7 +50,7 @@ def response(resp): | ||||
|             if url.startswith('http://'): | ||||
|                 url = 'https' + url[4:] | ||||
| 
 | ||||
|             content = u'{} - {} - {}'.format( | ||||
|             content = '{} - {} - {}'.format( | ||||
|                 result['artist']['name'], | ||||
|                 result['album']['title'], | ||||
|                 result['title']) | ||||
|  | ||||
| @ -14,8 +14,9 @@ | ||||
| 
 | ||||
| from lxml import html | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
|  | ||||
| @ -10,15 +10,15 @@ | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from urllib.parse import urljoin | ||||
| from lxml import html | ||||
| from searx.utils import is_valid_lang, eval_xpath | ||||
| from searx.url_utils import urljoin | ||||
| 
 | ||||
| categories = ['general'] | ||||
| url = u'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' | ||||
| url = 'https://dictzone.com/{from_lang}-{to_lang}-dictionary/{query}' | ||||
| weight = 100 | ||||
| 
 | ||||
| parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) | ||||
| parser_re = re.compile('.*?([a-z]+)-([a-z]+) ([^ ]+)$', re.I) | ||||
| results_xpath = './/table[@id="r"]/tr' | ||||
| 
 | ||||
| 
 | ||||
| @ -37,7 +37,7 @@ def request(query, params): | ||||
| 
 | ||||
|     params['url'] = url.format(from_lang=from_lang[2], | ||||
|                                to_lang=to_lang[2], | ||||
|                                query=query.decode('utf-8')) | ||||
|                                query=query) | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
|  | ||||
| @ -10,14 +10,11 @@ | ||||
|  @parse       url, title, content, magnetlink | ||||
| """ | ||||
| 
 | ||||
| from sys import version_info | ||||
| from urllib.parse import urljoin | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import get_torrent_size | ||||
| from searx.url_utils import urljoin | ||||
| 
 | ||||
| if version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| categories = ['videos', 'music', 'files'] | ||||
| paging = True | ||||
|  | ||||
| @ -14,8 +14,8 @@ import random | ||||
| import string | ||||
| from dateutil import parser | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.url_utils import urlencode | ||||
| from datetime import datetime | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -9,10 +9,10 @@ | ||||
| # @stable      yes | ||||
| # @parse       (general)    url, title, content | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml.html import fromstring | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import eval_xpath | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general']  # TODO , 'images', 'music', 'videos', 'files' | ||||
|  | ||||
| @ -15,9 +15,9 @@ | ||||
| 
 | ||||
| from lxml.html import fromstring | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.poolrequests import get | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language, eval_xpath | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -10,11 +10,11 @@ DuckDuckGo (definitions) | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from re import compile | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import html_to_text, match_language | ||||
| 
 | ||||
| url = 'https://api.duckduckgo.com/'\ | ||||
|  | ||||
| @ -14,13 +14,13 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.engines.duckduckgo import ( | ||||
|     _fetch_supported_languages, supported_languages_url, | ||||
|     get_region_code, language_aliases | ||||
| ) | ||||
| from searx.poolrequests import get | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
|  | ||||
| @ -10,9 +10,9 @@ | ||||
| 
 | ||||
| from lxml import html, etree | ||||
| import re | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import eval_xpath | ||||
| from searx.url_utils import quote, urljoin | ||||
| from searx import logger | ||||
| 
 | ||||
| categories = ['general'] | ||||
|  | ||||
| @ -10,8 +10,8 @@ | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import quote | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import quote | ||||
| from searx.utils import eval_xpath | ||||
| 
 | ||||
| categories = ['general'] | ||||
|  | ||||
| @ -9,9 +9,9 @@ | ||||
|  @parse        url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files'] | ||||
|  | ||||
| @ -1,9 +1,6 @@ | ||||
| from searx.url_utils import urlencode | ||||
| from html.parser import HTMLParser | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| try: | ||||
|     from HTMLParser import HTMLParser | ||||
| except: | ||||
|     from html.parser import HTMLParser | ||||
| 
 | ||||
| url = 'http://www.filecrop.com/' | ||||
| search_url = url + '/search.php?{query}&size_i=0&size_f=100000000&engine_r=1&engine_d=1&engine_e=1&engine_4=1&engine_m=1&pos={index}'  # noqa | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| categories = ['images'] | ||||
| 
 | ||||
|  | ||||
| @ -15,8 +15,8 @@ | ||||
| from json import loads | ||||
| from time import time | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from searx.engines import logger | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import ecma_unescape, html_to_text | ||||
| 
 | ||||
| logger = logger.getChild('flickr-noapi') | ||||
| @ -117,10 +117,10 @@ def response(resp): | ||||
|             'img_format': img_format, | ||||
|             'template': 'images.html' | ||||
|         } | ||||
|         result['author'] = author.encode('utf-8', 'ignore').decode('utf-8') | ||||
|         result['source'] = source.encode('utf-8', 'ignore').decode('utf-8') | ||||
|         result['title'] = title.encode('utf-8', 'ignore').decode('utf-8') | ||||
|         result['content'] = content.encode('utf-8', 'ignore').decode('utf-8') | ||||
|         result['author'] = author.encode(errors='ignore').decode() | ||||
|         result['source'] = source.encode(errors='ignore').decode() | ||||
|         result['title'] = title.encode(errors='ignore').decode() | ||||
|         result['content'] = content.encode(errors='ignore').decode() | ||||
|         results.append(result) | ||||
| 
 | ||||
|     return results | ||||
|  | ||||
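Note on the hunk above: the encode/decode round trip drops characters that UTF-8 cannot represent (for example lone surrogates that can appear in scraped JSON); str.encode() already defaults to UTF-8 in Python 3, so only errors='ignore' has to be spelled out. A hedged sketch of the effect:

    title = 'photo \udcff title'                    # '\udcff' is a lone surrogate
    clean = title.encode(errors='ignore').decode()
    # clean == 'photo  title' -- the unencodable character is silently removed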
| @ -10,13 +10,10 @@ | ||||
|  @parse       url, title, content, thumbnail, img_src | ||||
| """ | ||||
| 
 | ||||
| try: | ||||
|     from cgi import escape | ||||
| except: | ||||
|     from html import escape | ||||
| from html import escape | ||||
| from urllib.parse import urljoin, urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urljoin, urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -10,7 +10,7 @@ Frinkiac (Images) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| categories = ['images'] | ||||
| 
 | ||||
|  | ||||
| @ -11,7 +11,7 @@ Genius | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,9 +11,9 @@ | ||||
|  @parse        url, title | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode, urljoin | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| @ -90,7 +90,7 @@ def request(query, params): | ||||
|     # if our language is hosted on the main site, we need to add its name | ||||
|     # to the query in order to narrow the results to that language | ||||
|     if language in main_langs: | ||||
|         query += b' (' + (main_langs[language]).encode('utf-8') + b')' | ||||
|         query += ' (' + main_langs[language] + ')' | ||||
| 
 | ||||
|     # prepare the request parameters | ||||
|     query = urlencode({'search': query}) | ||||
|  | ||||
| @ -14,8 +14,8 @@ | ||||
| 
 | ||||
| import re | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| # from searx import logger | ||||
| from searx.url_utils import urlencode | ||||
| from searx.poolrequests import get | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -18,11 +18,11 @@ Definitions`_. | ||||
| 
 | ||||
| # pylint: disable=invalid-name, missing-function-docstring | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse | ||||
| from lxml import html | ||||
| from flask_babel import gettext | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx import logger | ||||
| from searx.url_utils import urlencode, urlparse | ||||
| from searx.utils import match_language, eval_xpath | ||||
| 
 | ||||
| logger = logger.getChild('google engine') | ||||
|  | ||||
| @ -24,11 +24,10 @@ Definitions`_. | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| import urllib | ||||
| from urllib.parse import urlencode, urlparse, unquote | ||||
| from lxml import html | ||||
| from flask_babel import gettext | ||||
| from searx import logger | ||||
| from searx.url_utils import urlencode, urlparse | ||||
| from searx.utils import eval_xpath | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| @ -87,7 +86,7 @@ def scrap_img_by_id(script, data_id): | ||||
|         if 'gstatic.com/images' in line and data_id in line: | ||||
|             url_line = _script[i + 1] | ||||
|             img_url = url_line.split('"')[1] | ||||
|             img_url = urllib.parse.unquote(img_url.replace(r'\u00', r'%')) | ||||
|             img_url = unquote(img_url.replace(r'\u00', r'%')) | ||||
|     return img_url | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -10,9 +10,9 @@ | ||||
|  @parse       url, title, content, publishedDate | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.google import _fetch_supported_languages, supported_languages_url | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language | ||||
| 
 | ||||
| # search-url | ||||
|  | ||||
| @ -12,9 +12,9 @@ | ||||
| 
 | ||||
| from datetime import date, timedelta | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| import re | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -12,15 +12,12 @@ | ||||
| # @todo        embedded (needs some md5 from video page) | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from dateutil import parser | ||||
| from html.parser import HTMLParser | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| try: | ||||
|     from HTMLParser import HTMLParser | ||||
| except: | ||||
|     from html.parser import HTMLParser | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
|  | ||||
| @ -8,7 +8,7 @@ | ||||
| # @stable      yes | ||||
| # @parse       url, title, content, publishedDate, thumbnail, embedded, author, length | ||||
| 
 | ||||
| from searx.url_utils import quote_plus | ||||
| from urllib.parse import quote_plus | ||||
| from dateutil import parser | ||||
| import time | ||||
| 
 | ||||
|  | ||||
| @ -1,11 +1,8 @@ | ||||
| from collections import Iterable | ||||
| from json import loads | ||||
| from sys import version_info | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import to_string | ||||
| 
 | ||||
| if version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| search_url = None | ||||
| url_query = None | ||||
| @ -37,8 +34,6 @@ def iterate(iterable): | ||||
| def is_iterable(obj): | ||||
|     if type(obj) == str: | ||||
|         return False | ||||
|     if type(obj) == unicode: | ||||
|         return False | ||||
|     return isinstance(obj, Iterable) | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
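Note on the hunk above: with the unicode alias gone, only str has to be excluded from the iterable check. A compact equivalent is sketched below; it uses collections.abc.Iterable, since the collections.Iterable alias imported above is deprecated (and removed in Python 3.10), so this is a variant rather than the commit's code:

    from collections.abc import Iterable

    def is_iterable(obj):
        # strings are iterable but should be treated as scalar values here
        return not isinstance(obj, str) and isinstance(obj, Iterable)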
| @ -12,9 +12,9 @@ | ||||
| 
 | ||||
| from lxml import html | ||||
| from operator import itemgetter | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import get_torrent_size, convert_str_to_int | ||||
| from searx.url_utils import quote, urljoin | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music', 'files'] | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from string import Formatter | ||||
| from searx.url_utils import urlencode, quote | ||||
| from urllib.parse import urlencode, quote | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| @ -79,7 +79,7 @@ def response(resp): | ||||
|         if result.get('snippet', '').startswith('#REDIRECT'): | ||||
|             continue | ||||
|         url = base_url.format(language=resp.search_params['language']) +\ | ||||
|             'wiki/' + quote(result['title'].replace(' ', '_').encode('utf-8')) | ||||
|             'wiki/' + quote(result['title'].replace(' ', '_').encode()) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'url': url, | ||||
|  | ||||
| @ -12,8 +12,7 @@ Microsoft Academic (Science) | ||||
| from datetime import datetime | ||||
| from json import loads | ||||
| from uuid import uuid4 | ||||
| 
 | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| categories = ['images'] | ||||
|  | ||||
| @ -12,7 +12,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
|  | ||||
| @ -10,8 +10,8 @@ | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import urlencode | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -30,8 +30,8 @@ route_re = re.compile('(?:from )?(.+) to (.+)') | ||||
| # do search-request | ||||
| def request(query, params): | ||||
| 
 | ||||
|     params['url'] = base_url + search_string.format(query=query.decode('utf-8')) | ||||
|     params['route'] = route_re.match(query.decode('utf-8')) | ||||
|     params['url'] = base_url + search_string.format(query=query) | ||||
|     params['route'] = route_re.match(query) | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| @ -52,7 +52,7 @@ def response(resp): | ||||
|         if 'display_name' not in r: | ||||
|             continue | ||||
| 
 | ||||
|         title = r['display_name'] or u'' | ||||
|         title = r['display_name'] or '' | ||||
|         osm_type = r.get('osm_type', r.get('type')) | ||||
|         url = result_base_url.format(osm_type=osm_type, | ||||
|                                      osm_id=r['osm_id']) | ||||
| @ -64,7 +64,7 @@ def response(resp): | ||||
| 
 | ||||
|         # if no geojson is found and osm_type is a node, add geojson Point | ||||
|         if not geojson and osm_type == 'node': | ||||
|             geojson = {u'type': u'Point', u'coordinates': [r['lon'], r['lat']]} | ||||
|             geojson = {'type': 'Point', 'coordinates': [r['lon'], r['lat']]} | ||||
| 
 | ||||
|         address_raw = r.get('address') | ||||
|         address = {} | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,8 +11,8 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import searx_useragent | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['map'] | ||||
|  | ||||
| @ -11,7 +11,9 @@ | ||||
| from json import loads | ||||
| from datetime import datetime | ||||
| from operator import itemgetter | ||||
| from searx.url_utils import quote | ||||
| 
 | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import get_torrent_size | ||||
| 
 | ||||
| # engine dependent config | ||||
| @ -62,8 +64,8 @@ def response(resp): | ||||
|     # parse results | ||||
|     for result in search_res: | ||||
|         link = url + "description.php?id=" + result["id"] | ||||
|         magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + \ | ||||
|             "&dn=" + result["name"] + "&tr=" + "&tr=".join(trackers) | ||||
|         magnetlink = "magnet:?xt=urn:btih:" + result["info_hash"] + "&dn=" + result["name"]\ | ||||
|                      + "&tr=" + "&tr=".join(trackers) | ||||
| 
 | ||||
|         params = { | ||||
|             "url": link, | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| from flask_babel import gettext | ||||
| from lxml import etree | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| from searx.poolrequests import get | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -12,9 +12,9 @@ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| from json import loads | ||||
| from searx.utils import html_to_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text, match_language | ||||
| 
 | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = None | ||||
|  | ||||
| @ -12,7 +12,7 @@ | ||||
| 
 | ||||
| import json | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode, urljoin, urlparse | ||||
| from urllib.parse import urlencode, urljoin, urlparse | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general', 'images', 'news', 'social media'] | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads, dumps | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['science'] | ||||
| @ -29,7 +29,7 @@ def request(query, params): | ||||
|     params['url'] = search_url | ||||
|     params['method'] = 'POST' | ||||
|     params['headers']['Content-type'] = "application/json" | ||||
|     params['data'] = dumps({"query": query.decode('utf-8'), | ||||
|     params['data'] = dumps({"query": query, | ||||
|                             "searchField": "ALL", | ||||
|                             "sortDirection": "ASC", | ||||
|                             "sortOrder": "RELEVANCY", | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| from lxml import html | ||||
| from json import loads | ||||
| from operator import itemgetter | ||||
| from searx.url_utils import quote, urljoin | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -14,14 +14,11 @@ import re | ||||
| from json import loads | ||||
| from lxml import html | ||||
| from dateutil import parser | ||||
| from io import StringIO | ||||
| from urllib.parse import quote_plus, urlencode | ||||
| from searx import logger | ||||
| from searx.poolrequests import get as http_get | ||||
| from searx.url_utils import quote_plus, urlencode | ||||
| 
 | ||||
| try: | ||||
|     from cStringIO import StringIO | ||||
| except: | ||||
|     from io import StringIO | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| @ -61,7 +58,7 @@ def get_client_id(): | ||||
|             # gets app_js and searches for the clientid | ||||
|             response = http_get(app_js_url) | ||||
|             if response.ok: | ||||
|                 cids = cid_re.search(response.content.decode("utf-8")) | ||||
|                 cids = cid_re.search(response.content.decode()) | ||||
|                 if cids is not None and len(cids.groups()): | ||||
|                     return cids.groups()[0] | ||||
|     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| import requests | ||||
| import base64 | ||||
| 
 | ||||
| @ -39,8 +39,8 @@ def request(query, params): | ||||
|         'https://accounts.spotify.com/api/token', | ||||
|         data={'grant_type': 'client_credentials'}, | ||||
|         headers={'Authorization': 'Basic ' + base64.b64encode( | ||||
|             "{}:{}".format(api_client_id, api_client_secret).encode('utf-8') | ||||
|         ).decode('utf-8')} | ||||
|             "{}:{}".format(api_client_id, api_client_secret).encode() | ||||
|         ).decode()} | ||||
|     ) | ||||
|     j = loads(r.text) | ||||
|     params['headers'] = {'Authorization': 'Bearer {}'.format(j.get('access_token'))} | ||||
| @ -59,7 +59,7 @@ def response(resp): | ||||
|         if result['type'] == 'track': | ||||
|             title = result['name'] | ||||
|             url = result['external_urls']['spotify'] | ||||
|             content = u'{} - {} - {}'.format( | ||||
|             content = '{} - {} - {}'.format( | ||||
|                 result['artists'][0]['name'], | ||||
|                 result['album']['name'], | ||||
|                 result['name']) | ||||
|  | ||||
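Note on the hunk above: the Basic credential for the token request is the base64 encoding of "client_id:client_secret"; encode()/decode() default to UTF-8, so the explicit codec arguments can be dropped. A standalone sketch with placeholder credentials:

    import base64

    api_client_id = 'my-client-id'                 # placeholder values, not real settings
    api_client_secret = 'my-client-secret'

    token = base64.b64encode(
        '{}:{}'.format(api_client_id, api_client_secret).encode()
    ).decode()
    headers = {'Authorization': 'Basic ' + token}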
| @ -10,9 +10,9 @@ | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode, urljoin | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -11,10 +11,10 @@ | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from datetime import datetime | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -12,10 +12,10 @@ | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import get_torrent_size | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -12,11 +12,11 @@ import re | ||||
| from searx.utils import is_valid_lang | ||||
| 
 | ||||
| categories = ['general'] | ||||
| url = u'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' | ||||
| web_url = u'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' | ||||
| url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' | ||||
| web_url = 'https://mymemory.translated.net/en/{from_lang}/{to_lang}/{query}' | ||||
| weight = 100 | ||||
| 
 | ||||
| parser_re = re.compile(b'.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) | ||||
| parser_re = re.compile('.*?([a-z]+)-([a-z]+) (.{2,})$', re.I) | ||||
| api_key = '' | ||||
| 
 | ||||
| 
 | ||||
| @ -39,9 +39,9 @@ def request(query, params): | ||||
|         key_form = '' | ||||
|     params['url'] = url.format(from_lang=from_lang[1], | ||||
|                                to_lang=to_lang[1], | ||||
|                                query=query.decode('utf-8'), | ||||
|                                query=query, | ||||
|                                key=key_form) | ||||
|     params['query'] = query.decode('utf-8') | ||||
|     params['query'] = query | ||||
|     params['from_lang'] = from_lang | ||||
|     params['to_lang'] = to_lang | ||||
| 
 | ||||
|  | ||||
| @ -12,10 +12,10 @@ | ||||
|  @todo        publishedDate | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import urlencode, urljoin | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['social media'] | ||||
|  | ||||
| @ -10,7 +10,7 @@ | ||||
|  @parse       url, title, img_src, thumbnail_src | ||||
| """ | ||||
| 
 | ||||
| from searx.url_utils import urlencode, urlparse, urlunparse, parse_qsl | ||||
| from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl | ||||
| from json import loads | ||||
| 
 | ||||
| url = 'https://unsplash.com/' | ||||
|  | ||||
| @ -12,9 +12,9 @@ | ||||
| # @todo        rewrite to api | ||||
| # @todo        set content-parameter with correct data | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
|  | ||||
| @ -15,9 +15,9 @@ from searx import logger | ||||
| from searx.poolrequests import get | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language, eval_xpath | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from json import loads | ||||
| from lxml.html import fromstring | ||||
| from lxml import etree | ||||
| @ -76,7 +76,7 @@ def request(query, params): | ||||
| def response(resp): | ||||
|     results = [] | ||||
|     htmlparser = etree.HTMLParser() | ||||
|     html = fromstring(resp.content.decode("utf-8"), parser=htmlparser) | ||||
|     html = fromstring(resp.content.decode(), parser=htmlparser) | ||||
|     search_results = eval_xpath(html, wikidata_ids_xpath) | ||||
| 
 | ||||
|     if resp.search_params['language'].split('-')[0] == 'all': | ||||
| @ -89,7 +89,7 @@ def response(resp): | ||||
|         wikidata_id = search_result.split('/')[-1] | ||||
|         url = url_detail.format(query=urlencode({'page': wikidata_id, 'uselang': language})) | ||||
|         htmlresponse = get(url) | ||||
|         jsonresponse = loads(htmlresponse.content.decode("utf-8")) | ||||
|         jsonresponse = loads(htmlresponse.content.decode()) | ||||
|         results += getDetail(jsonresponse, wikidata_id, language, resp.search_params['language'], htmlparser) | ||||
| 
 | ||||
|     return results | ||||
| @ -453,16 +453,16 @@ def get_geolink(result): | ||||
|     latitude, longitude = coordinates.split(',') | ||||
| 
 | ||||
|     # convert to decimal | ||||
|     lat = int(latitude[:latitude.find(u'°')]) | ||||
|     lat = int(latitude[:latitude.find('°')]) | ||||
|     if latitude.find('\'') >= 0: | ||||
|         lat += int(latitude[latitude.find(u'°') + 1:latitude.find('\'')] or 0) / 60.0 | ||||
|         lat += int(latitude[latitude.find('°') + 1:latitude.find('\'')] or 0) / 60.0 | ||||
|     if latitude.find('"') >= 0: | ||||
|         lat += float(latitude[latitude.find('\'') + 1:latitude.find('"')] or 0) / 3600.0 | ||||
|     if latitude.find('S') >= 0: | ||||
|         lat *= -1 | ||||
|     lon = int(longitude[:longitude.find(u'°')]) | ||||
|     lon = int(longitude[:longitude.find('°')]) | ||||
|     if longitude.find('\'') >= 0: | ||||
|         lon += int(longitude[longitude.find(u'°') + 1:longitude.find('\'')] or 0) / 60.0 | ||||
|         lon += int(longitude[longitude.find('°') + 1:longitude.find('\'')] or 0) / 60.0 | ||||
|     if longitude.find('"') >= 0: | ||||
|         lon += float(longitude[longitude.find('\'') + 1:longitude.find('"')] or 0) / 3600.0 | ||||
|     if longitude.find('W') >= 0: | ||||
|  | ||||
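Note on the hunk above: the coordinate parsing turns a degrees/minutes/seconds string such as 48°51'24" into decimal degrees (degrees + minutes/60 + seconds/3600, negated for S or W). A simplified sketch of the same arithmetic, independent of the Wikidata string format:

    def dms_to_decimal(degrees, minutes=0.0, seconds=0.0, negative=False):
        # negative=True for southern latitudes and western longitudes
        value = degrees + minutes / 60.0 + seconds / 3600.0
        return -value if negative else value

    dms_to_decimal(48, 51, 24)     # ~48.8567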
| @ -10,13 +10,13 @@ | ||||
|  @parse       url, infobox | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import quote | ||||
| from json import loads | ||||
| from lxml.html import fromstring | ||||
| from searx.url_utils import quote | ||||
| from searx.utils import match_language, searx_useragent | ||||
| 
 | ||||
| # search-url | ||||
| search_url = u'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' | ||||
| search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' | ||||
| supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -9,7 +9,7 @@ | ||||
| # @parse       url, infobox | ||||
| 
 | ||||
| from lxml import etree | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # search-url | ||||
| search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' | ||||
| @ -45,15 +45,15 @@ def request(query, params): | ||||
| 
 | ||||
| # replace private user area characters to make text legible | ||||
| def replace_pua_chars(text): | ||||
|     pua_chars = {u'\uf522': u'\u2192',  # rigth arrow | ||||
|                  u'\uf7b1': u'\u2115',  # set of natural numbers | ||||
|                  u'\uf7b4': u'\u211a',  # set of rational numbers | ||||
|                  u'\uf7b5': u'\u211d',  # set of real numbers | ||||
|                  u'\uf7bd': u'\u2124',  # set of integer numbers | ||||
|                  u'\uf74c': 'd',        # differential | ||||
|                  u'\uf74d': u'\u212f',  # euler's number | ||||
|                  u'\uf74e': 'i',        # imaginary number | ||||
|                  u'\uf7d9': '='}        # equals sign | ||||
|     pua_chars = {'\uf522': '\u2192',  # rigth arrow | ||||
|                  '\uf7b1': '\u2115',  # set of natural numbers | ||||
|                  '\uf7b4': '\u211a',  # set of rational numbers | ||||
|                  '\uf7b5': '\u211d',  # set of real numbers | ||||
|                  '\uf7bd': '\u2124',  # set of integer numbers | ||||
|                  '\uf74c': 'd',       # differential | ||||
|                  '\uf74d': '\u212f',  # euler's number | ||||
|                  '\uf74e': 'i',       # imaginary number | ||||
|                  '\uf7d9': '='}       # equals sign | ||||
| 
 | ||||
|     for k, v in pua_chars.items(): | ||||
|         text = text.replace(k, v) | ||||
|  | ||||
| @ -10,9 +10,9 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from time import time | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| from searx.poolrequests import get as http_get | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| # search-url | ||||
| url = 'https://www.wolframalpha.com/' | ||||
|  | ||||
| @ -11,7 +11,7 @@ | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from searx.url_utils import urlencode, urljoin | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| from urllib.parse import unquote, urlencode, urljoin, urlparse | ||||
| from lxml import html | ||||
| from lxml.etree import _ElementStringResult, _ElementUnicodeResult | ||||
| from searx.utils import html_to_text, eval_xpath | ||||
| from searx.url_utils import unquote, urlencode, urljoin, urlparse | ||||
| 
 | ||||
| search_url = None | ||||
| url_xpath = None | ||||
| @ -56,7 +56,7 @@ def extract_url(xpath_results, search_url): | ||||
|     if url.startswith('//'): | ||||
|         # add http or https to this kind of url //example.com/ | ||||
|         parsed_search_url = urlparse(search_url) | ||||
|         url = u'{0}:{1}'.format(parsed_search_url.scheme or 'http', url) | ||||
|         url = '{0}:{1}'.format(parsed_search_url.scheme or 'http', url) | ||||
|     elif url.startswith('/'): | ||||
|         # fix relative url to the search engine | ||||
|         url = urljoin(search_url, url) | ||||
| @ -86,7 +86,7 @@ def normalize_url(url): | ||||
|         p = parsed_url.path | ||||
|         mark = p.find('/**') | ||||
|         if mark != -1: | ||||
|             return unquote(p[mark + 3:]).decode('utf-8') | ||||
|             return unquote(p[mark + 3:]) | ||||
| 
 | ||||
|     return url | ||||
| 
 | ||||
|  | ||||
| @ -14,7 +14,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
|  | ||||
| @ -11,9 +11,9 @@ | ||||
|  @parse       url, title, content, suggestion | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import unquote, urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text, extract_url | ||||
| from searx.url_utils import unquote, urlencode | ||||
| from searx.utils import match_language, eval_xpath | ||||
| 
 | ||||
| # engine dependent config | ||||
|  | ||||
| @ -11,13 +11,13 @@ | ||||
| 
 | ||||
| import re | ||||
| from datetime import datetime, timedelta | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text, extract_url | ||||
| from searx.engines.yahoo import ( | ||||
|     parse_url, _fetch_supported_languages, supported_languages_url, language_aliases | ||||
| ) | ||||
| from dateutil import parser | ||||
| from searx.url_utils import urlencode | ||||
| from searx.utils import match_language | ||||
| 
 | ||||
| # engine dependent config | ||||
| @ -58,7 +58,7 @@ def request(query, params): | ||||
| 
 | ||||
| def sanitize_url(url): | ||||
|     if ".yahoo.com/" in url: | ||||
|         return re.sub(u"\\;\\_ylt\\=.+$", "", url) | ||||
|         return re.sub("\\;\\_ylt\\=.+$", "", url) | ||||
|     else: | ||||
|         return url | ||||
| 
 | ||||
|  | ||||
| @ -9,9 +9,9 @@ | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx import logger | ||||
| from searx.url_utils import urlencode | ||||
| 
 | ||||
| logger = logger.getChild('yandex engine') | ||||
| 
 | ||||
|  | ||||
| @ -11,8 +11,8 @@ | ||||
| from lxml import html | ||||
| from operator import itemgetter | ||||
| from datetime import datetime | ||||
| from urllib.parse import quote | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.url_utils import quote | ||||
| from searx.utils import get_torrent_size | ||||
| from searx.poolrequests import get as http_get | ||||
| 
 | ||||
|  | ||||
| @ -10,7 +10,7 @@ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from searx.url_utils import urlencode | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music'] | ||||
|  | ||||
| @ -10,9 +10,9 @@ | ||||
| 
 | ||||
| from functools import reduce | ||||
| from json import loads | ||||
| from urllib.parse import quote_plus | ||||
| from searx.engines.xpath import extract_text | ||||
| from searx.utils import list_get | ||||
| from searx.url_utils import quote_plus | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music'] | ||||
|  | ||||
| @ -27,7 +27,7 @@ class SearxParameterException(SearxException): | ||||
|             message = 'Empty ' + name + ' parameter' | ||||
|         else: | ||||
|             message = 'Invalid value "' + value + '" for parameter ' + name | ||||
|         super(SearxParameterException, self).__init__(message) | ||||
|         super().__init__(message) | ||||
|         self.message = message | ||||
|         self.parameter_name = name | ||||
|         self.parameter_value = value | ||||
|  | ||||
| @ -23,7 +23,7 @@ def get_bang_url(search_query): | ||||
|     """ | ||||
| 
 | ||||
|     if search_query.external_bang: | ||||
|         query = search_query.query.decode('utf-8', 'ignore') | ||||
|         query = search_query.query | ||||
|         bang = _get_bang(search_query.external_bang) | ||||
| 
 | ||||
|         if bang and query: | ||||
|  | ||||
| @ -3,73 +3,73 @@ | ||||
| # this file is generated automatically by utils/update_search_languages.py | ||||
| 
 | ||||
| language_codes = ( | ||||
|     (u"af-NA", u"Afrikaans", u"", u"Afrikaans"), | ||||
|     (u"ar-SA", u"العربية", u"", u"Arabic"), | ||||
|     (u"be-BY", u"Беларуская", u"", u"Belarusian"), | ||||
|     (u"bg-BG", u"Български", u"", u"Bulgarian"), | ||||
|     (u"ca-AD", u"Català", u"", u"Catalan"), | ||||
|     (u"cs-CZ", u"Čeština", u"", u"Czech"), | ||||
|     (u"da-DK", u"Dansk", u"", u"Danish"), | ||||
|     (u"de", u"Deutsch", u"", u"German"), | ||||
|     (u"de-AT", u"Deutsch", u"Österreich", u"German"), | ||||
|     (u"de-CH", u"Deutsch", u"Schweiz", u"German"), | ||||
|     (u"de-DE", u"Deutsch", u"Deutschland", u"German"), | ||||
|     (u"el-GR", u"Ελληνικά", u"", u"Greek"), | ||||
|     (u"en", u"English", u"", u"English"), | ||||
|     (u"en-AU", u"English", u"Australia", u"English"), | ||||
|     (u"en-CA", u"English", u"Canada", u"English"), | ||||
|     (u"en-GB", u"English", u"United Kingdom", u"English"), | ||||
|     (u"en-IE", u"English", u"Ireland", u"English"), | ||||
|     (u"en-IN", u"English", u"India", u"English"), | ||||
|     (u"en-NZ", u"English", u"New Zealand", u"English"), | ||||
|     (u"en-PH", u"English", u"Philippines", u"English"), | ||||
|     (u"en-SG", u"English", u"Singapore", u"English"), | ||||
|     (u"en-US", u"English", u"United States", u"English"), | ||||
|     (u"es", u"Español", u"", u"Spanish"), | ||||
|     (u"es-AR", u"Español", u"Argentina", u"Spanish"), | ||||
|     (u"es-CL", u"Español", u"Chile", u"Spanish"), | ||||
|     (u"es-ES", u"Español", u"España", u"Spanish"), | ||||
|     (u"es-MX", u"Español", u"México", u"Spanish"), | ||||
|     (u"et-EE", u"Eesti", u"", u"Estonian"), | ||||
|     (u"fa-IR", u"فارسی", u"", u"Persian"), | ||||
|     (u"fi-FI", u"Suomi", u"", u"Finnish"), | ||||
|     (u"fr", u"Français", u"", u"French"), | ||||
|     (u"fr-BE", u"Français", u"Belgique", u"French"), | ||||
|     (u"fr-CA", u"Français", u"Canada", u"French"), | ||||
|     (u"fr-CH", u"Français", u"Suisse", u"French"), | ||||
|     (u"fr-FR", u"Français", u"France", u"French"), | ||||
|     (u"he-IL", u"עברית", u"", u"Hebrew"), | ||||
|     (u"hr-HR", u"Hrvatski", u"", u"Croatian"), | ||||
|     (u"hu-HU", u"Magyar", u"", u"Hungarian"), | ||||
|     (u"hy-AM", u"Հայերեն", u"", u"Armenian"), | ||||
|     (u"id-ID", u"Indonesia", u"", u"Indonesian"), | ||||
|     (u"is-IS", u"Íslenska", u"", u"Icelandic"), | ||||
|     (u"it-IT", u"Italiano", u"", u"Italian"), | ||||
|     (u"ja-JP", u"日本語", u"", u"Japanese"), | ||||
|     (u"ko-KR", u"한국어", u"", u"Korean"), | ||||
|     (u"lt-LT", u"Lietuvių", u"", u"Lithuanian"), | ||||
|     (u"lv-LV", u"Latviešu", u"", u"Latvian"), | ||||
|     (u"ms-MY", u"Melayu", u"", u"Malay"), | ||||
|     (u"nb-NO", u"Norsk Bokmål", u"", u"Norwegian Bokmål"), | ||||
|     (u"nl", u"Nederlands", u"", u"Dutch"), | ||||
|     (u"nl-BE", u"Nederlands", u"België", u"Dutch"), | ||||
|     (u"nl-NL", u"Nederlands", u"Nederland", u"Dutch"), | ||||
|     (u"pl-PL", u"Polski", u"", u"Polish"), | ||||
|     (u"pt", u"Português", u"", u"Portuguese"), | ||||
|     (u"pt-BR", u"Português", u"Brasil", u"Portuguese"), | ||||
|     (u"pt-PT", u"Português", u"Portugal", u"Portuguese"), | ||||
|     (u"ro-RO", u"Română", u"", u"Romanian"), | ||||
|     (u"ru-RU", u"Русский", u"", u"Russian"), | ||||
|     (u"sk-SK", u"Slovenčina", u"", u"Slovak"), | ||||
|     (u"sl-SI", u"Slovenščina", u"", u"Slovenian"), | ||||
|     (u"sr-RS", u"Srpski", u"", u"Serbian"), | ||||
|     (u"sv-SE", u"Svenska", u"", u"Swedish"), | ||||
|     (u"sw-KE", u"Kiswahili", u"", u"Swahili"), | ||||
|     (u"th-TH", u"ไทย", u"", u"Thai"), | ||||
|     (u"tr-TR", u"Türkçe", u"", u"Turkish"), | ||||
|     (u"uk-UA", u"Українська", u"", u"Ukrainian"), | ||||
|     (u"vi-VN", u"Tiếng Việt", u"", u"Vietnamese"), | ||||
|     (u"zh", u"中文", u"", u"Chinese"), | ||||
|     (u"zh-CN", u"中文", u"中国", u"Chinese"), | ||||
|     (u"zh-TW", u"中文", u"台灣", u"Chinese") | ||||
|     ("af-NA", "Afrikaans", "", "Afrikaans"), | ||||
|     ("ar-SA", "العربية", "", "Arabic"), | ||||
|     ("be-BY", "Беларуская", "", "Belarusian"), | ||||
|     ("bg-BG", "Български", "", "Bulgarian"), | ||||
|     ("ca-AD", "Català", "", "Catalan"), | ||||
|     ("cs-CZ", "Čeština", "", "Czech"), | ||||
|     ("da-DK", "Dansk", "", "Danish"), | ||||
|     ("de", "Deutsch", "", "German"), | ||||
|     ("de-AT", "Deutsch", "Österreich", "German"), | ||||
|     ("de-CH", "Deutsch", "Schweiz", "German"), | ||||
|     ("de-DE", "Deutsch", "Deutschland", "German"), | ||||
|     ("el-GR", "Ελληνικά", "", "Greek"), | ||||
|     ("en", "English", "", "English"), | ||||
|     ("en-AU", "English", "Australia", "English"), | ||||
|     ("en-CA", "English", "Canada", "English"), | ||||
|     ("en-GB", "English", "United Kingdom", "English"), | ||||
|     ("en-IE", "English", "Ireland", "English"), | ||||
|     ("en-IN", "English", "India", "English"), | ||||
|     ("en-NZ", "English", "New Zealand", "English"), | ||||
|     ("en-PH", "English", "Philippines", "English"), | ||||
|     ("en-SG", "English", "Singapore", "English"), | ||||
|     ("en-US", "English", "United States", "English"), | ||||
|     ("es", "Español", "", "Spanish"), | ||||
|     ("es-AR", "Español", "Argentina", "Spanish"), | ||||
|     ("es-CL", "Español", "Chile", "Spanish"), | ||||
|     ("es-ES", "Español", "España", "Spanish"), | ||||
|     ("es-MX", "Español", "México", "Spanish"), | ||||
|     ("et-EE", "Eesti", "", "Estonian"), | ||||
|     ("fa-IR", "فارسی", "", "Persian"), | ||||
|     ("fi-FI", "Suomi", "", "Finnish"), | ||||
|     ("fr", "Français", "", "French"), | ||||
|     ("fr-BE", "Français", "Belgique", "French"), | ||||
|     ("fr-CA", "Français", "Canada", "French"), | ||||
|     ("fr-CH", "Français", "Suisse", "French"), | ||||
|     ("fr-FR", "Français", "France", "French"), | ||||
|     ("he-IL", "עברית", "", "Hebrew"), | ||||
|     ("hr-HR", "Hrvatski", "", "Croatian"), | ||||
|     ("hu-HU", "Magyar", "", "Hungarian"), | ||||
|     ("hy-AM", "Հայերեն", "", "Armenian"), | ||||
|     ("id-ID", "Indonesia", "", "Indonesian"), | ||||
|     ("is-IS", "Íslenska", "", "Icelandic"), | ||||
|     ("it-IT", "Italiano", "", "Italian"), | ||||
|     ("ja-JP", "日本語", "", "Japanese"), | ||||
|     ("ko-KR", "한국어", "", "Korean"), | ||||
|     ("lt-LT", "Lietuvių", "", "Lithuanian"), | ||||
|     ("lv-LV", "Latviešu", "", "Latvian"), | ||||
|     ("ms-MY", "Melayu", "", "Malay"), | ||||
|     ("nb-NO", "Norsk Bokmål", "", "Norwegian Bokmål"), | ||||
|     ("nl", "Nederlands", "", "Dutch"), | ||||
|     ("nl-BE", "Nederlands", "België", "Dutch"), | ||||
|     ("nl-NL", "Nederlands", "Nederland", "Dutch"), | ||||
|     ("pl-PL", "Polski", "", "Polish"), | ||||
|     ("pt", "Português", "", "Portuguese"), | ||||
|     ("pt-BR", "Português", "Brasil", "Portuguese"), | ||||
|     ("pt-PT", "Português", "Portugal", "Portuguese"), | ||||
|     ("ro-RO", "Română", "", "Romanian"), | ||||
|     ("ru-RU", "Русский", "", "Russian"), | ||||
|     ("sk-SK", "Slovenčina", "", "Slovak"), | ||||
|     ("sl-SI", "Slovenščina", "", "Slovenian"), | ||||
|     ("sr-RS", "Srpski", "", "Serbian"), | ||||
|     ("sv-SE", "Svenska", "", "Swedish"), | ||||
|     ("sw-KE", "Kiswahili", "", "Swahili"), | ||||
|     ("th-TH", "ไทย", "", "Thai"), | ||||
|     ("tr-TR", "Türkçe", "", "Turkish"), | ||||
|     ("uk-UA", "Українська", "", "Ukrainian"), | ||||
|     ("vi-VN", "Tiếng Việt", "", "Vietnamese"), | ||||
|     ("zh", "中文", "", "Chinese"), | ||||
|     ("zh-CN", "中文", "中国", "Chinese"), | ||||
|     ("zh-TW", "中文", "台灣", "Chinese") | ||||
| ) | ||||
|  | ||||
| @ -20,13 +20,10 @@ from importlib import import_module | ||||
| from os import listdir, makedirs, remove, stat, utime | ||||
| from os.path import abspath, basename, dirname, exists, join | ||||
| from shutil import copyfile | ||||
| from sys import version_info | ||||
| from traceback import print_exc | ||||
| 
 | ||||
| from searx import logger, settings, static_path | ||||
| 
 | ||||
| if version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| logger = logger.getChild('plugins') | ||||
| 
 | ||||
| @ -38,8 +35,8 @@ from searx.plugins import (oa_doi_rewrite, | ||||
|                            tracker_url_remover, | ||||
|                            vim_hotkeys) | ||||
| 
 | ||||
| required_attrs = (('name', (str, unicode)), | ||||
|                   ('description', (str, unicode)), | ||||
| required_attrs = (('name', str), | ||||
|                   ('description', str), | ||||
|                   ('default_on', bool)) | ||||
| 
 | ||||
| optional_attrs = (('js_dependencies', tuple), | ||||
|  | ||||
| @ -16,17 +16,14 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| ''' | ||||
| 
 | ||||
| import re | ||||
| import sys | ||||
| from urllib.parse import urlparse | ||||
| from lxml import etree | ||||
| from os import listdir, environ | ||||
| from os.path import isfile, isdir, join | ||||
| from searx.plugins import logger | ||||
| from flask_babel import gettext | ||||
| from searx import searx_dir | ||||
| from searx.url_utils import urlparse | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| name = "HTTPS rewrite" | ||||
| description = gettext('Rewrite HTTP links to HTTPS if possible') | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| from urllib.parse import urlparse, parse_qsl | ||||
| from flask_babel import gettext | ||||
| import re | ||||
| from searx.url_utils import urlparse, parse_qsl | ||||
| from searx import settings | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -22,7 +22,7 @@ default_on = True | ||||
| 
 | ||||
| 
 | ||||
| # Self User Agent regex | ||||
| p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) | ||||
| p = re.compile('.*user[ -]agent.*', re.IGNORECASE) | ||||
| 
 | ||||
| 
 | ||||
| # attach callback to the post search hook | ||||
| @ -31,7 +31,7 @@ p = re.compile(b'.*user[ -]agent.*', re.IGNORECASE) | ||||
| def post_search(request, search): | ||||
|     if search.search_query.pageno > 1: | ||||
|         return True | ||||
|     if search.search_query.query == b'ip': | ||||
|     if search.search_query.query == 'ip': | ||||
|         x_forwarded_for = request.headers.getlist("X-Forwarded-For") | ||||
|         if x_forwarded_for: | ||||
|             ip = x_forwarded_for[0] | ||||
|  | ||||
| @ -17,7 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| 
 | ||||
| from flask_babel import gettext | ||||
| import re | ||||
| from searx.url_utils import urlunparse, parse_qsl, urlencode | ||||
| from urllib.parse import urlunparse, parse_qsl, urlencode | ||||
| 
 | ||||
| regexes = {re.compile(r'utm_[^&]+'), | ||||
|            re.compile(r'(wkey|wemail)[^&]*'), | ||||
|  | ||||
| @ -20,7 +20,7 @@ class HTTPAdapterWithConnParams(requests.adapters.HTTPAdapter): | ||||
|         self.config = {} | ||||
|         self.proxy_manager = {} | ||||
| 
 | ||||
|         super(requests.adapters.HTTPAdapter, self).__init__() | ||||
|         super().__init__() | ||||
| 
 | ||||
|         self._pool_connections = pool_connections | ||||
|         self._pool_maxsize = pool_maxsize | ||||
| @ -60,7 +60,7 @@ else: | ||||
| class SessionSinglePool(requests.Session): | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         super(SessionSinglePool, self).__init__() | ||||
|         super().__init__() | ||||
| 
 | ||||
|         # reuse the same adapters | ||||
|         with RLock(): | ||||
| @ -71,7 +71,7 @@ class SessionSinglePool(requests.Session): | ||||
|     def close(self): | ||||
|         """Call super, but clear adapters since they are managed globally""" | ||||
|         self.adapters.clear() | ||||
|         super(SessionSinglePool, self).close() | ||||
|         super().close() | ||||
| 
 | ||||
| 
 | ||||
| def set_timeout_for_thread(timeout, start_time=None): | ||||
|  | ||||
| @ -6,16 +6,11 @@ | ||||
| 
 | ||||
| from base64 import urlsafe_b64encode, urlsafe_b64decode | ||||
| from zlib import compress, decompress | ||||
| from sys import version | ||||
| from urllib.parse import parse_qs, urlencode | ||||
| 
 | ||||
| from searx import settings, autocomplete | ||||
| from searx.languages import language_codes as languages | ||||
| from searx.utils import match_language | ||||
| from searx.url_utils import parse_qs, urlencode | ||||
| 
 | ||||
| if version[0] == '3': | ||||
|     # pylint: disable=invalid-name | ||||
|     unicode = str | ||||
| 
 | ||||
| 
 | ||||
| COOKIE_MAX_AGE = 60 * 60 * 24 * 365 * 5  # 5 years | ||||
| @ -37,7 +32,7 @@ class ValidationException(Exception): | ||||
|     """ | ||||
| 
 | ||||
| 
 | ||||
| class Setting(object): | ||||
| class Setting: | ||||
|     """Base class of user settings""" | ||||
| 
 | ||||
|     def __init__(self, default_value, **kwargs): | ||||
| @ -315,7 +310,7 @@ class PluginsSetting(SwitchableSetting): | ||||
|         return [item[len('plugin_'):] for item in items] | ||||
| 
 | ||||
| 
 | ||||
| class Preferences(object): | ||||
| class Preferences: | ||||
|     """Validates and saves preferences to cookies""" | ||||
| 
 | ||||
|     def __init__(self, themes, categories, engines, plugins): | ||||
| @ -402,14 +397,14 @@ class Preferences(object): | ||||
| 
 | ||||
|         settings_kv['tokens'] = ','.join(self.tokens.values) | ||||
| 
 | ||||
|         return urlsafe_b64encode(compress(urlencode(settings_kv).encode('utf-8'))).decode('utf-8') | ||||
|         return urlsafe_b64encode(compress(urlencode(settings_kv).encode())).decode() | ||||
| 
 | ||||
|     def parse_encoded_data(self, input_data): | ||||
|         """parse (base64) preferences from request (``flask.request.form['preferences']``)""" | ||||
|         decoded_data = decompress(urlsafe_b64decode(input_data.encode('utf-8'))) | ||||
|         decoded_data = decompress(urlsafe_b64decode(input_data.encode())) | ||||
|         dict_data = {} | ||||
|         for x, y in parse_qs(decoded_data).items(): | ||||
|             dict_data[x.decode('utf8')] = y[0].decode('utf8') | ||||
|             dict_data[x.decode()] = y[0].decode() | ||||
|         self.parse_dict(dict_data) | ||||
| 
 | ||||
|     def parse_dict(self, input_data): | ||||
|  | ||||
| @ -17,23 +17,22 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| (C) 2014 by Thomas Pointhuber, <thomas.pointhuber@gmx.at> | ||||
| ''' | ||||
| 
 | ||||
| import re | ||||
| 
 | ||||
| from searx.languages import language_codes | ||||
| from searx.engines import ( | ||||
|     categories, engines, engine_shortcuts | ||||
| ) | ||||
| import re | ||||
| import sys | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| VALID_LANGUAGE_CODE = re.compile(r'^[a-z]{2,3}(-[a-zA-Z]{2})?$') | ||||
| 
 | ||||
| 
 | ||||
| class RawTextQuery(object): | ||||
| class RawTextQuery: | ||||
|     """parse raw text query (the value from the html input)""" | ||||
| 
 | ||||
|     def __init__(self, query, disabled_engines): | ||||
|         assert isinstance(query, str) | ||||
|         self.query = query | ||||
|         self.disabled_engines = [] | ||||
| 
 | ||||
| @ -53,7 +52,7 @@ class RawTextQuery(object): | ||||
|         self.query_parts = [] | ||||
| 
 | ||||
|         # split query, including whitespaces | ||||
|         raw_query_parts = re.split(r'(\s+)' if isinstance(self.query, str) else b'(\s+)', self.query) | ||||
|         raw_query_parts = re.split(r'(\s+)', self.query) | ||||
| 
 | ||||
|         parse_next = True | ||||
| 
 | ||||
| @ -93,7 +92,7 @@ class RawTextQuery(object): | ||||
|                 # check if any language-code is equal with | ||||
|                 # declared language-codes | ||||
|                 for lc in language_codes: | ||||
|                     lang_id, lang_name, country, english_name = map(unicode.lower, lc) | ||||
|                     lang_id, lang_name, country, english_name = map(str.lower, lc) | ||||
| 
 | ||||
|                     # if correct language-code is found | ||||
|                     # set it as new search-language | ||||
| @ -177,15 +176,15 @@ class RawTextQuery(object): | ||||
| 
 | ||||
|     def getFullQuery(self): | ||||
|         # get full query including whitespace | ||||
|         return u''.join(self.query_parts) | ||||
|         return ''.join(self.query_parts) | ||||
| 
 | ||||
| 
 | ||||
| class SearchQuery(object): | ||||
| class SearchQuery: | ||||
|     """container for all the search parameters (query, language, etc...)""" | ||||
| 
 | ||||
|     def __init__(self, query, engines, categories, lang, safesearch, pageno, time_range, | ||||
|                  timeout_limit=None, preferences=None, external_bang=None): | ||||
|         self.query = query.encode('utf-8') | ||||
|         self.query = query | ||||
|         self.engines = engines | ||||
|         self.categories = categories | ||||
|         self.lang = lang | ||||
| @ -197,4 +196,4 @@ class SearchQuery(object): | ||||
|         self.external_bang = external_bang | ||||
| 
 | ||||
|     def __str__(self): | ||||
|         return str(self.query) + ";" + str(self.engines) | ||||
|         return self.query + ";" + str(self.engines) | ||||
|  | ||||
| @ -1,14 +1,11 @@ | ||||
| import re | ||||
| import sys | ||||
| from collections import defaultdict | ||||
| from operator import itemgetter | ||||
| from threading import RLock | ||||
| from urllib.parse import urlparse, unquote | ||||
| from searx import logger | ||||
| from searx.engines import engines | ||||
| from searx.url_utils import urlparse, unquote | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     basestring = str | ||||
| 
 | ||||
| CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) | ||||
| WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) | ||||
| @ -16,7 +13,7 @@ WHITESPACE_REGEX = re.compile('( |\t|\n)+', re.M | re.U) | ||||
| 
 | ||||
| # return the meaningful length of the content for a result | ||||
| def result_content_len(content): | ||||
|     if isinstance(content, basestring): | ||||
|     if isinstance(content, str): | ||||
|         return len(CONTENT_LEN_IGNORED_CHARS_REGEX.sub('', content)) | ||||
|     else: | ||||
|         return 0 | ||||
| @ -125,14 +122,14 @@ def result_score(result): | ||||
|     return sum((occurences * weight) / position for position in result['positions']) | ||||
| 
 | ||||
| 
 | ||||
| class ResultContainer(object): | ||||
| class ResultContainer: | ||||
|     """docstring for ResultContainer""" | ||||
| 
 | ||||
|     __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ | ||||
|                 '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url' | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         super(ResultContainer, self).__init__() | ||||
|         super().__init__() | ||||
|         self._merged_results = [] | ||||
|         self.infoboxes = [] | ||||
|         self.suggestions = set() | ||||
| @ -161,11 +158,11 @@ class ResultContainer(object): | ||||
|                 self._number_of_results.append(result['number_of_results']) | ||||
|             else: | ||||
|                 # standard result (url, title, content) | ||||
|                 if 'url' in result and not isinstance(result['url'], basestring): | ||||
|                 if 'url' in result and not isinstance(result['url'], str): | ||||
|                     logger.debug('result: invalid URL: %s', str(result)) | ||||
|                 elif 'title' in result and not isinstance(result['title'], basestring): | ||||
|                 elif 'title' in result and not isinstance(result['title'], str): | ||||
|                     logger.debug('result: invalid title: %s', str(result)) | ||||
|                 elif 'content' in result and not isinstance(result['content'], basestring): | ||||
|                 elif 'content' in result and not isinstance(result['content'], str): | ||||
|                     logger.debug('result: invalid content: %s', str(result)) | ||||
|                 else: | ||||
|                     self._merge_result(result, standard_result_count + 1) | ||||
|  | ||||
| @ -20,8 +20,8 @@ import sys | ||||
| import threading | ||||
| from time import time | ||||
| from uuid import uuid4 | ||||
| from _thread import start_new_thread | ||||
| 
 | ||||
| import six | ||||
| from flask_babel import gettext | ||||
| import requests.exceptions | ||||
| import searx.poolrequests as requests_lib | ||||
| @ -37,13 +37,6 @@ from searx import logger | ||||
| from searx.plugins import plugins | ||||
| from searx.exceptions import SearxParameterException | ||||
| 
 | ||||
| try: | ||||
|     from thread import start_new_thread | ||||
| except: | ||||
|     from _thread import start_new_thread | ||||
| 
 | ||||
| if sys.version_info[0] == 3: | ||||
|     unicode = str | ||||
| 
 | ||||
| logger = logger.getChild('search') | ||||
| 
 | ||||
| @ -355,11 +348,11 @@ def get_search_query_from_webapp(preferences, form): | ||||
|         load_default_categories = True | ||||
|         for pd_name, pd in form.items(): | ||||
|             if pd_name == 'categories': | ||||
|                 query_categories.extend(categ for categ in map(unicode.strip, pd.split(',')) if categ in categories) | ||||
|                 query_categories.extend(categ for categ in map(str.strip, pd.split(',')) if categ in categories) | ||||
|             elif pd_name == 'engines': | ||||
|                 pd_engines = [{'category': engines[engine].categories[0], | ||||
|                                'name': engine} | ||||
|                               for engine in map(unicode.strip, pd.split(',')) if engine in engines] | ||||
|                               for engine in map(str.strip, pd.split(',')) if engine in engines] | ||||
|                 if pd_engines: | ||||
|                     query_engines.extend(pd_engines) | ||||
|                     load_default_categories = False | ||||
| @ -414,12 +407,12 @@ def get_search_query_from_webapp(preferences, form): | ||||
|             raw_text_query) | ||||
| 
 | ||||
| 
 | ||||
| class Search(object): | ||||
| class Search: | ||||
|     """Search information container""" | ||||
| 
 | ||||
|     def __init__(self, search_query): | ||||
|         # init vars | ||||
|         super(Search, self).__init__() | ||||
|         super().__init__() | ||||
|         self.search_query = search_query | ||||
|         self.result_container = ResultContainer() | ||||
|         self.actual_timeout = None | ||||
| @ -434,7 +427,7 @@ class Search(object): | ||||
| 
 | ||||
|             # This means there was a valid bang and the | ||||
|             # rest of the search does not need to be continued | ||||
|             if isinstance(self.result_container.redirect_url, six.string_types): | ||||
|             if isinstance(self.result_container.redirect_url, str): | ||||
|                 return self.result_container | ||||
|         # start time | ||||
|         start_time = time() | ||||
| @ -541,13 +534,13 @@ class SearchWithPlugins(Search): | ||||
|     """Similar to the Search class but call the plugins.""" | ||||
| 
 | ||||
|     def __init__(self, search_query, ordered_plugin_list, request): | ||||
|         super(SearchWithPlugins, self).__init__(search_query) | ||||
|         super().__init__(search_query) | ||||
|         self.ordered_plugin_list = ordered_plugin_list | ||||
|         self.request = request | ||||
| 
 | ||||
|     def search(self): | ||||
|         if plugins.call(self.ordered_plugin_list, 'pre_search', self.request, self): | ||||
|             super(SearchWithPlugins, self).search() | ||||
|             super().search() | ||||
| 
 | ||||
|         plugins.call(self.ordered_plugin_list, 'post_search', self.request, self) | ||||
| 
 | ||||
|  | ||||
| @ -3,7 +3,7 @@ | ||||
| <div class="center"> | ||||
|     <h1>{{ _('Page not found') }}</h1> | ||||
|     {% autoescape false %} | ||||
|     <p>{{ _('Go to %(search_page)s.', search_page=unicode('<a href="{}">{}</a>').format(url_for('index'), _('search page'))) }}</p> | ||||
|     <p>{{ _('Go to %(search_page)s.', search_page='<a href="{}">{}</a>'.format(url_for('index'), _('search page'))) }}</p> | ||||
|     {% endautoescape %} | ||||
| </div> | ||||
| {% endblock %} | ||||
|  | ||||