diff --git a/Makefile b/Makefile index 2bba32328..2f10c62bc 100644 --- a/Makefile +++ b/Makefile @@ -223,6 +223,7 @@ test.pylint: pyenvinstall $(call cmd,pylint,\ searx/preferences.py \ searx/testing.py \ + searx/engines/gigablast.py \ ) endif @@ -243,7 +244,7 @@ test.sh: test.pep8: pyenvinstall @echo "TEST pep8" - $(Q)$(PY_ENV_ACT); pep8 --exclude=searx/static --max-line-length=120 --ignore "E402,W503" searx tests + $(Q)$(PY_ENV_ACT); pep8 --exclude='searx/static, searx/engines/gigablast.py' --max-line-length=120 --ignore "E402,W503" searx tests test.unit: pyenvinstall @echo "TEST tests/unit" diff --git a/README.rst b/README.rst index a7b75d6e2..55deb674e 100644 --- a/README.rst +++ b/README.rst @@ -1,63 +1,73 @@ -searx -===== +.. SPDX-License-Identifier: AGPL-3.0-or-later -A privacy-respecting, hackable `metasearch -engine `__. +.. figure:: https://raw.githubusercontent.com/asciimoo/searx/master/searx/static/themes/oscar/img/logo_searx_a.png + :target: https://asciimoo.github.io/searx/ + :alt: searX + :width: 100% + :align: center -Pronunciation: səːks - -List of `running -instances `__. - -See the `documentation `__ and the `wiki `__ for more information. +------- +|searx install| +|searx homepage| +|searx wiki| +|AGPL License| +|Issues| +|commits| |OpenCollective searx backers| |OpenCollective searx sponsors| -Installation -~~~~~~~~~~~~ +Privacy-respecting, hackable `metasearch engine`_ / *pronunciation* **səːks**. -With Docker ------------ -Go to the `searx-docker `__ project. +.. _metasearch engine: https://en.wikipedia.org/wiki/Metasearch_engine -Without Docker --------------- -For all of the details, follow this `step by step installation `__. +.. |searx install| image:: https://img.shields.io/badge/-install-blue + :target: https://asciimoo.github.io/searx/admin/installation.html -Note: the documentation needs to be updated. +.. 
|searx homepage| image:: https://img.shields.io/badge/-homepage-blue + :target: https://asciimoo.github.io/searx -If you are in a hurry ---------------------- -- clone the source: - ``git clone https://github.com/asciimoo/searx.git && cd searx`` -- install dependencies: ``./manage.sh update_packages`` -- edit your - `settings.yml `__ - (set your ``secret_key``!) -- run ``python searx/webapp.py`` to start the application +.. |searx wiki| image:: https://img.shields.io/badge/-wiki-blue + :target: https://github.com/asciimoo/searx/wiki +.. |AGPL License| image:: https://img.shields.io/badge/license-AGPL-blue.svg + :target: https://github.com/asciimoo/searx/blob/master/LICENSE -Bugs -~~~~ +.. |Issues| image:: https://img.shields.io/github/issues/asciimoo/searx?color=yellow&label=issues + :target: https://github.com/asciimoo/searx/issues -Bugs or suggestions? Visit the `issue -tracker `__. - -`License `__ -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -More about searx -~~~~~~~~~~~~~~~~ - -- `openhub `__ -- `twitter `__ -- IRC: #searx @ freenode +.. |PR| image:: https://img.shields.io/github/issues-pr-raw/asciimoo/searx?color=yellow&label=PR + :target: https://github.com/asciimoo/searx/pulls +.. |commits| image:: https://img.shields.io/github/commit-activity/y/asciimoo/searx?color=yellow&label=commits + :target: https://github.com/asciimoo/searx/commits/master .. |OpenCollective searx backers| image:: https://opencollective.com/searx/backers/badge.svg :target: https://opencollective.com/searx#backer - .. |OpenCollective searx sponsors| image:: https://opencollective.com/searx/sponsors/badge.svg :target: https://opencollective.com/searx#sponsor + + +If you are looking for running instances, ready to use, then visit searx.space_. + +Otherwise jump to the user_, admin_ and developer_ handbooks you will find on +our homepage_. + +.. _searx.space: https://searx.space +.. _user: https://asciimoo.github.io/searx/user +.. 
_admin: https://asciimoo.github.io/searx/admin +.. _developer: https://asciimoo.github.io/searx/dev +.. _homepage: https://asciimoo.github.io/searx + +contact: + openhub_ // twitter_ // IRC: #searx @ freenode + +.. _openhub: https://www.openhub.net/p/searx +.. _twitter: https://twitter.com/Searx_engine + +------- + +|gluten free| + +.. |gluten free| image:: https://forthebadge.com/images/featured/featured-gluten-free.svg diff --git a/docs/admin/installation-uwsgi.rst b/docs/admin/installation-uwsgi.rst index ac4c463b9..7b482975d 100644 --- a/docs/admin/installation-uwsgi.rst +++ b/docs/admin/installation-uwsgi.rst @@ -108,6 +108,7 @@ restart the uwsgi application. :start-after: START searx uwsgi-description ubuntu-20.04 :end-before: END searx uwsgi-description ubuntu-20.04 + .. hotfix: a bug group-tab need this comment .. group-tab:: Arch Linux @@ -115,6 +116,7 @@ restart the uwsgi application. :start-after: START searx uwsgi-description arch :end-before: END searx uwsgi-description arch + .. hotfix: a bug group-tab need this comment .. group-tab:: Fedora / RHEL @@ -128,22 +130,21 @@ restart the uwsgi application. .. group-tab:: Ubuntu / debian .. kernel-include:: $DOCS_BUILD/includes/searx.rst - :code: ini :start-after: START searx uwsgi-appini ubuntu-20.04 :end-before: END searx uwsgi-appini ubuntu-20.04 + .. hotfix: a bug group-tab need this comment + .. group-tab:: Arch Linux .. kernel-include:: $DOCS_BUILD/includes/searx.rst - :code: ini :start-after: START searx uwsgi-appini arch :end-before: END searx uwsgi-appini arch + .. hotfix: a bug group-tab need this comment + .. group-tab:: Fedora / RHEL .. 
kernel-include:: $DOCS_BUILD/includes/searx.rst - :code: ini :start-after: START searx uwsgi-appini fedora :end-before: END searx uwsgi-appini fedora - - diff --git a/docs/blog/index.rst b/docs/blog/index.rst index 5a8135fdc..04d95350d 100644 --- a/docs/blog/index.rst +++ b/docs/blog/index.rst @@ -6,6 +6,7 @@ Blog :maxdepth: 2 :caption: Contents + lxcdev-202006 python3 admin intro-offline diff --git a/docs/blog/lxcdev-202006.rst b/docs/blog/lxcdev-202006.rst new file mode 100644 index 000000000..f9ca3c2cc --- /dev/null +++ b/docs/blog/lxcdev-202006.rst @@ -0,0 +1,413 @@ +.. _blog-lxcdev-202006: + +======================================= +Developing in Linux containers [202006] +======================================= + +.. _LXC: https://linuxcontainers.org/lxc/introduction/ + +.. sidebar:: Audience + + This blog post is written for experienced admins and developers / readers + should have a serious meaning about: *distributed*, *merge* and *linux + container*. + +.. contents:: Contents + :depth: 2 + :local: + :backlinks: entry + +In PR :PR:`1803` we added a lot of scripts to Searx's boilerplate. In this blog +post I will show you, how you can make use of them in *distributed and +heterogeneous development cycles* (TL;DR; jump to the :ref:`blog-lxcdev-202006 +abstract`). + +Motivation +========== + +Normally in our development cycle, we edit the sources and run some test and/or +builds by using ``make`` before we commit. This cycle is simple and perfect but +might fail in some aspects we should not overlook. + + The environment in which we run all our development processes matters! + +The :ref:`makefile` and the :ref:`make pyenv` encapsulate a lot for us, but they +do not have access to all prerequisites. For example, there may have +dependencies on packages that are installed on the developer's desktop, but +usually are not preinstalled on a server or client system. 
Another example +is: settings have been made to the software on the developer's host that would +never be set on a *production* system. + +*Linux Containers* (LXC_) are isolated environments, and not mixing up on the +developer's desktop all the prerequisites of all the projects they contribute to is +always a good choice. + +The scripts from PR :PR:`1803` can be divided into those that install and maintain +software: + +- :ref:`searx.sh` +- :ref:`filtron.sh` +- :ref:`morty.sh` + +and the script :ref:`lxc.sh`, with which we can scale our installation, maintenance or +even development tasks over a stack of containers, which we call: *Searx's lxc +suite*. + +Gentlemen, start your engines! +============================== + +.. _LXD: https://linuxcontainers.org/lxd/introduction/ +.. _archlinux: https://www.archlinux.org/ + +Before you can start with containers, you need to install and initiate LXD_ +once: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ snap install lxd + $ lxd init --auto + +And you need to clone from origin or if you have your own fork, clone from your +fork: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ cd ~/Downloads + $ git clone https://github.com/asciimoo/searx.git + $ cd searx + +The :ref:`lxc-searx.env` consists of several images, see ``export +LXC_SUITE=(...`` near by :origin:`utils/lxc-searx.env#L19`. For this blog post +we exercise on an archlinux_ image. The container of this image is named +``searx-archlinux``. Let's build the container, but be sure that this container +does not already exist, so first let's remove a possible old one: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh remove searx-archlinux + $ sudo -H ./utils/lxc.sh build searx-archlinux + +.. sidebar:: The ``searx-archlinux`` container + + is the base of all our exercises here. + +In this container we install all services :ref:`including searx, morty & filtron +<lxc.sh install suite>` at once: + +.. tabs:: + + .. group-tab:: desktop + + .. 
code:: sh + + $ sudo -H ./utils/lxc.sh install suite searx-archlinux + +To proxy HTTP from filtron and morty in the container to the outside of the +container, install nginx into the container. Once for the bot blocker filtron: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ./utils/filtron.sh nginx install + ... + INFO: got 429 from http://10.174.184.156/searx + +and once for the content sanitizer (content proxy morty): + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ./utils/morty.sh nginx install + ... + INFO: got 200 from http://10.174.184.156/morty/ + +.. sidebar:: Fully functional searx suite + + From here on you have a fully functional searx suite running with bot blocker + (filtron) and Web content sanitizer (content proxy morty) needed for a + *privacy protecting* search engine. + +On your system, the IP of your ``searx-archlinux`` container differs from +http://10.174.184.156/searx, just open the URL reported in your installation +protocol in your WEB browser from the desktop to test the instance from outside +of the container. + +In such a searx suite admins can maintain and access the debug log of the +different services quite easy. + +.. _working in containers: + +In containers, work as usual +============================ + +Usually you open a root-bash using ``sudo -H bash``. In case of LXC containers +open the root-bash in the container using ``./utils/lxc.sh cmd +searx-archlinux``: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux bash + INFO: [searx-archlinux] bash + [root@searx-archlinux searx]# pwd + /share/searx + +The prompt ``[root@searx-archlinux ...]`` signals, that you are the root user in +the searx-container. To debug the running searx instance use: + +.. tabs:: + + .. group-tab:: root@searx-archlinux + + .. code:: sh + + $ ./utils/searx.sh inspect service + ... 
+ use [CTRL-C] to stop monitoring the log + ... + +Back in the browser on your desktop open the service http://10.174.184.156/searx +and run your application tests while the debug log is shown in the terminal from +above. You can stop monitoring using ``CTRL-C``, this also disables the *"debug +option"* in searx's settings file and restarts the searx uwsgi application. To +debug services from filtron and morty analogously, use: + +.. tabs:: + + .. group-tab:: root@searx-archlinux + + .. code:: sh + + $ ./utils/filtron.sh inspect service + $ ./utils/morty.sh inspect service + +Another point we have to notice is that each service (:ref:`searx <searx.sh>`, +:ref:`filtron <filtron.sh>` and :ref:`morty <morty.sh>`) runs under a dedicated +system user account with the same name (compare :ref:`create searx user`). To +get a shell from these accounts, simply call one of the scripts: + +.. tabs:: + + .. group-tab:: root@searx-archlinux + + .. code:: sh + + $ ./utils/searx.sh shell + $ ./utils/filtron.sh shell + $ ./utils/morty.sh shell + +To get in touch, open a shell from the service user (searx@searx-archlinux): + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ./utils/searx.sh shell + // exit with [CTRL-D] + (searx-pyenv) [searx@searx-archlinux ~]$ ... + +The prompt ``[searx@searx-archlinux]`` signals that you are logged in as system +user ``searx`` in the ``searx-archlinux`` container and the python *virtualenv* +``(searx-pyenv)`` environment is activated. + +.. tabs:: + + .. group-tab:: searx@searx-archlinux + + .. code:: sh + + (searx-pyenv) [searx@searx-archlinux ~]$ pwd + /usr/local/searx + + + +Wrap production into developer suite +==================================== + +In this section we will see how to change the *"Fully functional searx suite"* +from an LXC container (which is quite ready for production) into a developer +suite. 
For this, we have to keep an eye on the :ref:`installation basic`: + +- searx setup in: ``/etc/searx/settings.yml`` +- searx user's home: ``/usr/local/searx`` +- virtualenv in: ``/usr/local/searx/searx-pyenv`` +- searx software in: ``/usr/local/searx/searx-src`` + +The searx software is a clone of the ``GIT_URL`` (see :ref:`makefile setup`) and +the working tree is checked out from the ``GIT_BRANCH``. With the use of the +:ref:`searx.sh` the searx service was installed as :ref:`uWSGI application +`. To maintain this service, we can use ``systemctl`` (compare +:ref:`service architectures on distributions `). + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + systemctl stop uwsgi@searx + +With the command above, we stopped the searx uWSGI-App in the archlinux +container. + +The uWSGI-App for the archlinux distros is configured in +:origin:`utils/templates/etc/uwsgi/apps-archlinux/searx.ini`, where you +should at least pay attention to the settings of ``uid``, ``chdir``, ``env`` and +``http``:: + + env = SEARX_SETTINGS_PATH=/etc/searx/settings.yml + http = 127.0.0.1:8888 + + chdir = /usr/local/searx/searx-src/searx + virtualenv = /usr/local/searx/searx-pyenv + pythonpath = /usr/local/searx/searx-src + +If you have read the :ref:`"Good to know section" ` you remember, that +each container shares the root folder of the repository and the command +``utils/lxc.sh cmd`` handles relative path names **transparent**. To wrap the +searx installation into a developer one, we simply have to create a symlink to +the **transparent** repository from the desktop. Now let's replace the +repository at ``searx-src`` in the container with the working tree from outside +of the container: + +.. tabs:: + + .. group-tab:: container becomes a developer suite + + .. 
code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + mv /usr/local/searx/searx-src /usr/local/searx/searx-src.old + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ln -s /share/searx/ /usr/local/searx/searx-src + +Now we can develop as usual in the working tree of our desktop system. Every +time the software is changed, you have to restart the searx service (in the +container): + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + systemctl restart uwsgi@searx + + +Remember: :ref:`working in containers` .. here are just some examples from my +daily usage: + +.. tabs:: + + .. group-tab:: desktop + + To *inspect* the searx instance (already described above): + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ./utils/searx.sh inspect service + + Run :ref:`makefile`, e.g. to test inside the container: + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + make test + + To install all prerequisites needed for a :ref:`buildhosts`: + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + ./utils/searx.sh install buildhost + + To build the docs on a buildhost :ref:`buildhosts`: + + .. code:: sh + + $ sudo -H ./utils/lxc.sh cmd searx-archlinux \ + make docs + +.. _blog-lxcdev-202006 abstract: + +Abstract +======== + +We built up a fully functional searx suite in an archlinux container: + +.. code:: sh + + $ sudo -H ./utils/lxc.sh install suite searx-archlinux + +To access HTTP from the desktop we installed nginx for the services inside the +container: + +.. tabs:: + + .. group-tab:: [root@searx-archlinux] + + .. code:: sh + + $ ./utils/filtron.sh nginx install + $ ./utils/morty.sh nginx install + +To wrap the suite into a developer one, we created a symbolic link to the +repository which is shared **transparent** from the desktop's file system into +the container: + +.. tabs:: + + .. group-tab:: [root@searx-archlinux] + + .. 
code:: sh + + $ mv /usr/local/searx/searx-src /usr/local/searx/searx-src.old + $ ln -s /share/searx/ /usr/local/searx/searx-src + $ systemctl restart uwsgi@searx + +To get remarks from the suite of the archlinux container we can use: + +.. tabs:: + + .. group-tab:: desktop + + .. code:: sh + + $ sudo -H ./utils/lxc.sh show suite searx-archlinux + ... + [searx-archlinux] INFO: (eth0) filtron: http://10.174.184.156:4004/ http://10.174.184.156/searx + [searx-archlinux] INFO: (eth0) morty: http://10.174.184.156:3000/ + [searx-archlinux] INFO: (eth0) docs-live: http://10.174.184.156:8080/ + [searx-archlinux] INFO: (eth0) IPv6: http://[fd42:573b:e0b3:e97e:216:3eff:fea5:9b65] + ... + diff --git a/docs/blog/private-engines.rst b/docs/blog/private-engines.rst index c26b3ed1c..796f0fc69 100644 --- a/docs/blog/private-engines.rst +++ b/docs/blog/private-engines.rst @@ -13,7 +13,7 @@ Private engines To solve this issue private engines were introduced in :pull:`1823`. A new option was added to engines named `tokens`. It expects a list of strings. If the user making a request presents one of the tokens -of an engine, he/she is able to access information about the engine +of an engine, they can access information about the engine and make search requests. Example configuration to restrict access to the Arch Linux Wiki engine: diff --git a/docs/dev/search_api.rst b/docs/dev/search_api.rst index 922548ffb..960d2fb37 100644 --- a/docs/dev/search_api.rst +++ b/docs/dev/search_api.rst @@ -81,7 +81,7 @@ Parameters Theme of instance. Please note, available themes depend on an instance. It is possible that an - instance administrator deleted, created or renamed themes on his/her instance. + instance administrator deleted, created or renamed themes on their instance. See the available options in the preferences page of the instance. ``oscar-style`` : default ``logicodev`` @@ -91,7 +91,7 @@ Parameters ``oscar``. Please note, available styles depend on an instance. 
It is possible that an - instance administrator deleted, created or renamed styles on his/her + instance administrator deleted, created or renamed styles on their instance. See the available options in the preferences page of the instance. ``enabled_plugins`` : optional diff --git a/docs/user/own-instance.rst b/docs/user/own-instance.rst index 4876abe99..af415b61d 100644 --- a/docs/user/own-instance.rst +++ b/docs/user/own-instance.rst @@ -44,9 +44,9 @@ hidden from visited result pages. What are the consequences of using public instances? ---------------------------------------------------- -If someone uses a public instance, he/she has to trust the administrator of that +If someone uses a public instance, they have to trust the administrator of that instance. This means that the user of the public instance does not know whether -his/her requests are logged, aggregated and sent or sold to a third party. +their requests are logged, aggregated and sent or sold to a third party. Also, public instances without proper protection are more vulnerable to abusing the search service, In this case the external service in exchange returns diff --git a/docs/utils/lxc.sh.rst b/docs/utils/lxc.sh.rst index ae1412870..114d0911e 100644 --- a/docs/utils/lxc.sh.rst +++ b/docs/utils/lxc.sh.rst @@ -47,9 +47,9 @@ one**:: *Good to know ...* -Eeach container shares the root folder of the repository and the -command ``utils/lxc.sh cmd`` **handles relative path names transparent**, -compare output of:: +Each container shares the root folder of the repository and the command +``utils/lxc.sh cmd`` **handles relative path names transparent**, compare output +of:: $ sudo -H ./utils/lxc.sh cmd -- ls -la Makefile ... @@ -66,6 +66,7 @@ If there comes the time you want to **get rid off all** the containers and $ sudo -H ./utils/lxc.sh remove $ sudo -H ./utils/lxc.sh remove images +.. 
_lxc.sh install suite: Install suite ============= diff --git a/searx/engines/duckduckgo.py b/searx/engines/duckduckgo.py index 0d2c0af2d..6e07b5021 100644 --- a/searx/engines/duckduckgo.py +++ b/searx/engines/duckduckgo.py @@ -50,6 +50,7 @@ result_xpath = '//div[@class="result results_links results_links_deep web-result url_xpath = './/a[@class="result__a"]/@href' title_xpath = './/a[@class="result__a"]' content_xpath = './/a[@class="result__snippet"]' +correction_xpath = '//div[@id="did_you_mean"]//a' # match query's language to a region code that duckduckgo will accept @@ -125,6 +126,11 @@ def response(resp): 'content': content, 'url': res_url}) + # parse correction + for correction in eval_xpath(doc, correction_xpath): + # append correction + results.append({'correction': extract_text(correction)}) + # return results return results diff --git a/searx/engines/gigablast.py b/searx/engines/gigablast.py index 2bb29a9fe..b139c2a9f 100644 --- a/searx/engines/gigablast.py +++ b/searx/engines/gigablast.py @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later """ Gigablast (Web) @@ -9,121 +10,117 @@ @stable yes @parse url, title, content """ +# pylint: disable=missing-function-docstring, invalid-name -import random +import re from json import loads -from time import time -from lxml.html import fromstring -from searx.poolrequests import get +# from searx import logger from searx.url_utils import urlencode -from searx.utils import eval_xpath +from searx.poolrequests import get # engine dependent config categories = ['general'] -paging = True -number_of_results = 10 +# gigablast's pagination is totally damaged, don't use it +paging = False language_support = True safesearch = True # search-url -base_url = 'https://gigablast.com/' -search_string = 'search?{query}'\ - '&n={number_of_results}'\ - '&c=main'\ - '&s={offset}'\ - '&format=json'\ - '&langcountry={lang}'\ - '&ff={safesearch}'\ - '&rand={rxikd}' -# specific xpath variables -results_xpath = 
'//response//result' -url_xpath = './/url' -title_xpath = './/title' -content_xpath = './/sum' - -supported_languages_url = 'https://gigablast.com/search?&rxikd=1' - -extra_param = '' # gigablast requires a random extra parameter -# which can be extracted from the source code of the search page +base_url = 'https://gigablast.com' +# ugly hack: gigablast requires a random extra parameter which can be extracted +# from the source code of the gigablast HTTP client +extra_param = '' +extra_param_path='/search?c=main&qlangcountry=en-us&q=south&s=10' def parse_extra_param(text): - global extra_param - param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')] - extra_param = '' - for l in param_lines: - extra_param += l.split("'")[1] - extra_param = extra_param.split('&')[-1] + # example: + # + # var uxrl='/search?c=main&qlangcountry=en-us&q=south&s=10&rand=1590740241635&n'; + # uxrl=uxrl+'sab=730863287'; + # + # extra_param --> "rand=1590740241635&nsab=730863287" -def init(engine_settings=None): - parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text) + global extra_param # pylint: disable=global-statement + re_var= None + for line in text.splitlines(): + if re_var is None and extra_param_path in line: + var = line.split("=")[0].split()[1] # e.g. 
var --> 'uxrl' + re_var = re.compile(var + "\\s*=\\s*" + var + "\\s*\\+\\s*'" + "(.*)" + "'(.*)") + extra_param = line.split("'")[1][len(extra_param_path):] + continue + if re_var is not None and re_var.search(line): + extra_param += re_var.search(line).group(1) + break + # logger.debug('gigablast extra_param="%s"', extra_param) + +def init(engine_settings=None): # pylint: disable=unused-argument + parse_extra_param(get(base_url + extra_param_path).text) # do search-request -def request(query, params): - print("EXTRAPARAM:", extra_param) - offset = (params['pageno'] - 1) * number_of_results +def request(query, params): # pylint: disable=unused-argument - if params['language'] == 'all': - language = 'xx' - else: - language = params['language'].replace('-', '_').lower() - if language.split('-')[0] != 'zh': - language = language.split('-')[0] + # see API http://www.gigablast.com/api.html#/search + # Take into account, that the API has some quirks .. + + query_args = dict( + c = 'main' + , format = 'json' + , q = query + , dr = 1 + , showgoodimages = 0 + ) + + if params['language'] and params['language'] != 'all': + query_args['qlangcountry'] = params['language'] + query_args['qlang'] = params['language'].split('-')[0] if params['safesearch'] >= 1: - safesearch = 1 - else: - safesearch = 0 + query_args['ff'] = 1 - # rxieu is some kind of hash from the search query, but accepts random atm - search_path = search_string.format(query=urlencode({'q': query}), - offset=offset, - number_of_results=number_of_results, - lang=language, - rxikd=int(time() * 1000), - safesearch=safesearch) - - params['url'] = base_url + search_path + '&' + extra_param + search_url = '/search?' 
+ urlencode(query_args) + params['url'] = base_url + search_url + extra_param return params - # get response from search-request def response(resp): results = [] - # parse results - try: - response_json = loads(resp.text) - except: - parse_extra_param(resp.text) - raise Exception('extra param expired, please reload') + response_json = loads(resp.text) + + # logger.debug('gigablast returns %s results', len(response_json['results'])) for result in response_json['results']: - # append result - results.append({'url': result['url'], - 'title': result['title'], - 'content': result['sum']}) + # see "Example JSON Output (&format=json)" + # at http://www.gigablast.com/api.html#/search + + # sort out meaningless result + + title = result.get('title') + if len(title) < 2: + continue + + url = result.get('url') + if len(url) < 9: + continue + + content = result.get('sum') + if len(content) < 5: + continue + + # extend fields + + subtitle = result.get('title') + if len(subtitle) > 3 and subtitle != title: + title += " - " + subtitle + + results.append(dict( + url = url + , title = title + , content = content + )) - # return results return results - - -# get supported languages from their site -def _fetch_supported_languages(resp): - supported_languages = [] - dom = fromstring(resp.text) - links = eval_xpath(dom, '//span[@id="menu2"]/a') - for link in links: - href = eval_xpath(link, './@href')[0].split('lang%3A') - if len(href) == 2: - code = href[1].split('_') - if len(code) == 2: - code = code[0] + '-' + code[1].upper() - else: - code = code[0] - supported_languages.append(code) - - return supported_languages diff --git a/searx/engines/yacy.py b/searx/engines/yacy.py index 25bc83687..f1d4c6abe 100644 --- a/searx/engines/yacy.py +++ b/searx/engines/yacy.py @@ -75,7 +75,7 @@ def response(resp): for result in search_results[0].get('items', []): # parse image results - if result.get('image'): + if result.get('image') and result.get('width') and result.get('height'): result_url = 
'' if 'url' in result: diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 36c1a11f8..a6b4aeb9f 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -33,7 +33,7 @@ supported_languages_url = 'https://search.yahoo.com/web/advanced' results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" url_xpath = './/h3/a/@href' title_xpath = './/h3/a' -content_xpath = './/div[@class="compText aAbs"]' +content_xpath = './/div[contains(@class, "compText")]' suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" time_range_dict = {'day': ['1d', 'd'], diff --git a/searx/static/themes/oscar/css/logicodev-dark.css b/searx/static/themes/oscar/css/logicodev-dark.css index 71435b08b..dadf6f140 100644 --- a/searx/static/themes/oscar/css/logicodev-dark.css +++ b/searx/static/themes/oscar/css/logicodev-dark.css @@ -70,7 +70,21 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not -ms-user-select: none; } .onoffswitch-checkbox { - display: none; + opacity: 0; + position: absolute; +} +.onoffswitch-checkbox:before { + content: ""; + display: inline-block; + width: 16px; + height: 16px; + margin-right: 10px; + position: absolute; + left: 0; + bottom: 1px; + background-color: #fff; + border: 1px solid #ccc; + border-radius: 0px; } .onoffswitch-label { display: block; @@ -104,7 +118,7 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not top: 0; bottom: 0; right: 0px; - border: 2px solid #FFFFFF !important; + border: 2px solid #FFFFFF; border-radius: 50px !important; transition: all 0.3s ease-in 0s; } @@ -115,6 +129,9 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not right: 71px; background-color: #A1A1A1; } +.onoffswitch-checkbox:focus + .onoffswitch-label .onoffswitch-switch { + border: 3px solid #444444; +} .result_header { margin-top: 0px; margin-bottom: 2px; @@ -377,6 +394,17 @@ 
Ny0yNFQxMToxNTowMCswMjowMP7RDgQAAAAZdEVYdFNvZnR3YXJlAHd3dy5pbmtzY2FwZS5vcmeb .search-margin { margin-bottom: 0.6em; } +.visually-hidden { + position: absolute !important; + height: 1px; + width: 1px; + overflow: hidden; + clip: rect(1px 1px 1px 1px); + /* IE6, IE7 */ + clip: rect(1px, 1px, 1px, 1px); + white-space: nowrap; + /* added line */ +} #advanced-search-container { display: none; text-align: left; @@ -407,8 +435,8 @@ Ny0yNFQxMToxNTowMCswMjowMP7RDgQAAAAZdEVYdFNvZnR3YXJlAHd3dy5pbmtzY2FwZS5vcmeb font-weight: bold; border-bottom: #01d7d4 5px solid; } -#check-advanced { - display: none; +#check-advanced:focus + label { + text-decoration: underline; } #check-advanced:checked ~ #advanced-search-container { display: block; diff --git a/searx/static/themes/oscar/css/logicodev-dark.min.css b/searx/static/themes/oscar/css/logicodev-dark.min.css index bfb7b8d6a..ccafea854 100644 Binary files a/searx/static/themes/oscar/css/logicodev-dark.min.css and b/searx/static/themes/oscar/css/logicodev-dark.min.css differ diff --git a/searx/static/themes/oscar/css/logicodev.css b/searx/static/themes/oscar/css/logicodev.css index 77f5f34cf..f082f273b 100644 --- a/searx/static/themes/oscar/css/logicodev.css +++ b/searx/static/themes/oscar/css/logicodev.css @@ -43,7 +43,21 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not -ms-user-select: none; } .onoffswitch-checkbox { - display: none; + opacity: 0; + position: absolute; +} +.onoffswitch-checkbox:before { + content: ""; + display: inline-block; + width: 16px; + height: 16px; + margin-right: 10px; + position: absolute; + left: 0; + bottom: 1px; + background-color: #fff; + border: 1px solid #ccc; + border-radius: 0px; } .onoffswitch-label { display: block; @@ -77,7 +91,7 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not top: 0; bottom: 0; right: 0px; - border: 2px solid #FFFFFF !important; + border: 2px solid #FFFFFF; border-radius: 50px !important; transition: all 
0.3s ease-in 0s; } @@ -88,6 +102,9 @@ input[type=checkbox]:not(:checked) + .label_hide_if_checked + .label_hide_if_not right: 71px; background-color: #A1A1A1; } +.onoffswitch-checkbox:focus + .onoffswitch-label .onoffswitch-switch { + border: 3px solid #444444; +} .result_header { margin-top: 0px; margin-bottom: 2px; @@ -350,6 +367,17 @@ Ny0yNFQxMToxNTowMCswMjowMP7RDgQAAAAZdEVYdFNvZnR3YXJlAHd3dy5pbmtzY2FwZS5vcmeb .search-margin { margin-bottom: 0.6em; } +.visually-hidden { + position: absolute !important; + height: 1px; + width: 1px; + overflow: hidden; + clip: rect(1px 1px 1px 1px); + /* IE6, IE7 */ + clip: rect(1px, 1px, 1px, 1px); + white-space: nowrap; + /* added line */ +} #advanced-search-container { display: none; text-align: left; @@ -380,8 +408,8 @@ Ny0yNFQxMToxNTowMCswMjowMP7RDgQAAAAZdEVYdFNvZnR3YXJlAHd3dy5pbmtzY2FwZS5vcmeb font-weight: bold; border-bottom: #01d7d4 5px solid; } -#check-advanced { - display: none; +#check-advanced:focus + label { + text-decoration: underline; } #check-advanced:checked ~ #advanced-search-container { display: block; diff --git a/searx/static/themes/oscar/css/logicodev.min.css b/searx/static/themes/oscar/css/logicodev.min.css index 97c4efb4a..37549eaa6 100644 Binary files a/searx/static/themes/oscar/css/logicodev.min.css and b/searx/static/themes/oscar/css/logicodev.min.css differ diff --git a/searx/static/themes/oscar/less/logicodev/advanced.less b/searx/static/themes/oscar/less/logicodev/advanced.less index 4c3827b30..b8e10de4d 100644 --- a/searx/static/themes/oscar/less/logicodev/advanced.less +++ b/searx/static/themes/oscar/less/logicodev/advanced.less @@ -31,8 +31,8 @@ } } -#check-advanced { - display: none; +#check-advanced:focus + label { + text-decoration: underline; } #check-advanced:checked ~ #advanced-search-container { diff --git a/searx/static/themes/oscar/less/logicodev/onoff.less b/searx/static/themes/oscar/less/logicodev/onoff.less index f47189216..fbd2983eb 100644 --- 
a/searx/static/themes/oscar/less/logicodev/onoff.less +++ b/searx/static/themes/oscar/less/logicodev/onoff.less @@ -9,7 +9,21 @@ -ms-user-select: none; } .onoffswitch-checkbox { - display: none; + opacity: 0; + position: absolute; +} +.onoffswitch-checkbox:before { + content: ""; + display: inline-block; + width: 16px; + height: 16px; + margin-right: 10px; + position: absolute; + left: 0; + bottom: 1px; + background-color: #fff; + border: 1px solid #ccc; + border-radius: 0px; } .onoffswitch-label { display: block; @@ -44,7 +58,7 @@ top: 0; bottom: 0; right: 0px; - border: 2px solid #FFFFFF !important; + border: 2px solid #FFFFFF; border-radius: 50px !important; transition: all 0.3s ease-in 0s; } @@ -55,3 +69,6 @@ right: 71px; background-color: #A1A1A1; } +.onoffswitch-checkbox:focus + .onoffswitch-label .onoffswitch-switch { + border: 3px solid #444444; +} diff --git a/searx/static/themes/oscar/less/logicodev/search.less b/searx/static/themes/oscar/less/logicodev/search.less index d65e30563..ff94bfefb 100644 --- a/searx/static/themes/oscar/less/logicodev/search.less +++ b/searx/static/themes/oscar/less/logicodev/search.less @@ -77,4 +77,14 @@ Ny0yNFQxMToxNTowMCswMjowMP7RDgQAAAAZdEVYdFNvZnR3YXJlAHd3dy5pbmtzY2FwZS5vcmeb .search-margin { margin-bottom: 0.6em; -} \ No newline at end of file +} + +.visually-hidden { + position: absolute !important; + height: 1px; + width: 1px; + overflow: hidden; + clip: rect(1px 1px 1px 1px); /* IE6, IE7 */ + clip: rect(1px, 1px, 1px, 1px); + white-space: nowrap; /* added line */ +} diff --git a/searx/templates/__common__/about.html b/searx/templates/__common__/about.html index d3e8d06a6..9741b5162 100644 --- a/searx/templates/__common__/about.html +++ b/searx/templates/__common__/about.html @@ -59,12 +59,16 @@

How to set as the default search engine?

-
Firefox
+

+ Searx supports OpenSearch. + For more information on changing your default search engine, see your browser's documentation: +

-
- Install - searx as a search engine on any version of Firefox! (javascript required) -
+

Where to find anonymous usage statistics of this instance ?

diff --git a/searx/templates/oscar/advanced.html b/searx/templates/oscar/advanced.html index bf5f86324..0b13d50d6 100644 --- a/searx/templates/oscar/advanced.html +++ b/searx/templates/oscar/advanced.html @@ -1,4 +1,4 @@ - +