Merge pull request #2460 from dalf/engine-about
[enh] engines: add about variable
This commit is contained in:
		
						commit
						1d13ad8452
					
				| @ -1,7 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  1337x | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import quote, urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://1337x.to/', | ||||
|     "wikidata_id": 'Q28134166', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| url = 'https://1337x.to/' | ||||
| search_url = url + 'search/{search_term}/{pageno}/' | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) | ||||
| 
 | ||||
|  @website      https://www.acgsou.com/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content, seed, leech, torrentfile | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.acgsou.com/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'images', 'videos', 'music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Ahmia (Onions) | ||||
| 
 | ||||
|  @website      http://msydqstlz2kzerdg.onion | ||||
|  @provides-api no | ||||
| 
 | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no | ||||
|  @parse        url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse, parse_qs | ||||
| from lxml.html import fromstring | ||||
| from searx.engines.xpath import extract_url, extract_text, eval_xpath_list, eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'http://msydqstlz2kzerdg.onion', | ||||
|     "wikidata_id": 'Q18693938', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine config | ||||
| categories = ['onions'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  APK Mirror | ||||
| 
 | ||||
|  @website     https://www.apkmirror.com | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, thumbnail_src | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.apkmirror.com', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -1,20 +1,24 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Arch Linux Wiki | ||||
| 
 | ||||
|  @website      https://wiki.archlinux.org | ||||
|  @provide-api  no (Mediawiki provides API, but Arch Wiki blocks access to it | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title | ||||
|  API: Mediawiki provides API, but Arch Wiki blocks access to it | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://wiki.archlinux.org/', | ||||
|     "wikidata_id": 'Q101445877', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| language_support = True | ||||
|  | ||||
| @ -1,20 +1,21 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  ArXiV (Scientific preprints) | ||||
|  @website     https://arxiv.org | ||||
|  @provide-api yes (export.arxiv.org/api/query) | ||||
|  @using-api   yes | ||||
|  @results     XML-RSS | ||||
|  @stable      yes | ||||
|  @parse       url, title, publishedDate, content | ||||
|  More info on api: https://arxiv.org/help/api/user-manual | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.utils import eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://arxiv.org', | ||||
|     "wikidata_id": 'Q118398', | ||||
|     "official_api_documentation": 'https://arxiv.org/help/api', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'XML-RSS', | ||||
| } | ||||
| 
 | ||||
| categories = ['science'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,16 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  BASE (Scholar publications) | ||||
| 
 | ||||
|  @website     https://base-search.net | ||||
|  @provide-api yes with authorization (https://api.base-search.net/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     XML | ||||
|  @stable      ? | ||||
|  @parse       url, title, publishedDate, content | ||||
|  More info on api: http://base-search.net/about/download/base_interface.pdf | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| @ -19,6 +9,15 @@ from datetime import datetime | ||||
| import re | ||||
| from searx.utils import searx_useragent | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://base-search.net', | ||||
|     "wikidata_id": 'Q448335', | ||||
|     "official_api_documentation": 'https://api.base-search.net/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'XML', | ||||
| } | ||||
| 
 | ||||
| categories = ['science'] | ||||
| 
 | ||||
|  | ||||
| @ -1,16 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Bing (Web) | ||||
| 
 | ||||
|  @website     https://www.bing.com | ||||
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search), | ||||
|               max. 5000 query/month | ||||
| 
 | ||||
|  @using-api   no (because of query limit) | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| 
 | ||||
|  @todo        publishedDate | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -21,6 +11,16 @@ from searx.utils import eval_xpath, extract_text, match_language | ||||
| 
 | ||||
| logger = logger.getChild('bing engine') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.bing.com', | ||||
|     "wikidata_id": 'Q182496', | ||||
|     "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-web-search-api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,15 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Bing (Images) | ||||
| 
 | ||||
|  @website     https://www.bing.com/images | ||||
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search), | ||||
|               max. 5000 query/month | ||||
| 
 | ||||
|  @using-api   no (because of query limit) | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, img_src | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| @ -20,6 +11,16 @@ from searx.utils import match_language | ||||
| from searx.engines.bing import language_aliases | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.bing.com/images', | ||||
|     "wikidata_id": 'Q182496', | ||||
|     "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Bing (News) | ||||
| 
 | ||||
|  @website     https://www.bing.com/news | ||||
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search), | ||||
|               max. 5000 query/month | ||||
| 
 | ||||
|  @using-api   no (because of query limit) | ||||
|  @results     RSS (using search portal) | ||||
|  @stable      yes (except perhaps for the images) | ||||
|  @parse       url, title, content, publishedDate, thumbnail | ||||
| """ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| @ -20,6 +12,16 @@ from searx.utils import match_language, eval_xpath_getindex | ||||
| from searx.engines.bing import language_aliases | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.bing.com/news', | ||||
|     "wikidata_id": 'Q2878637', | ||||
|     "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-news-search-api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'RSS', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['news'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Bing (Videos) | ||||
| 
 | ||||
|  @website     https://www.bing.com/videos | ||||
|  @provide-api yes (http://datamarket.azure.com/dataset/bing/search) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, content, thumbnail | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -18,6 +11,16 @@ from searx.utils import match_language | ||||
| from searx.engines.bing import language_aliases | ||||
| from searx.engines.bing import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.bing.com/videos', | ||||
|     "wikidata_id": 'Q4914152', | ||||
|     "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-video-search-api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['videos'] | ||||
| paging = True | ||||
| safesearch = True | ||||
|  | ||||
| @ -1,19 +1,25 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  BTDigg (Videos, Music, Files) | ||||
| 
 | ||||
|  @website     https://btdig.com | ||||
|  @provide-api yes (on demand) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, seed, leech, magnetlink | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://btdig.com', | ||||
|     "wikidata_id": 'Q4836698', | ||||
|     "official_api_documentation": { | ||||
|         'url': 'https://btdig.com/contacts', | ||||
|         'comment': 'on demand' | ||||
|     }, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music', 'files'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,7 @@ | ||||
| ''' | ||||
| searx is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU Affero General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| searx is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU Affero General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU Affero General Public License | ||||
| along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| ''' | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Command (offline) | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from os.path import expanduser, isabs, realpath, commonprefix | ||||
|  | ||||
| @ -1,5 +1,19 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  currency convert (DuckDuckGo) | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://duckduckgo.com/', | ||||
|     "wikidata_id": 'Q12805', | ||||
|     "official_api_documentation": 'https://duckduckgo.com/api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSONP', | ||||
| } | ||||
| 
 | ||||
| engine_type = 'online_currency' | ||||
| categories = [] | ||||
|  | ||||
| @ -1,15 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Dailymotion (Videos) | ||||
| 
 | ||||
|  @website     https://www.dailymotion.com | ||||
|  @provide-api yes (http://www.dailymotion.com/developer) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, thumbnail, publishedDate, embedded | ||||
| 
 | ||||
|  @todo        set content-parameter with correct data | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -17,6 +8,16 @@ from datetime import datetime | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import match_language, html_to_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.dailymotion.com', | ||||
|     "wikidata_id": 'Q769222', | ||||
|     "official_api_documentation": 'https://www.dailymotion.com/developer', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Deezer (Music) | ||||
| 
 | ||||
|  @website     https://deezer.com | ||||
|  @provide-api yes (http://developers.deezer.com/api/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, embedded | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://deezer.com', | ||||
|     "wikidata_id": 'Q602243', | ||||
|     "official_api_documentation": 'https://developers.deezer.com/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,21 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Deviantart (Images) | ||||
| 
 | ||||
|  @website     https://www.deviantart.com/ | ||||
|  @provide-api yes (https://www.deviantart.com/developers/) (RSS) | ||||
| 
 | ||||
|  @using-api   no (TODO, rewrite to api) | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, img_src | ||||
| 
 | ||||
|  @todo        rewrite to api | ||||
| """ | ||||
| # pylint: disable=missing-function-docstring | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.deviantart.com/', | ||||
|     "wikidata_id": 'Q46523', | ||||
|     "official_api_documentation": 'https://www.deviantart.com/developers/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Dictzone | ||||
| 
 | ||||
|  @website     https://dictzone.com/ | ||||
|  @provide-api no | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urljoin | ||||
| from lxml import html | ||||
| from searx.utils import eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://dictzone.com/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| engine_type = 'online_dictionnary' | ||||
| categories = ['general'] | ||||
|  | ||||
| @ -1,19 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  DigBT (Videos, Music, Files) | ||||
| 
 | ||||
|  @website     https://digbt.org | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, magnetlink | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://digbt.org', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['videos', 'music', 'files'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Digg (News, Social media) | ||||
| 
 | ||||
|  @website     https://digg.com | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, publishedDate, thumbnail | ||||
| """ | ||||
| # pylint: disable=missing-function-docstring | ||||
| 
 | ||||
| @ -17,6 +10,16 @@ from datetime import datetime | ||||
| 
 | ||||
| from lxml import html | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://digg.com', | ||||
|     "wikidata_id": 'Q270478', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['news', 'social media'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # Doku Wiki | ||||
| # | ||||
| # @website     https://www.dokuwiki.org/ | ||||
| # @provide-api yes | ||||
| #              (https://www.dokuwiki.org/devel:xmlrpc) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML | ||||
| # @stable      yes | ||||
| # @parse       (general)    url, title, content | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Doku Wiki | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml.html import fromstring | ||||
| from searx.utils import extract_text, eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.dokuwiki.org/', | ||||
|     "wikidata_id": 'Q851864', | ||||
|     "official_api_documentation": 'https://www.dokuwiki.org/devel:xmlrpc', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general']  # TODO , 'images', 'music', 'videos', 'files' | ||||
| paging = False | ||||
|  | ||||
| @ -1,22 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  DuckDuckGo (Web) | ||||
| 
 | ||||
|  @website     https://duckduckgo.com/ | ||||
|  @provide-api yes (https://duckduckgo.com/api), | ||||
|               but not all results from search-site | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| 
 | ||||
|  @todo        rewrite to api | ||||
| """ | ||||
| 
 | ||||
| from lxml.html import fromstring | ||||
| from json import loads | ||||
| from searx.utils import extract_text, match_language, eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://duckduckgo.com/', | ||||
|     "wikidata_id": 'Q12805', | ||||
|     "official_api_documentation": 'https://duckduckgo.com/api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = False | ||||
|  | ||||
| @ -1,12 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| DuckDuckGo (definitions) | ||||
| 
 | ||||
| - `Instant Answer API`_ | ||||
| - `DuckDuckGo query`_ | ||||
| 
 | ||||
| .. _Instant Answer API: https://duckduckgo.com/api | ||||
| .. _DuckDuckGo query: https://api.duckduckgo.com/?q=DuckDuckGo&format=json&pretty=1 | ||||
| 
 | ||||
|  DuckDuckGo (Instant Answer API) | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| @ -22,6 +16,16 @@ from searx.external_urls import get_external_url, get_earth_coordinates_url, are | ||||
| 
 | ||||
| logger = logger.getChild('duckduckgo_definitions') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://duckduckgo.com/', | ||||
|     "wikidata_id": 'Q12805', | ||||
|     "official_api_documentation": 'https://duckduckgo.com/api', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| URL = 'https://api.duckduckgo.com/'\ | ||||
|     + '?{query}&format=json&pretty=0&no_redirect=1&d=1' | ||||
| 
 | ||||
|  | ||||
| @ -1,16 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  DuckDuckGo (Images) | ||||
| 
 | ||||
|  @website     https://duckduckgo.com/ | ||||
|  @provide-api yes (https://duckduckgo.com/api), | ||||
|               but images are not supported | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     JSON (site requires js to get images) | ||||
|  @stable      no (JSON can change) | ||||
|  @parse       url, title, img_src | ||||
| 
 | ||||
|  @todo        avoid extra request | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -20,6 +10,19 @@ from searx.engines.duckduckgo import get_region_code | ||||
| from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | ||||
| from searx.poolrequests import get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://duckduckgo.com/', | ||||
|     "wikidata_id": 'Q12805', | ||||
|     "official_api_documentation": { | ||||
|         'url': 'https://duckduckgo.com/api', | ||||
|         'comment': 'but images are not supported', | ||||
|     }, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON (site requires js to get images)', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,11 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Duden | ||||
|  @website     https://www.duden.de | ||||
|  @provide-api no | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -13,6 +8,16 @@ from urllib.parse import quote, urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.duden.de', | ||||
|     "wikidata_id": 'Q73624591', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['general'] | ||||
| paging = True | ||||
| language_support = False | ||||
|  | ||||
| @ -1,11 +1,19 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Dummy Offline | ||||
| 
 | ||||
|  @results     one result | ||||
|  @stable      yes | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| def search(query, request_params): | ||||
|     return [{ | ||||
|         'result': 'this is what you get', | ||||
|  | ||||
| @ -1,10 +1,18 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Dummy | ||||
| 
 | ||||
|  @results     empty array | ||||
|  @stable      yes | ||||
| """ | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": None, | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'empty array', | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|  | ||||
| @ -1,17 +1,22 @@ | ||||
| #  Ebay (Videos, Music, Files) | ||||
| # | ||||
| # @website     https://www.ebay.com | ||||
| # @provide-api no (nothing found) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      yes (HTML can change) | ||||
| # @parse       url, title, content, price, shipping, source | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Ebay (Videos, Music, Files) | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| from urllib.parse import quote | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.ebay.com', | ||||
|     "wikidata_id": 'Q58024', | ||||
|     "official_api_documentation": 'https://developer.ebay.com/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['shopping'] | ||||
| paging = True | ||||
| 
 | ||||
|  | ||||
| @ -1,3 +1,8 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Elasticsearch | ||||
| """ | ||||
| 
 | ||||
| from json import loads, dumps | ||||
| from requests.auth import HTTPBasicAuth | ||||
| from searx.exceptions import SearxEngineAPIException | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  eTools (Web) | ||||
| 
 | ||||
|  @website      https://www.etools.ch | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import quote | ||||
| from searx.utils import extract_text, eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.etools.ch', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['general'] | ||||
| paging = False | ||||
| language_support = False | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  F-Droid (a repository of FOSS applications for Android) | ||||
| 
 | ||||
|  @website      https://f-droid.org/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://f-droid.org/', | ||||
|     "wikidata_id": 'Q1386210', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,21 +1,23 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Flickr (Images) | ||||
| 
 | ||||
|  @website     https://www.flickr.com | ||||
|  @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, thumbnail, img_src | ||||
|  More info on api-key : https://www.flickr.com/services/apps/create/ | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.flickr.com', | ||||
|     "wikidata_id": 'Q103204', | ||||
|     "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": True, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['images'] | ||||
| 
 | ||||
| nb_per_page = 15 | ||||
|  | ||||
| @ -1,15 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Flickr (Images) | ||||
| 
 | ||||
|  @website     https://www.flickr.com | ||||
|  @provide-api yes (https://secure.flickr.com/services/api/flickr.photos.search.html) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, thumbnail, img_src | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -21,6 +12,16 @@ from searx.utils import ecma_unescape, html_to_text | ||||
| 
 | ||||
| logger = logger.getChild('flickr-noapi') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.flickr.com', | ||||
|     "wikidata_id": 'Q103204', | ||||
|     "official_api_documentation": 'https://secure.flickr.com/services/api/flickr.photos.search.html', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| categories = ['images'] | ||||
| 
 | ||||
| url = 'https://www.flickr.com/' | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  FramaLibre (It) | ||||
| 
 | ||||
|  @website     https://framalibre.org/ | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, thumbnail, img_src | ||||
| """ | ||||
| 
 | ||||
| from html import escape | ||||
| @ -15,6 +8,16 @@ from urllib.parse import urljoin, urlencode | ||||
| from lxml import html | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://framalibre.org/', | ||||
|     "wikidata_id": 'Q30213882', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,17 +1,24 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Frinkiac (Images) | ||||
| 
 | ||||
| @website     https://www.frinkiac.com | ||||
| @provide-api no | ||||
| @using-api   no | ||||
| @results     JSON | ||||
| @stable      no | ||||
| @parse       url, title, img_src | ||||
|  Frinkiac (Images) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://frinkiac.com', | ||||
|     "wikidata_id": 'Q24882614', | ||||
|     "official_api_documentation": { | ||||
|         'url': None, | ||||
|         'comment': 'see https://github.com/MitchellAW/CompuGlobal' | ||||
|     }, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['images'] | ||||
| 
 | ||||
| BASE = 'https://frinkiac.com/' | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Genius | ||||
| 
 | ||||
|  @website     https://www.genius.com/ | ||||
|  @provide-api yes (https://docs.genius.com/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, thumbnail, publishedDate | ||||
|  Genius | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://genius.com/', | ||||
|     "wikidata_id": 'Q3419343', | ||||
|     "official_api_documentation": 'https://docs.genius.com/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,20 +1,22 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Gentoo Wiki | ||||
| 
 | ||||
|  @website      https://wiki.gentoo.org | ||||
|  @provide-api  yes | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://wiki.gentoo.org/', | ||||
|     "wikidata_id": 'Q1050637', | ||||
|     "official_api_documentation": 'https://wiki.gentoo.org/api.php', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| language_support = True | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Gigablast (Web) | ||||
| 
 | ||||
|  @website     https://gigablast.com | ||||
|  @provide-api yes (https://gigablast.com/api.html) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     XML | ||||
|  @stable      yes | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| # pylint: disable=missing-function-docstring, invalid-name | ||||
| 
 | ||||
| @ -18,6 +10,16 @@ from urllib.parse import urlencode | ||||
| # from searx import logger | ||||
| from searx.poolrequests import get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.gigablast.com', | ||||
|     "wikidata_id": 'Q3105449', | ||||
|     "official_api_documentation": 'https://gigablast.com/api.html', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| # gigablast's pagination is totally damaged, don't use it | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Github (It) | ||||
| 
 | ||||
|  @website     https://github.com/ | ||||
|  @provide-api yes (https://developer.github.com/v3/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes (using api) | ||||
|  @parse       url, title, content | ||||
|  Github (IT) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://github.com/', | ||||
|     "wikidata_id": 'Q364', | ||||
|     "official_api_documentation": 'https://developer.github.com/v3/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| 
 | ||||
|  | ||||
| @ -1,19 +1,11 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """Google (Web) | ||||
| 
 | ||||
| :website:     https://www.google.com | ||||
| :provide-api: yes (https://developers.google.com/custom-search/) | ||||
| :using-api:   not the offical, since it needs registration to another service | ||||
| :results:     HTML | ||||
| :stable:      no | ||||
| :parse:       url, title, content, number_of_results, answer, suggestion, correction | ||||
|  For detailed description of the *REST-full* API see: `Query Parameter | ||||
|  Definitions`_. | ||||
| 
 | ||||
| For detailed description of the *REST-full* API see: `Query Parameter | ||||
| Definitions`_. | ||||
| 
 | ||||
| .. _Query Parameter Definitions: | ||||
|  .. _Query Parameter Definitions: | ||||
|  https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| # pylint: disable=invalid-name, missing-function-docstring | ||||
| @ -27,6 +19,16 @@ from searx.exceptions import SearxEngineCaptchaException | ||||
| 
 | ||||
| logger = logger.getChild('google engine') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.google.com', | ||||
|     "wikidata_id": 'Q9366', | ||||
|     "official_api_documentation": 'https://developers.google.com/custom-search/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """Google (Images) | ||||
| 
 | ||||
| :website:     https://images.google.com (redirected to subdomain www.) | ||||
| :provide-api: yes (https://developers.google.com/custom-search/) | ||||
| :using-api:   not the offical, since it needs registration to another service | ||||
| :results:     HTML | ||||
| :stable:      no | ||||
| :template:    images.html | ||||
| :parse:       url, title, content, source, thumbnail_src, img_src | ||||
| 
 | ||||
| For detailed description of the *REST-full* API see: `Query Parameter | ||||
| Definitions`_. | ||||
| 
 | ||||
| @ -18,10 +10,6 @@ Definitions`_. | ||||
|    ``data:` scheme).:: | ||||
| 
 | ||||
|      Header set Content-Security-Policy "img-src 'self' data: ;" | ||||
| 
 | ||||
| .. _Query Parameter Definitions: | ||||
|    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse, unquote | ||||
| @ -39,6 +27,16 @@ from searx.engines.google import ( | ||||
| 
 | ||||
| logger = logger.getChild('google images') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://images.google.com/', | ||||
|     "wikidata_id": 'Q521550', | ||||
|     "official_api_documentation": 'https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions',  # NOQA | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| 
 | ||||
| categories = ['images'] | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Google (News) | ||||
| 
 | ||||
|  @website     https://news.google.com | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, content, publishedDate | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| @ -15,6 +8,16 @@ from lxml import html | ||||
| from searx.utils import match_language | ||||
| from searx.engines.google import _fetch_supported_languages, supported_languages_url  # NOQA # pylint: disable=unused-import | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://news.google.com', | ||||
|     "wikidata_id": 'Q12020', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # search-url | ||||
| categories = ['news'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Google (Videos) | ||||
| 
 | ||||
|  @website     https://www.google.com | ||||
|  @provide-api yes (https://developers.google.com/custom-search/) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, content, thumbnail | ||||
| """ | ||||
| 
 | ||||
| from datetime import date, timedelta | ||||
| @ -16,6 +9,16 @@ from lxml import html | ||||
| from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| import re | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.google.com', | ||||
|     "wikidata_id": 'Q219885', | ||||
|     "official_api_documentation": 'https://developers.google.com/custom-search/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,15 +1,7 @@ | ||||
| #  INA (Videos) | ||||
| # | ||||
| # @website     https://www.ina.fr/ | ||||
| # @provide-api no | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content, publishedDate, thumbnail | ||||
| # | ||||
| # @todo        set content-parameter with correct data | ||||
| # @todo        embedded (needs some md5 from video page) | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  INA (Videos) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from html import unescape | ||||
| @ -18,6 +10,15 @@ from lxml import html | ||||
| from dateutil import parser | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.ina.fr/', | ||||
|     "wikidata_id": 'Q1665109', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
|  | ||||
| @ -1,17 +1,22 @@ | ||||
| # Invidious (Videos) | ||||
| # | ||||
| # @website     https://invidio.us/ | ||||
| # @provide-api yes (https://github.com/omarroth/invidious/wiki/API) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      yes | ||||
| # @parse       url, title, content, publishedDate, thumbnail, embedded, author, length | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Invidious (Videos) | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import quote_plus | ||||
| from dateutil import parser | ||||
| import time | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://instances.invidio.us/', | ||||
|     "wikidata_id": 'Q79343316', | ||||
|     "official_api_documentation": 'https://github.com/omarroth/invidious/wiki/API', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ["videos", "music"] | ||||
| paging = True | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| from collections.abc import Iterable | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Kickass Torrent (Videos, Music, Files) | ||||
| 
 | ||||
|  @website     https://kickass.so | ||||
|  @provide-api no (nothing found) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      yes (HTML can change) | ||||
|  @parse       url, title, content, seed, leech, magnetlink | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| @ -15,6 +8,16 @@ from operator import itemgetter | ||||
| from urllib.parse import quote, urljoin | ||||
| from searx.utils import extract_text, get_torrent_size, convert_str_to_int | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://kickass.so', | ||||
|     "wikidata_id": 'Q17062285', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music', 'files'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,21 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  general mediawiki-engine (Web) | ||||
| 
 | ||||
|  @website     websites built on mediawiki (https://www.mediawiki.org) | ||||
|  @provide-api yes (http://www.mediawiki.org/wiki/API:Search) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title | ||||
| 
 | ||||
|  @todo        content | ||||
|  General mediawiki-engine (Web) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from string import Formatter | ||||
| from urllib.parse import urlencode, quote | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": None, | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'http://www.mediawiki.org/wiki/API:Search', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| language_support = True | ||||
|  | ||||
| @ -1,12 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Microsoft Academic (Science) | ||||
| 
 | ||||
| @website     https://academic.microsoft.com | ||||
| @provide-api yes | ||||
| @using-api   no | ||||
| @results     JSON | ||||
| @stable      no | ||||
| @parse       url, title, content | ||||
|  Microsoft Academic (Science) | ||||
| """ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| @ -15,6 +9,16 @@ from uuid import uuid4 | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://academic.microsoft.com', | ||||
|     "wikidata_id": 'Q28136779', | ||||
|     "official_api_documentation": 'http://ma-graph.org/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['images'] | ||||
| paging = True | ||||
| result_url = 'https://academic.microsoft.com/api/search/GetEntityResults?{query}' | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Mixcloud (Music) | ||||
| 
 | ||||
|  @website     https://http://www.mixcloud.com/ | ||||
|  @provide-api yes (http://www.mixcloud.com/developers/ | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, embedded, publishedDate | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.mixcloud.com/', | ||||
|     "wikidata_id": 'Q6883832', | ||||
|     "official_api_documentation": 'http://www.mixcloud.com/developers/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  not Evil (Onions) | ||||
| 
 | ||||
|  @website     http://hss3uro2hsxfogfq.onion | ||||
|  @provide-api yes (http://hss3uro2hsxfogfq.onion/api.htm) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'http://hss3uro2hsxfogfq.onion', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'http://hss3uro2hsxfogfq.onion/api.htm', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['onions'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Nyaa.si (Anime Bittorrent tracker) | ||||
| 
 | ||||
|  @website      https://nyaa.si/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, content, seed, leech, torrentfile | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import extract_text, get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://nyaa.si/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'images', 'videos', 'music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
| Open Semantic Search | ||||
| 
 | ||||
|  @website    https://www.opensemanticsearch.org/ | ||||
|  @provide-api yes (https://www.opensemanticsearch.org/dev) | ||||
| 
 | ||||
|  @using-api  yes | ||||
|  @results    JSON | ||||
|  @stable     yes | ||||
|  @parse      url, title, content, publishedDate | ||||
|  Open Semantic Search | ||||
| """ | ||||
| 
 | ||||
| from dateutil import parser | ||||
| from json import loads | ||||
| from urllib.parse import quote | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.opensemanticsearch.org/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://www.opensemanticsearch.org/dev', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| base_url = 'http://localhost:8983/solr/opensemanticsearch/' | ||||
| search_string = 'query?q={query}' | ||||
| 
 | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  OpenStreetMap (Map) | ||||
| 
 | ||||
|  @website     https://openstreetmap.org/ | ||||
|  @provide-api yes (http://wiki.openstreetmap.org/wiki/Nominatim) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from json import loads | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.openstreetmap.org/', | ||||
|     "wikidata_id": 'Q936', | ||||
|     "official_api_documentation": 'http://wiki.openstreetmap.org/wiki/Nominatim', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['map'] | ||||
| paging = False | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  PDBe (Protein Data Bank in Europe) | ||||
| 
 | ||||
|  @website       https://www.ebi.ac.uk/pdbe | ||||
|  @provide-api   yes (https://www.ebi.ac.uk/pdbe/api/doc/search.html), | ||||
|                 unlimited | ||||
|  @using-api     yes | ||||
|  @results       python dictionary (from json) | ||||
|  @stable        yes | ||||
|  @parse         url, title, content, img_src | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.ebi.ac.uk/pdbe', | ||||
|     "wikidata_id": 'Q55823905', | ||||
|     "official_api_documentation": 'https://www.ebi.ac.uk/pdbe/api/doc/search.html', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['science'] | ||||
| 
 | ||||
| hide_obsolete = False | ||||
|  | ||||
| @ -1,15 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  peertube (Videos) | ||||
| 
 | ||||
|  @website     https://www.peertube.live | ||||
|  @provide-api yes (https://docs.joinpeertube.org/api-rest-reference.html) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, thumbnail, publishedDate, embedded | ||||
| 
 | ||||
|  @todo        implement time range support | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -17,6 +8,16 @@ from datetime import datetime | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://joinpeertube.org', | ||||
|     "wikidata_id": 'Q50938515', | ||||
|     "official_api_documentation": 'https://docs.joinpeertube.org/api-rest-reference.html', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ["videos"] | ||||
| paging = True | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Photon (Map) | ||||
| 
 | ||||
|  @website     https://photon.komoot.de | ||||
|  @provide-api yes (https://photon.komoot.de/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import searx_useragent | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://photon.komoot.de', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://photon.komoot.de/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['map'] | ||||
| paging = False | ||||
|  | ||||
| @ -1,12 +1,7 @@ | ||||
| #  Piratebay (Videos, Music, Files) | ||||
| # | ||||
| # @website     https://thepiratebay.org | ||||
| # @provide-api yes (https://apibay.org/) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      no (the API is not documented nor versioned) | ||||
| # @parse       url, title, seed, leech, magnetlink, filesize, publishedDate | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Piratebay (Videos, Music, Files) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from datetime import datetime | ||||
| @ -15,6 +10,16 @@ from operator import itemgetter | ||||
| from urllib.parse import quote | ||||
| from searx.utils import get_torrent_size | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://thepiratebay.org', | ||||
|     "wikidata_id": 'Q22663', | ||||
|     "official_api_documentation": 'https://apibay.org/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ["videos", "music", "files"] | ||||
| 
 | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  PubMed (Scholar publications) | ||||
|  @website     https://www.ncbi.nlm.nih.gov/pubmed/ | ||||
|  @provide-api yes (https://www.ncbi.nlm.nih.gov/home/develop/api/) | ||||
|  @using-api   yes | ||||
|  @results     XML | ||||
|  @stable      yes | ||||
|  @parse       url, title, publishedDate, content | ||||
|  More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/ | ||||
| """ | ||||
| 
 | ||||
| from flask_babel import gettext | ||||
| @ -17,6 +9,18 @@ from datetime import datetime | ||||
| from urllib.parse import urlencode | ||||
| from searx.poolrequests import get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.ncbi.nlm.nih.gov/pubmed/', | ||||
|     "wikidata_id": 'Q1540899', | ||||
|     "official_api_documentation": { | ||||
|         'url': 'https://www.ncbi.nlm.nih.gov/home/develop/api/', | ||||
|         'comment': 'More info on api: https://www.ncbi.nlm.nih.gov/books/NBK25501/' | ||||
|     }, | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'XML', | ||||
| } | ||||
| 
 | ||||
| categories = ['science'] | ||||
| 
 | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Qwant (Web, Images, News, Social) | ||||
| 
 | ||||
|  @website     https://qwant.com/ | ||||
|  @provide-api not officially (https://api.qwant.com/api/search/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from datetime import datetime | ||||
| @ -17,6 +10,15 @@ from searx.utils import html_to_text, match_language | ||||
| from searx.exceptions import SearxEngineAPIException, SearxEngineCaptchaException | ||||
| from searx.raise_for_httperror import raise_for_httperror | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.qwant.com/', | ||||
|     "wikidata_id": 'Q14657870', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = [] | ||||
|  | ||||
| @ -1,17 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Recoll (local search engine) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, content, size, abstract, author, mtype, subtype, time, \ | ||||
|               filename, label, type, embedded | ||||
| """ | ||||
| 
 | ||||
| from datetime import date, timedelta | ||||
| from json import loads | ||||
| from urllib.parse import urlencode, quote | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": None, | ||||
|     "wikidata_id": 'Q15735774', | ||||
|     "official_api_documentation": 'https://www.lesbonscomptes.com/recoll/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| time_range_support = True | ||||
| 
 | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Reddit | ||||
| 
 | ||||
|  @website      https://www.reddit.com/ | ||||
|  @provide-api  yes (https://www.reddit.com/dev/api) | ||||
| 
 | ||||
|  @using-api    yes | ||||
|  @results      JSON | ||||
|  @stable       yes | ||||
|  @parse        url, title, content, thumbnail, publishedDate | ||||
| """ | ||||
| 
 | ||||
| import json | ||||
| from datetime import datetime | ||||
| from urllib.parse import urlencode, urljoin, urlparse | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.reddit.com/', | ||||
|     "wikidata_id": 'Q1136', | ||||
|     "official_api_documentation": 'https://www.reddit.com/dev/api', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general', 'images', 'news', 'social media'] | ||||
| page_size = 25 | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  ScanR Structures (Science) | ||||
| 
 | ||||
|  @website     https://scanr.enseignementsup-recherche.gouv.fr | ||||
|  @provide-api yes (https://scanr.enseignementsup-recherche.gouv.fr/api/swagger-ui.html) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, img_src | ||||
| """ | ||||
| 
 | ||||
| from json import loads, dumps | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://scanr.enseignementsup-recherche.gouv.fr', | ||||
|     "wikidata_id": 'Q44105684', | ||||
|     "official_api_documentation": 'https://scanr.enseignementsup-recherche.gouv.fr/opendata', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['science'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,20 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Searchcode (It) | ||||
| 
 | ||||
|  @website     https://searchcode.com/ | ||||
|  @provide-api yes (https://searchcode.com/api/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content | ||||
|  Searchcode (IT) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://searchcode.com/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://searchcode.com/api/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
|  | ||||
| @ -1,18 +1,20 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Searx (all) | ||||
| 
 | ||||
|  @website     https://github.com/searx/searx | ||||
|  @provide-api yes (https://searx.github.io/searx/dev/search_api.html) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes (using api) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from searx.engines import categories as searx_categories | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://github.com/searx/searx', | ||||
|     "wikidata_id": 'Q17639196', | ||||
|     "official_api_documentation": 'https://searx.github.io/searx/dev/search_api.html', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = searx_categories.keys() | ||||
| 
 | ||||
|  | ||||
| @ -1,17 +1,23 @@ | ||||
| # SepiaSearch (Videos) | ||||
| # | ||||
| # @website     https://sepiasearch.org | ||||
| # @provide-api https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      yes | ||||
| # @parse       url, title, content, publishedDate, thumbnail | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  SepiaSearch (Videos) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser, relativedelta | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://sepiasearch.org', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": "https://framagit.org/framasoft/peertube/search-index/-/tree/master/server/controllers/api",  # NOQA | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['videos'] | ||||
| paging = True | ||||
| language_support = True | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Soundcloud (Music) | ||||
| 
 | ||||
|  @website     https://soundcloud.com | ||||
|  @provide-api yes (https://developers.soundcloud.com/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, publishedDate, embedded | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -18,6 +11,15 @@ from urllib.parse import quote_plus, urlencode | ||||
| from searx import logger | ||||
| from searx.poolrequests import get as http_get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://soundcloud.com', | ||||
|     "wikidata_id": 'Q568769', | ||||
|     "official_api_documentation": 'https://developers.soundcloud.com/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Spotify (Music) | ||||
| 
 | ||||
|  @website     https://spotify.com | ||||
|  @provide-api yes (https://developer.spotify.com/web-api/search-item/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content, embedded | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| @ -15,6 +8,16 @@ from urllib.parse import urlencode | ||||
| import requests | ||||
| import base64 | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.spotify.com', | ||||
|     "wikidata_id": 'Q689141', | ||||
|     "official_api_documentation": 'https://developer.spotify.com/web-api/search-item/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Stackoverflow (It) | ||||
| 
 | ||||
|  @website     https://stackoverflow.com/ | ||||
|  @provide-api not clear (https://api.stackexchange.com/docs/advanced-search) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
|  Stackoverflow (IT) | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urljoin, urlparse | ||||
| @ -15,6 +8,16 @@ from lxml import html | ||||
| from searx.utils import extract_text | ||||
| from searx.exceptions import SearxEngineCaptchaException | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://stackoverflow.com/', | ||||
|     "wikidata_id": 'Q549037', | ||||
|     "official_api_documentation": 'https://api.stackexchange.com/docs', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,7 @@ | ||||
| #  Startpage (Web) | ||||
| # | ||||
| # @website     https://startpage.com | ||||
| # @provide-api no (nothing found) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content | ||||
| # | ||||
| # @todo        paging | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Startpage (Web) | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from dateutil import parser | ||||
| @ -19,6 +12,16 @@ from babel import Locale | ||||
| from babel.localedata import locale_identifiers | ||||
| from searx.utils import extract_text, eval_xpath, match_language | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://startpage.com', | ||||
|     "wikidata_id": 'Q2333295', | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| # there is a mechanism to block "bot" search | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Tokyo Toshokan (A BitTorrent Library for Japanese Media) | ||||
| 
 | ||||
|  @website      https://www.tokyotosho.info/ | ||||
|  @provide-api  no | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change) | ||||
|  @parse        url, title, publishedDate, seed, leech, | ||||
|                filesize, magnetlink, content | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -16,6 +9,16 @@ from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.utils import extract_text, get_torrent_size, int_or_zero | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.tokyotosho.info/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'videos', 'music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Torrentz2.is (BitTorrent meta-search engine) | ||||
| 
 | ||||
|  @website      https://torrentz2.is/ | ||||
|  @provide-api  no | ||||
| 
 | ||||
|  @using-api    no | ||||
|  @results      HTML | ||||
|  @stable       no (HTML can change, although unlikely, | ||||
|                    see https://torrentz.is/torrentz.btsearch) | ||||
|  @parse        url, title, publishedDate, seed, leech, filesize, magnetlink | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| @ -17,6 +9,16 @@ from lxml import html | ||||
| from datetime import datetime | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://torrentz2.is/', | ||||
|     "wikidata_id": 'Q1156687', | ||||
|     "official_api_documentation": 'https://torrentz.is/torrentz.btsearch', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['files', 'videos', 'music'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,18 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  MyMemory Translated | ||||
| 
 | ||||
|  @website     https://mymemory.translated.net/ | ||||
|  @provide-api yes (https://mymemory.translated.net/doc/spec.php) | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://mymemory.translated.net/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://mymemory.translated.net/doc/spec.php', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| engine_type = 'online_dictionnary' | ||||
| categories = ['general'] | ||||
| url = 'https://api.mymemory.translated.net/get?q={query}&langpair={from_lang}|{to_lang}{key}' | ||||
|  | ||||
| @ -1,18 +1,21 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Unsplash | ||||
| 
 | ||||
|  @website     https://unsplash.com | ||||
|  @provide-api yes (https://unsplash.com/developers) | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     JSON (using search portal's infiniscroll API) | ||||
|  @stable      no (JSON format could change any time) | ||||
|  @parse       url, title, img_src, thumbnail_src | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl | ||||
| from json import loads | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://unsplash.com', | ||||
|     "wikidata_id": 'Q28233552', | ||||
|     "official_api_documentation": 'https://unsplash.com/developers', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| url = 'https://unsplash.com/' | ||||
| search_url = url + 'napi/search/photos?' | ||||
| categories = ['images'] | ||||
|  | ||||
| @ -1,21 +1,22 @@ | ||||
| #  Vimeo (Videos) | ||||
| # | ||||
| # @website     https://vimeo.com/ | ||||
| # @provide-api yes (http://developer.vimeo.com/api), | ||||
| #              they have a maximum count of queries/hour | ||||
| # | ||||
| # @using-api   no (TODO, rewrite to api) | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, publishedDate,  thumbnail, embedded | ||||
| # | ||||
| # @todo        rewrite to api | ||||
| # @todo        set content-parameter with correct data | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Vimeo (Videos) | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://vimeo.com/', | ||||
|     "wikidata_id": 'Q156376', | ||||
|     "official_api_documentation": 'http://developer.vimeo.com/api', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,14 +1,6 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Wikidata | ||||
| 
 | ||||
|  @website     https://wikidata.org | ||||
|  @provide-api yes (https://query.wikidata.org/) | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, infobox | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| @ -27,6 +19,16 @@ from searx.engines.wikipedia import _fetch_supported_languages, supported_langua | ||||
| 
 | ||||
| logger = logger.getChild('wikidata') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://wikidata.org/', | ||||
|     "wikidata_id": 'Q2013', | ||||
|     "official_api_documentation": 'https://query.wikidata.org/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # SPARQL | ||||
| SPARQL_ENDPOINT_URL = 'https://query.wikidata.org/sparql' | ||||
| SPARQL_EXPLAIN_URL = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql?explain' | ||||
|  | ||||
| @ -1,13 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Wikipedia (Web) | ||||
| 
 | ||||
|  @website     https://en.wikipedia.org/api/rest_v1/ | ||||
|  @provide-api yes | ||||
| 
 | ||||
|  @using-api   yes | ||||
|  @results     JSON | ||||
|  @stable      yes | ||||
|  @parse       url, infobox | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import quote | ||||
| @ -16,6 +9,16 @@ from lxml.html import fromstring | ||||
| from searx.utils import match_language, searx_useragent | ||||
| from searx.raise_for_httperror import raise_for_httperror | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.wikipedia.org/', | ||||
|     "wikidata_id": 'Q52', | ||||
|     "official_api_documentation": 'https://en.wikipedia.org/api/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # search-url | ||||
| search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}' | ||||
| supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias' | ||||
|  | ||||
| @ -1,16 +1,21 @@ | ||||
| # Wolfram Alpha (Science) | ||||
| # | ||||
| # @website     https://www.wolframalpha.com | ||||
| # @provide-api yes (https://api.wolframalpha.com/v2/) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     XML | ||||
| # @stable      yes | ||||
| # @parse       url, infobox | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Wolfram|Alpha (Science) | ||||
| """ | ||||
| 
 | ||||
| from lxml import etree | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.wolframalpha.com', | ||||
|     "wikidata_id": 'Q207006', | ||||
|     "official_api_documentation": 'https://products.wolframalpha.com/api/', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'XML', | ||||
| } | ||||
| 
 | ||||
| # search-url | ||||
| search_url = 'https://api.wolframalpha.com/v2/query?appid={api_key}&{query}' | ||||
| site_url = 'https://www.wolframalpha.com/input/?{query}' | ||||
|  | ||||
| @ -1,12 +1,7 @@ | ||||
| # Wolfram|Alpha (Science) | ||||
| # | ||||
| # @website     https://www.wolframalpha.com/ | ||||
| # @provide-api yes (https://api.wolframalpha.com/v2/) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     JSON | ||||
| # @stable      no | ||||
| # @parse       url, infobox | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Wolfram|Alpha (Science) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from time import time | ||||
| @ -14,6 +9,16 @@ from urllib.parse import urlencode | ||||
| 
 | ||||
| from searx.poolrequests import get as http_get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.wolframalpha.com/', | ||||
|     "wikidata_id": 'Q207006', | ||||
|     "official_api_documentation": 'https://products.wolframalpha.com/api/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # search-url | ||||
| url = 'https://www.wolframalpha.com/' | ||||
| 
 | ||||
|  | ||||
| @ -1,19 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  1x (Images) | ||||
| 
 | ||||
|  @website     http://1x.com/ | ||||
|  @provide-api no | ||||
| 
 | ||||
|  @using-api   no | ||||
|  @results     HTML | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, thumbnail | ||||
| """ | ||||
| 
 | ||||
| from lxml import html, etree | ||||
| from urllib.parse import urlencode, urljoin | ||||
| from searx.utils import extract_text, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://1x.com/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
| paging = False | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| 
 | ||||
| from lxml import html | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import extract_text, extract_url, eval_xpath, eval_xpath_list | ||||
|  | ||||
| @ -1,16 +1,7 @@ | ||||
| # Yacy (Web, Images, Videos, Music, Files) | ||||
| # | ||||
| # @website     http://yacy.net | ||||
| # @provide-api yes | ||||
| #              (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      yes | ||||
| # @parse       (general)    url, title, content, publishedDate | ||||
| # @parse       (images)     url, title, img_src | ||||
| # | ||||
| # @todo        parse video, audio and file results | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Yacy (Web, Images, Videos, Music, Files) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| @ -20,6 +11,16 @@ from requests.auth import HTTPDigestAuth | ||||
| 
 | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://yacy.net/', | ||||
|     "wikidata_id": 'Q1759675', | ||||
|     "official_api_documentation": 'https://wiki.yacy.net/index.php/Dev:API', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general', 'images']  # TODO , 'music', 'videos', 'files' | ||||
| paging = True | ||||
|  | ||||
| @ -1,20 +1,22 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Yahoo (Web) | ||||
| 
 | ||||
|  @website     https://search.yahoo.com/web | ||||
|  @provide-api yes (https://developer.yahoo.com/boss/search/), | ||||
|               $0.80/1000 queries | ||||
| 
 | ||||
|  @using-api   no (because pricing) | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content, suggestion | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import unquote, urlencode | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, extract_url, match_language, eval_xpath | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://search.yahoo.com/', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": 'https://developer.yahoo.com/api/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,13 +1,7 @@ | ||||
| # Yahoo (News) | ||||
| # | ||||
| # @website     https://news.yahoo.com | ||||
| # @provide-api yes (https://developer.yahoo.com/boss/search/) | ||||
| #              $0.80/1000 queries | ||||
| # | ||||
| # @using-api   no (because pricing) | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content, publishedDate | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Yahoo (News) | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from datetime import datetime, timedelta | ||||
| @ -18,6 +12,16 @@ from searx.engines.yahoo import _fetch_supported_languages, supported_languages_ | ||||
| from dateutil import parser | ||||
| from searx.utils import extract_text, extract_url, match_language | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://news.yahoo.com', | ||||
|     "wikidata_id": 'Q3044717', | ||||
|     "official_api_documentation": 'https://developer.yahoo.com/api/', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['news'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,12 +1,6 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Yandex (Web) | ||||
| 
 | ||||
|  @website     https://yandex.ru/ | ||||
|  @provide-api ? | ||||
|  @using-api   no | ||||
|  @results     HTML (using search portal) | ||||
|  @stable      no (HTML can change) | ||||
|  @parse       url, title, content | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode, urlparse | ||||
| @ -16,6 +10,16 @@ from searx.exceptions import SearxEngineCaptchaException | ||||
| 
 | ||||
| logger = logger.getChild('yandex engine') | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://yandex.ru/', | ||||
|     "wikidata_id": 'Q5281', | ||||
|     "official_api_documentation": "?", | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,12 +1,7 @@ | ||||
| #  Yggtorrent (Videos, Music, Files) | ||||
| # | ||||
| # @website     https://www2.yggtorrent.si | ||||
| # @provide-api no (nothing found) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML (using search portal) | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, seed, leech, publishedDate, filesize | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Yggtorrent (Videos, Music, Files) | ||||
| """ | ||||
| 
 | ||||
| from lxml import html | ||||
| from operator import itemgetter | ||||
| @ -15,6 +10,16 @@ from urllib.parse import quote | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| from searx.poolrequests import get as http_get | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www2.yggtorrent.si', | ||||
|     "wikidata_id": None, | ||||
|     "official_api_documentation": None, | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music', 'files'] | ||||
| paging = True | ||||
|  | ||||
| @ -1,18 +1,23 @@ | ||||
| # Youtube (Videos) | ||||
| # | ||||
| # @website     https://www.youtube.com/ | ||||
| # @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      yes | ||||
| # @parse       url, title, content, publishedDate, thumbnail, embedded | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Youtube (Videos) | ||||
| """ | ||||
| 
 | ||||
| from json import loads | ||||
| from dateutil import parser | ||||
| from urllib.parse import urlencode | ||||
| from searx.exceptions import SearxEngineAPIException | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.youtube.com/', | ||||
|     "wikidata_id": 'Q866', | ||||
|     "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', | ||||
|     "use_official_api": True, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music'] | ||||
| paging = False | ||||
|  | ||||
| @ -1,17 +1,22 @@ | ||||
| # Youtube (Videos) | ||||
| # | ||||
| # @website     https://www.youtube.com/ | ||||
| # @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) | ||||
| # | ||||
| # @using-api   no | ||||
| # @results     HTML | ||||
| # @stable      no | ||||
| # @parse       url, title, content, publishedDate, thumbnail, embedded | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| """ | ||||
|  Youtube (Videos) | ||||
| """ | ||||
| 
 | ||||
| from functools import reduce | ||||
| from json import loads | ||||
| from urllib.parse import quote_plus | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://www.youtube.com/', | ||||
|     "wikidata_id": 'Q866', | ||||
|     "official_api_documentation": 'https://developers.google.com/youtube/v3/docs/search/list?apix=true', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'HTML', | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['videos', 'music'] | ||||
| paging = True | ||||
|  | ||||
| @ -157,6 +157,13 @@ engines: | ||||
|     timeout : 7.0 | ||||
|     disabled : True | ||||
|     shortcut : ai | ||||
|     about: | ||||
|       website: https://archive.is/ | ||||
|       wikidata_id: Q13515725 | ||||
|       official_api_documentation: http://mementoweb.org/depot/native/archiveis/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : arxiv | ||||
|     engine : arxiv | ||||
| @ -201,6 +208,13 @@ engines: | ||||
|     timeout : 4.0 | ||||
|     disabled : True | ||||
|     shortcut : bb | ||||
|     about: | ||||
|       website: https://bitbucket.org/ | ||||
|       wikidata_id: Q2493781 | ||||
|       official_api_documentation: https://developer.atlassian.com/bitbucket | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : btdigg | ||||
|     engine : btdigg | ||||
| @ -216,6 +230,13 @@ engines: | ||||
|     categories : videos | ||||
|     disabled : True | ||||
|     shortcut : c3tv | ||||
|     about: | ||||
|       website: https://media.ccc.de/ | ||||
|       wikidata_id: Q80729951 | ||||
|       official_api_documentation: https://github.com/voc/voctoweb | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : crossref | ||||
|     engine : json_engine | ||||
| @ -226,6 +247,13 @@ engines: | ||||
|     content_query : fullCitation | ||||
|     categories : science | ||||
|     shortcut : cr | ||||
|     about: | ||||
|       website: https://www.crossref.org/ | ||||
|       wikidata_id: Q5188229 | ||||
|       official_api_documentation: https://github.com/CrossRef/rest-api-doc | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
|   - name : currency | ||||
|     engine : currency_convert | ||||
| @ -271,6 +299,13 @@ engines: | ||||
|     categories : general | ||||
|     shortcut : ew | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.erowid.org/ | ||||
|       wikidata_id: Q1430691 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
| #  - name : elasticsearch | ||||
| #    shortcut : es | ||||
| @ -321,6 +356,13 @@ engines: | ||||
|     first_page_num : 1 | ||||
|     shortcut : et | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.etymonline.com/ | ||||
|       wikidata_id: Q1188617 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
| #  - name : ebay | ||||
| #    engine : ebay | ||||
| @ -360,6 +402,9 @@ engines: | ||||
|     search_type : title | ||||
|     timeout : 5.0 | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://directory.fsf.org/ | ||||
|       wikidata_id: Q2470288 | ||||
| 
 | ||||
|   - name : frinkiac | ||||
|     engine : frinkiac | ||||
| @ -394,6 +439,13 @@ engines: | ||||
|     shortcut : gl | ||||
|     timeout : 10.0 | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://about.gitlab.com/ | ||||
|       wikidata_id: Q16639197 | ||||
|       official_api_documentation: https://docs.gitlab.com/ee/api/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
|   - name : github | ||||
|     engine : github | ||||
| @ -411,6 +463,13 @@ engines: | ||||
|     categories : it | ||||
|     shortcut : cb | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://codeberg.org/ | ||||
|       wikidata_id: Q68223191 | ||||
|       official_api_documentation: https://try.gitea.io/api/swagger | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
|   - name : google | ||||
|     engine : google | ||||
| @ -441,6 +500,13 @@ engines: | ||||
|     first_page_num : 0 | ||||
|     categories : science | ||||
|     shortcut : gos | ||||
|     about: | ||||
|       website: https://scholar.google.com/ | ||||
|       wikidata_id: Q494817 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : google play apps | ||||
|     engine : xpath | ||||
| @ -453,6 +519,13 @@ engines: | ||||
|     categories : files | ||||
|     shortcut : gpa | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://play.google.com/ | ||||
|       wikidata_id: Q79576 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : google play movies | ||||
|     engine : xpath | ||||
| @ -465,6 +538,13 @@ engines: | ||||
|     categories : videos | ||||
|     shortcut : gpm | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://play.google.com/ | ||||
|       wikidata_id: Q79576 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : google play music | ||||
|     engine : xpath | ||||
| @ -477,6 +557,13 @@ engines: | ||||
|     categories : music | ||||
|     shortcut : gps | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://play.google.com/ | ||||
|       wikidata_id: Q79576 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : geektimes | ||||
|     engine : xpath | ||||
| @ -489,6 +576,13 @@ engines: | ||||
|     timeout : 4.0 | ||||
|     disabled : True | ||||
|     shortcut : gt | ||||
|     about: | ||||
|       website: https://geektimes.ru/ | ||||
|       wikidata_id: Q50572423 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : habrahabr | ||||
|     engine : xpath | ||||
| @ -501,6 +595,13 @@ engines: | ||||
|     timeout : 4.0 | ||||
|     disabled : True | ||||
|     shortcut : habr | ||||
|     about: | ||||
|       website: https://habr.com/ | ||||
|       wikidata_id: Q4494434 | ||||
|       official_api_documentation: https://habr.com/en/docs/help/api/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : hoogle | ||||
|     engine : json_engine | ||||
| @ -513,6 +614,13 @@ engines: | ||||
|     page_size : 20 | ||||
|     categories : it | ||||
|     shortcut : ho | ||||
|     about: | ||||
|       website: https://www.haskell.org/ | ||||
|       wikidata_id: Q34010 | ||||
|       official_api_documentation: https://hackage.haskell.org/api | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
|   - name : ina | ||||
|     engine : ina | ||||
| @ -543,6 +651,13 @@ engines: | ||||
|     timeout : 7.0 | ||||
|     disabled : True | ||||
|     shortcut : lg | ||||
|     about: | ||||
|       website: http://libgen.rs/ | ||||
|       wikidata_id: Q22017206 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : lobste.rs | ||||
|     engine : xpath | ||||
| @ -555,6 +670,13 @@ engines: | ||||
|     shortcut : lo | ||||
|     timeout : 3.0 | ||||
|     disabled: True | ||||
|     about: | ||||
|       website: https://lobste.rs/ | ||||
|       wikidata_id: Q60762874 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : metager | ||||
|     engine : xpath | ||||
| @ -566,6 +688,13 @@ engines: | ||||
|     categories : general | ||||
|     shortcut : mg | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://metager.org/ | ||||
|       wikidata_id: Q1924645 | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : microsoft academic | ||||
|     engine : microsoft_academic | ||||
| @ -589,6 +718,13 @@ engines: | ||||
|     disabled: True | ||||
|     timeout: 5.0 | ||||
|     shortcut : npm | ||||
|     about: | ||||
|       website: https://npms.io/ | ||||
|       wikidata_id: Q7067518 | ||||
|       official_api_documentation: https://api-docs.npms.io/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
| # Requires Tor | ||||
|   - name : not evil | ||||
| @ -617,6 +753,13 @@ engines: | ||||
|     categories : science | ||||
|     shortcut : oad | ||||
|     timeout: 5.0 | ||||
|     about: | ||||
|       website: https://www.openaire.eu/ | ||||
|       wikidata_id: Q25106053 | ||||
|       official_api_documentation: https://api.openaire.eu/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
|   - name : openairepublications | ||||
|     engine : json_engine | ||||
| @ -629,6 +772,13 @@ engines: | ||||
|     categories : science | ||||
|     shortcut : oap | ||||
|     timeout: 5.0 | ||||
|     about: | ||||
|       website: https://www.openaire.eu/ | ||||
|       wikidata_id: Q25106053 | ||||
|       official_api_documentation: https://api.openaire.eu/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: JSON | ||||
| 
 | ||||
| #  - name : opensemanticsearch | ||||
| #    engine : opensemantic | ||||
| @ -650,6 +800,13 @@ engines: | ||||
|     timeout : 4.0 | ||||
|     disabled : True | ||||
|     shortcut : or | ||||
|     about: | ||||
|       website: https://openrepos.net/ | ||||
|       wikidata_id: | ||||
|       official_api_documentation: | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : pdbe | ||||
|     engine : pdbe | ||||
| @ -768,6 +925,13 @@ engines: | ||||
|     content_xpath : .//div[@class="search-result-abstract"] | ||||
|     shortcut : se | ||||
|     categories : science | ||||
|     about: | ||||
|       website: https://www.semanticscholar.org/ | ||||
|       wikidata_id: Q22908627 | ||||
|       official_api_documentation: https://api.semanticscholar.org/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
| # Spotify needs API credentials | ||||
| #  - name : spotify | ||||
| @ -876,6 +1040,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wikibooks.org/ | ||||
|       wikidata_id: Q367 | ||||
| 
 | ||||
|   - name : wikinews | ||||
|     engine : mediawiki | ||||
| @ -885,6 +1052,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wikinews.org/ | ||||
|       wikidata_id: Q964 | ||||
| 
 | ||||
|   - name : wikiquote | ||||
|     engine : mediawiki | ||||
| @ -896,6 +1066,9 @@ engines: | ||||
|     disabled : True | ||||
|     additional_tests: | ||||
|       rosebud: *test_rosebud | ||||
|     about: | ||||
|       website: https://www.wikiquote.org/ | ||||
|       wikidata_id: Q369 | ||||
| 
 | ||||
|   - name : wikisource | ||||
|     engine : mediawiki | ||||
| @ -905,6 +1078,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wikisource.org/ | ||||
|       wikidata_id: Q263 | ||||
| 
 | ||||
|   - name : wiktionary | ||||
|     engine : mediawiki | ||||
| @ -914,6 +1090,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wiktionary.org/ | ||||
|       wikidata_id: Q151 | ||||
| 
 | ||||
|   - name : wikiversity | ||||
|     engine : mediawiki | ||||
| @ -923,6 +1102,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wikiversity.org/ | ||||
|       wikidata_id: Q370 | ||||
| 
 | ||||
|   - name : wikivoyage | ||||
|     engine : mediawiki | ||||
| @ -932,6 +1114,9 @@ engines: | ||||
|     number_of_results : 5 | ||||
|     search_type : text | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.wikivoyage.org/ | ||||
|       wikidata_id: Q373 | ||||
| 
 | ||||
|   - name : wolframalpha | ||||
|     shortcut : wa | ||||
| @ -979,6 +1164,13 @@ engines: | ||||
|     first_page_num : 0 | ||||
|     page_size : 10 | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.seznam.cz/ | ||||
|       wikidata_id: Q3490485 | ||||
|       official_api_documentation: https://api.sklik.cz/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : mojeek | ||||
|     shortcut: mjk | ||||
| @ -993,6 +1185,13 @@ engines: | ||||
|     first_page_num : 0 | ||||
|     page_size : 10 | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.mojeek.com/ | ||||
|       wikidata_id: Q60747299 | ||||
|       official_api_documentation: https://www.mojeek.com/services/api.html | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : naver | ||||
|     shortcut: nvr | ||||
| @ -1007,6 +1206,13 @@ engines: | ||||
|     first_page_num : 1 | ||||
|     page_size : 10 | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://www.naver.com/ | ||||
|       wikidata_id: Q485639 | ||||
|       official_api_documentation: https://developers.naver.com/docs/nmt/examples/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : rubygems | ||||
|     shortcut: rbg | ||||
| @ -1021,6 +1227,13 @@ engines: | ||||
|     first_page_num : 1 | ||||
|     categories: it | ||||
|     disabled : True | ||||
|     about: | ||||
|       website: https://rubygems.org/ | ||||
|       wikidata_id: Q1853420 | ||||
|       official_api_documentation: https://guides.rubygems.org/rubygems-org-api/ | ||||
|       use_official_api: false | ||||
|       require_api_key: false | ||||
|       results: HTML | ||||
| 
 | ||||
|   - name : peertube | ||||
|     engine: peertube | ||||
|  | ||||
							
								
								
									
										206
									
								
								utils/fetch_engine_descriptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										206
									
								
								utils/fetch_engine_descriptions.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,206 @@ | ||||
| #!/usr/bin/env python | ||||
| 
 | ||||
| import sys | ||||
| import json | ||||
| from urllib.parse import quote, urlparse | ||||
| from os.path import realpath, dirname | ||||
| import cld3 | ||||
| from lxml.html import fromstring | ||||
| 
 | ||||
| # set path | ||||
| sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) | ||||
| 
 | ||||
| from searx.engines.wikidata import send_wikidata_query | ||||
| from searx.utils import extract_text | ||||
| import searx | ||||
| import searx.search | ||||
| import searx.poolrequests | ||||
| 
 | ||||
# SPARQL: for every Wikidata item in %IDS%, list the titles of its Wikipedia
# articles in the languages of %LANGUAGES_SPARQL% (namespaced pages excluded).
SPARQL_WIKIPEDIA_ARTICLE = """
SELECT DISTINCT ?item ?name
WHERE {
  VALUES ?item { %IDS% }
  ?article schema:about ?item ;
              schema:inLanguage ?lang ;
              schema:name ?name ;
              schema:isPartOf [ wikibase:wikiGroup "wikipedia" ] .
  FILTER(?lang in (%LANGUAGES_SPARQL%)) .
  FILTER (!CONTAINS(?name, ':')) .
}
"""

# SPARQL: the schema:description of every Wikidata item in %IDS%, restricted
# to the languages of %LANGUAGES_SPARQL%.
SPARQL_DESCRIPTION = """
SELECT DISTINCT ?item ?itemDescription
WHERE {
  VALUES ?item { %IDS% }
  ?item schema:description ?itemDescription .
  FILTER (lang(?itemDescription) in (%LANGUAGES_SPARQL%))
}
ORDER BY ?itemLang
"""

# Locales declared in the searx settings (e.g. 'en', 'fr_FR', ...).
LANGUAGES = searx.settings['locales'].keys()
# Deduplicated, quoted base language codes for SPARQL, e.g. "'en', 'fr'".
LANGUAGES_SPARQL = ', '.join({repr(code.split('_')[0]) for code in LANGUAGES})
# Space-separated 'wd:Qxxx' id list; filled in by initialize().
IDS = None

# engine name -> {lang: [description, source]}
descriptions = {}
# wikidata id -> set of engine names sharing that id
wd_to_engine_name = {}
| 
 | ||||
| 
 | ||||
def normalize_description(description):
    """Return *description* with C0 control characters replaced by spaces
    and all whitespace runs collapsed to single spaces.

    :param description: raw description text
    :return: cleaned single-line string
    """
    # range(32) covers all C0 control characters (0x00-0x1F); the previous
    # range(0, 31) was off by one and left chr(31) (unit separator) in place.
    for c in (chr(i) for i in range(32)):
        description = description.replace(c, ' ')
    # strip + split collapses every remaining whitespace run
    description = ' '.join(description.strip().split())
    return description
| 
 | ||||
| 
 | ||||
def update_description(engine_name, lang, description, source, replace=True):
    """Record *description* for (engine_name, lang).

    When *replace* is false, an already-stored description for that
    language is kept and this call is a no-op.
    """
    per_engine = descriptions[engine_name]
    if not replace and lang in per_engine:
        return
    per_engine[lang] = [normalize_description(description), source]
| 
 | ||||
| 
 | ||||
def get_wikipedia_summary(language, pageid):
    """Fetch the plain-text summary of a Wikipedia page.

    Uses the Wikipedia REST v1 page-summary endpoint.

    :param language: Wikipedia subdomain, e.g. 'en'
    :param pageid: page title (URL-quoted before the request)
    :return: the 'extract' string, or None on any failure
    """
    search_url = 'https://{language}.wikipedia.org/api/rest_v1/page/summary/{title}'
    url = search_url.format(title=quote(pageid), language=language)
    try:
        response = searx.poolrequests.get(url)
        response.raise_for_status()
        api_result = json.loads(response.text)
        return api_result.get('extract')
    # best-effort: any request/HTTP/JSON problem yields None.  The original
    # bare `except:` also swallowed SystemExit/KeyboardInterrupt.
    except Exception:
        return None
| 
 | ||||
| 
 | ||||
def detect_language(text):
    """Best-effort language detection via cld3.

    Returns the detected language code only when the classifier is both
    reliable and highly confident (p >= 0.98); otherwise None.
    """
    guess = cld3.get_language(str(text))  # pylint: disable=E1101
    if guess is None:
        return None
    if guess.is_reliable and guess.probability >= 0.98:
        return guess.language
    return None
| 
 | ||||
| 
 | ||||
def get_website_description(url, lang1, lang2=None):
    """Fetch *url* and scrape a (language, description) pair from its HTML head.

    :param url: page to fetch
    :param lang1: preferred Accept-Language value, or None for no preference
    :param lang2: optional secondary Accept-Language value
    :return: (lang, description); (None, None) when the request fails
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        'Sec-GPC': '1',
        'Cache-Control': 'max-age=0',
    }
    if lang1 is not None:
        lang_list = [lang1]
        if lang2 is not None:
            lang_list.append(lang2)
        headers['Accept-Language'] = f'{",".join(lang_list)};q=0.8'
    try:
        response = searx.poolrequests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except Exception:
        return (None, None)

    try:
        html = fromstring(response.text)
    except ValueError:
        # lxml rejects str input carrying an encoding declaration;
        # re-parse from the raw bytes instead
        html = fromstring(response.content)

    # prefer <meta name="description">, then og:description, then <title>
    description = extract_text(html.xpath('/html/head/meta[@name="description"]/@content'))
    if not description:
        description = extract_text(html.xpath('/html/head/meta[@property="og:description"]/@content'))
    if not description:
        description = extract_text(html.xpath('/html/head/title'))
    lang = extract_text(html.xpath('/html/@lang'))
    # Fall back to the requested language.  `not lang` also covers the empty
    # string, and `lang1` may be None here -- the original
    # `if lang is None and len(lang1) > 0` raised TypeError in that case.
    if not lang and lang1:
        lang = lang1
    lang = detect_language(description) or lang or 'en'
    lang = lang.split('_')[0]
    lang = lang.split('-')[0]
    return (lang, description)
| 
 | ||||
| 
 | ||||
def initialize():
    """Load all searx engines and build the wikidata-id -> engine-name map.

    Side effects: populates `descriptions` and `wd_to_engine_name`, and
    sets the module-level `IDS` string used in the SPARQL queries.
    """
    global IDS
    searx.search.initialize()
    for engine_name, engine in searx.engines.engines.items():
        descriptions[engine_name] = {}
        wikidata_id = getattr(engine, "about", {}).get('wikidata_id')
        if wikidata_id is not None:
            wd_to_engine_name.setdefault(wikidata_id, set()).add(engine_name)

    IDS = ' '.join('wd:' + wd_id for wd_id in wd_to_engine_name)
| 
 | ||||
| 
 | ||||
def fetch_wikidata_descriptions():
    """Store the Wikidata item descriptions for every known engine."""
    query = (SPARQL_DESCRIPTION
             .replace('%IDS%', IDS)
             .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
    result = send_wikidata_query(query)
    if result is None:
        return
    for binding in result['results']['bindings']:
        wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
        lang = binding['itemDescription']['xml:lang']
        description = binding['itemDescription']['value']
        # single-word descriptions (like "website") carry no information
        if ' ' not in description:
            continue
        for engine_name in wd_to_engine_name[wikidata_id]:
            update_description(engine_name, lang, description, 'wikidata')
| 
 | ||||
| 
 | ||||
def fetch_wikipedia_descriptions():
    """Store Wikipedia article summaries as engine descriptions."""
    query = (SPARQL_WIKIPEDIA_ARTICLE
             .replace('%IDS%', IDS)
             .replace('%LANGUAGES_SPARQL%', LANGUAGES_SPARQL))
    result = send_wikidata_query(query)
    if result is None:
        return
    for binding in result['results']['bindings']:
        wikidata_id = binding['item']['value'].replace('http://www.wikidata.org/entity/', '')
        lang = binding['name']['xml:lang']
        pageid = binding['name']['value']
        description = get_wikipedia_summary(lang, pageid)
        # skip failed fetches and single-word summaries
        if description is None or ' ' not in description:
            continue
        for engine_name in wd_to_engine_name[wikidata_id]:
            update_description(engine_name, lang, description, 'wikipedia')
| 
 | ||||
| 
 | ||||
def normalize_url(url):
    """Reduce a search/base URL template to the site's root URL.

    Substitutes 'en' for a '{language}' placeholder, drops path, params,
    query and fragment, and strips a leading 'api.' subdomain.
    """
    parts = urlparse(url.replace('{language}', 'en'))
    root = parts._replace(path='/', params='', query='', fragment='').geturl()
    return root.replace('https://api.', 'https://')
| 
 | ||||
| 
 | ||||
def fetch_website_description(engine_name, website):
    """Scrape *website* for descriptions, retrying in several languages.

    The first (default) response seeds the description for its detected
    language; additional request languages are tried until one fails or
    returns the same text as the default.
    """
    default_lang, default_description = get_website_description(website, None, None)
    if default_lang is None or default_description is None:
        return
    if default_lang not in descriptions[engine_name]:
        descriptions[engine_name][default_lang] = [normalize_description(default_description), website]
    for request_lang in ('en-US', 'es-US', 'fr-FR', 'zh', 'ja', 'ru', 'ar', 'ko'):
        base_lang = request_lang.split('-')[0]
        if base_lang in descriptions[engine_name]:
            continue
        lang, desc = get_website_description(website, request_lang, base_lang)
        # identical or missing content means the site ignores
        # Accept-Language -- stop trying further languages
        if desc is None or desc == default_description:
            break
        update_description(engine_name, lang, desc, website, replace=False)
| 
 | ||||
| 
 | ||||
def fetch_website_descriptions():
    """Scrape a description from every engine's website.

    Uses about.website when declared; otherwise falls back to the
    engine's search_url or base_url reduced to the site root.
    """
    for engine_name, engine in searx.engines.engines.items():
        website = getattr(engine, "about", {}).get('website')
        if website is None:
            # Defaulted getattr: the original getattr(engine, "search_url")
            # raised AttributeError for engines without that attribute, and
            # the base_url fallback was unreachable because normalize_url
            # always returns a string.
            url = getattr(engine, "search_url", None) or getattr(engine, "base_url", None)
            if url is not None:
                website = normalize_url(url)
        if website is not None:
            fetch_website_description(engine_name, website)
| 
 | ||||
| 
 | ||||
def main():
    """Gather engine descriptions from all sources and print them as JSON."""
    initialize()
    fetch_wikidata_descriptions()
    fetch_wikipedia_descriptions()
    fetch_website_descriptions()

    output = json.dumps(descriptions, indent=1, separators=(',', ':'), ensure_ascii=False)
    sys.stdout.write(output)
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user