commit 9517f7a6e7

@@ -1,7 +1,8 @@
## Bing (Web)
#
# @website     https://www.bing.com
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
#              max. 5000 query/month
#
# @using-api   no (because of query limit)
# @results     HTML (using search portal)

@@ -1,17 +1,19 @@
## Bing (Images)
#
# @website     https://www.bing.com/images
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
#              max. 5000 query/month
#
# @using-api   no (because of query limit)
# @results     HTML (using search portal)
# @stable      no (HTML can change)
# @parse       url, title, img_src
#
# @todo        currently there are up to 35 images receive per page, because bing does not parse count=10. limited response to 10 images
# @todo        currently there are up to 35 images receive per page,
#              because bing does not parse count=10.
#              limited response to 10 images

from urllib import urlencode
from cgi import escape
from lxml import html
from yaml import load
import re
@@ -51,14 +53,14 @@ def response(resp):
    dom = html.fromstring(resp.content)

    # init regex for yaml-parsing
    p = re.compile( '({|,)([a-z]+):(")')
    p = re.compile('({|,)([a-z]+):(")')

    # parse results
    for result in dom.xpath('//div[@class="dg_u"]'):
        link = result.xpath('./a')[0]

        # parse yaml-data (it is required to add a space, to make it parsable)
        yaml_data = load(p.sub( r'\1\2: \3', link.attrib.get('m')))
        yaml_data = load(p.sub(r'\1\2: \3', link.attrib.get('m')))

        title = link.attrib.get('t1')
        #url = 'http://' + link.attrib.get('t3')

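Not part of the diff: the bing_images hunk above reformats `p.sub(r'\1\2: \3', ...)`, the substitution that inserts a space after each bare key so Bing's JSON-like `m` attribute becomes parseable YAML. A minimal sketch of that trick, using a made-up attribute value (the real attribute carries more fields), is:

```python
import re
from yaml import safe_load  # the engine itself uses yaml.load

# Bing serialises image metadata roughly as {imgurl:"...",surl:"..."}:
# almost JSON, but with unquoted keys.  Adding a space after every "key:"
# turns it into a valid YAML flow mapping.
p = re.compile('({|,)([a-z]+):(")')

sample_m = '{imgurl:"https://example.org/img.jpg",surl:"https://example.org/page.html"}'
yaml_data = safe_load(p.sub(r'\1\2: \3', sample_m))

print(yaml_data['imgurl'])  # https://example.org/img.jpg
```
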
@@ -1,7 +1,8 @@
## Bing (News)
#
# @website     https://www.bing.com/news
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search), max. 5000 query/month
# @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
#              max. 5000 query/month
#
# @using-api   no (because of query limit)
# @results     HTML (using search portal)
@@ -57,12 +58,12 @@ def response(resp):
        url = link.attrib.get('href')
        title = ' '.join(link.xpath('.//text()'))
        contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()')
        if contentXPath != None:
        if contentXPath is not None:
            content = escape(' '.join(contentXPath))

        # parse publishedDate
        publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()')
        if publishedDateXPath != None:
        if publishedDateXPath is not None:
            publishedDate = escape(' '.join(publishedDateXPath))

        if re.match("^[0-9]+ minute(s|) ago$", publishedDate):

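Not part of the diff: the bing_news hunk ends at the check `re.match("^[0-9]+ minute(s|) ago$", publishedDate)`; the news portal reports relative timestamps, which the engine converts into absolute dates. A rough illustration of that conversion (a hypothetical stand-alone helper, not the engine's exact code):

```python
import re
from datetime import datetime, timedelta

def relative_minutes_to_datetime(published, now=None):
    # '5 minutes ago' / '1 minute ago' -> datetime; anything else is returned
    # unchanged so the caller can try other patterns (hours, full dates, ...).
    now = now or datetime.now()
    match = re.match(r'^([0-9]+) minute(s|) ago$', published)
    if match:
        return now - timedelta(minutes=int(match.group(1)))
    return published

print(relative_minutes_to_datetime('5 minutes ago'))
```
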
@@ -55,6 +55,6 @@ def response(resp):
        resp.search_params['to'].lower()
    )

    results.append({'answer' : answer, 'url': url})
    results.append({'answer': answer, 'url': url})

    return results

@@ -12,7 +12,6 @@

from urllib import urlencode
from json import loads
from lxml import html

# engine dependent config
categories = ['videos']

@@ -1,7 +1,8 @@
## DuckDuckGo (Web)
#
# @website     https://duckduckgo.com/
# @provide-api yes (https://duckduckgo.com/api), but not all results from search-site
# @provide-api yes (https://duckduckgo.com/api),
#              but not all results from search-site
#
# @using-api   no
# @results     HTML (using search portal)
@@ -9,7 +10,8 @@
# @parse       url, title, content
#
# @todo        rewrite to api
# @todo        language support (the current used site does not support language-change)
# @todo        language support
#              (the current used site does not support language-change)

from urllib import urlencode
from lxml.html import fromstring
@@ -37,7 +39,7 @@ def request(query, params):
    if params['language'] == 'all':
        locale = 'en-us'
    else:
        locale = params['language'].replace('_','-').lower()
        locale = params['language'].replace('_', '-').lower()

    params['url'] = url.format(
        query=urlencode({'q': query, 'kl': locale}),

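Not part of the diff: in the duckduckgo.py hunk above, the `kl` query parameter takes DuckDuckGo-style region codes, so searx locales such as `de_DE` are lowercased and re-joined with a hyphen. A tiny sketch of that mapping:

```python
def to_kl(language):
    # mirrors the request() logic shown above: searx's 'all' has no
    # DuckDuckGo counterpart, so it falls back to 'en-us'
    if language == 'all':
        return 'en-us'
    return language.replace('_', '-').lower()

assert to_kl('de_DE') == 'de-de'
assert to_kl('all') == 'en-us'
```
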
@@ -3,21 +3,25 @@ from urllib import urlencode
from lxml import html
from searx.engines.xpath import extract_text

url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1'
url = 'https://api.duckduckgo.com/'\
    + '?{query}&format=json&pretty=0&no_redirect=1&d=1'


def result_to_text(url, text, htmlResult):
    # TODO : remove result ending with "Meaning" or "Category"
    dom = html.fromstring(htmlResult)
    a = dom.xpath('//a')
    if len(a)>=1:
    if len(a) >= 1:
        return extract_text(a[0])
    else:
        return text


def html_to_text(htmlFragment):
    dom = html.fromstring(htmlFragment)
    return extract_text(dom)


def request(query, params):
    # TODO add kl={locale}
    params['url'] = url.format(query=urlencode({'q': query}))
@@ -38,7 +42,7 @@ def response(resp):
    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer != '':
        results.append({ 'answer' : html_to_text(answer) })
        results.append({'answer': html_to_text(answer)})

    # add infobox
    if 'Definition' in search_res:
@@ -47,7 +51,6 @@ def response(resp):
    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')


    # image
    image = search_res.get('Image', '')
    image = None if image == '' else image
@@ -55,29 +58,35 @@ def response(resp):
    # attributes
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox', None)
        if  'content' in infobox:
        if 'content' in infobox:
            for info in infobox.get('content'):
                attributes.append({'label': info.get('label'), 'value': info.get('value')})
                attributes.append({'label': info.get('label'),
                                  'value': info.get('value')})

    # urls
    for ddg_result in search_res.get('Results', []):
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title':text, 'url':firstURL})
            results.append({'title':heading, 'url': firstURL})
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    for ddg_result in search_res.get('RelatedTopics', None):
        if 'FirstURL' in ddg_result:
            suggestion = result_to_text(ddg_result.get('FirstURL', None), ddg_result.get('Text', None), ddg_result.get('Result', None))
            suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                        ddg_result.get('Text', None),
                                        ddg_result.get('Result', None))
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            suggestions = []
            relatedTopics.append({ 'name' : ddg_result.get('Name', ''), 'suggestions': suggestions })
            relatedTopics.append({'name': ddg_result.get('Name', ''),
                                 'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(topic_result.get('FirstURL', None), topic_result.get('Text', None), topic_result.get('Result', None))
                suggestion = result_to_text(topic_result.get('FirstURL', None),
                                            topic_result.get('Text', None),
                                            topic_result.get('Result', None))
                if suggestion != heading:
                    suggestions.append(suggestion)

@@ -86,21 +95,26 @@ def response(resp):
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'), 'url': abstractURL})
        urls.append({'title': search_res.get('AbstractSource'),
                    'url': abstractURL})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'), 'url': definitionURL})
        urls.append({'title': search_res.get('DefinitionSource'),
                    'url': definitionURL})

    # entity
    entity = search_res.get('Entity', None)
    # TODO continent / country / department / location / waterfall / mountain range : link to map search, get weather, near by locations
    # TODO continent / country / department / location / waterfall /
    #      mountain range :
    #      link to map search, get weather, near by locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television  / media franchise : links to IMDB / rottentomatoes (or scrap result)
    # TODO film / actor / television  / media franchise :
    #      links to IMDB / rottentomatoes (or scrap result)
    # TODO music : link tu musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
@@ -114,24 +128,25 @@ def response(resp):
    # TODO programming language : ??
    # TODO file format : ??

    if len(heading)>0:
    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'
        if image==None and len(attributes)==0 and len(urls)==1 and len(relatedTopics)==0 and len(content)==0:
        if image is None and len(attributes) == 0 and len(urls) == 1 and\
           len(relatedTopics) == 0 and len(content) == 0:
            results.append({
                    'url': urls[0]['url'],
                    'title': heading,
                    'content': content
                    })
                           'url': urls[0]['url'],
                           'title': heading,
                           'content': content
                           })
        else:
            results.append({
                    'infobox': heading,
                    'id': infobox_id,
                    'entity': entity,
                    'content': content,
                    'img_src' : image,
                    'attributes': attributes,
                    'urls': urls,
                    'relatedTopics': relatedTopics
                    })
                           'infobox': heading,
                           'id': infobox_id,
                           'entity': entity,
                           'content': content,
                           'img_src': image,
                           'attributes': attributes,
                           'urls': urls,
                           'relatedTopics': relatedTopics
                           })

    return results

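Not part of the diff: the duckduckgo_definitions hunks walk the instant-answer JSON (`Answer`, `Abstract`, `Infobox`, `Results`, `RelatedTopics`, ...) and, at the end, either emit a single plain link or a full infobox depending on how much structured data came back. A condensed sketch of that final decision, using the same field names as the hunk but simplified inputs:

```python
def pack_result(heading, content, image, attributes, urls, relatedTopics,
                infobox_id, entity):
    # If the instant answer carries nothing beyond a single URL, a normal
    # result is enough; otherwise build an infobox (names as in the hunk).
    if len(heading) == 0:
        return None
    if image is None and len(attributes) == 0 and len(urls) == 1 \
            and len(relatedTopics) == 0 and len(content) == 0:
        return {'url': urls[0]['url'], 'title': heading, 'content': content}
    return {'infobox': heading, 'id': infobox_id, 'entity': entity,
            'content': content, 'img_src': image, 'attributes': attributes,
            'urls': urls, 'relatedTopics': relatedTopics}

print(pack_result('DuckDuckGo', '', None, [],
                  [{'title': 'Official site', 'url': 'https://duckduckgo.com/'}],
                  [], None, None))
```
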
@@ -27,6 +27,7 @@ search_url = url + 'api?{query}&start={offset}&length={number_of_results}&l={lan
search_category = {'general': 'web',
                'news': 'news'}


# do search-request
def request(query, params):
    offset = (params['pageno']-1) * number_of_results + 1
@@ -48,7 +49,7 @@ def request(query, params):
                                      query=urlencode({'q': query}),
                                      language=language,
                                      categorie=categorie,
                                      api_key=api_key )
                                      api_key=api_key)

    # using searx User-Agent
    params['headers']['User-Agent'] = searx_useragent()

@@ -1,7 +1,8 @@
## Google (Images)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
# @provide-api yes (https://developers.google.com/web-search/docs/),
#              deprecated!
#
# @using-api   yes
# @results     JSON

@@ -1,7 +1,8 @@
## Google (News)
#
# @website     https://www.google.com
# @provide-api yes (https://developers.google.com/web-search/docs/), deprecated!
# @provide-api yes (https://developers.google.com/web-search/docs/),
#              deprecated!
#
# @using-api   yes
# @results     JSON

@@ -39,16 +39,16 @@ def response(resp):
        url = result_base_url.format(osm_type=osm_type,
                                     osm_id=r['osm_id'])

        osm = {'type':osm_type,
               'id':r['osm_id']}
        osm = {'type': osm_type,
               'id': r['osm_id']}

        geojson =  r.get('geojson')
        geojson = r.get('geojson')

        # if no geojson is found and osm_type is a node, add geojson Point
        if not geojson and\
           osm_type == 'node':
            geojson = {u'type':u'Point',
                       u'coordinates':[r['lon'],r['lat']]}
            geojson = {u'type': u'Point',
                       u'coordinates': [r['lon'], r['lat']]}

        address_raw = r.get('address')
        address = {}
@@ -59,20 +59,20 @@ def response(resp):
           r['class'] == 'tourism' or\
           r['class'] == 'leisure':
            if address_raw.get('address29'):
                address = {'name':address_raw.get('address29')}
                address = {'name': address_raw.get('address29')}
            else:
                address = {'name':address_raw.get(r['type'])}
                address = {'name': address_raw.get(r['type'])}

        # add rest of adressdata, if something is already found
        if address.get('name'):
            address.update({'house_number':address_raw.get('house_number'),
                       'road':address_raw.get('road'),
                       'locality':address_raw.get('city',
                                  address_raw.get('town',
                                  address_raw.get('village'))),
                       'postcode':address_raw.get('postcode'),
                       'country':address_raw.get('country'),
                       'country_code':address_raw.get('country_code')})
            address.update({'house_number': address_raw.get('house_number'),
                           'road': address_raw.get('road'),
                           'locality': address_raw.get('city',
                                       address_raw.get('town',
                                       address_raw.get('village'))),
                           'postcode': address_raw.get('postcode'),
                           'country': address_raw.get('country'),
                           'country_code': address_raw.get('country_code')})
        else:
            address = None


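Not part of the diff: in the openstreetmap.py hunk, results that lack a `geojson` member but refer to an OSM node get a synthetic GeoJSON Point built from the node's longitude/latitude. A small stand-alone version of that fallback (field names follow the Nominatim JSON used above; `osm_type` is derived earlier in `response()`):

```python
def ensure_geojson(r, osm_type):
    # r is one entry of the Nominatim response; nodes without a geometry
    # get a Point fallback, everything else keeps whatever it already has.
    geojson = r.get('geojson')
    if not geojson and osm_type == 'node':
        geojson = {u'type': u'Point',
                   u'coordinates': [r['lon'], r['lat']]}
    return geojson

# example with a made-up node record (Nominatim returns lon/lat as strings)
print(ensure_geojson({'lon': '8.5417', 'lat': '47.3769'}, 'node'))
```
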
@@ -1,7 +1,8 @@
## Vimeo (Videos)
#
# @website     https://vimeo.com/
# @provide-api yes (http://developer.vimeo.com/api), they have a maximum count of queries/hour
# @provide-api yes (http://developer.vimeo.com/api),
#              they have a maximum count of queries/hour
#
# @using-api   no (TODO, rewrite to api)
# @results     HTML (using search portal)
@@ -35,11 +36,12 @@ publishedDate_xpath = './/p[@class="meta"]//attribute::datetime'

# do search-request
def request(query, params):
    params['url'] = search_url.format(pageno=params['pageno'] ,
    params['url'] = search_url.format(pageno=params['pageno'],
                                      query=urlencode({'q': query}))

    # TODO required?
    params['cookies']['__utma'] = '00000000.000#0000000.0000000000.0000000000.0000000000.0'
    params['cookies']['__utma'] =\
        '00000000.000#0000000.0000000000.0000000000.0000000000.0'

    return params


@@ -2,13 +2,25 @@ import json
from requests import get
from urllib import urlencode

resultCount=1
urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}'
urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}'
urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'
result_count = 1
wikidata_host = 'https://www.wikidata.org'
wikidata_api = wikidata_host + '/w/api.php'
url_search = wikidata_api \
    + '?action=query&list=search&format=json'\
    + '&srnamespace=0&srprop=sectiontitle&{query}'
url_detail = wikidata_api\
    + '?action=wbgetentities&format=json'\
    + '&props=labels%7Cinfo%7Csitelinks'\
    + '%7Csitelinks%2Furls%7Cdescriptions%7Cclaims'\
    + '&{query}'
url_map = 'https://www.openstreetmap.org/'\
    + '?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M'


def request(query, params):
    params['url'] = urlSearch.format(query=urlencode({'srsearch': query, 'srlimit': resultCount}))
    params['url'] = url_search.format(
        query=urlencode({'srsearch': query,
                        'srlimit': result_count}))
    return params


@@ -23,7 +35,8 @@ def response(resp):
    language = resp.search_params['language'].split('_')[0]
    if language == 'all':
        language = 'en'
    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids), 'languages': language + '|en'}))
    url = url_detail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                            'languages': language + '|en'}))

    htmlresponse = get(url)
    jsonresponse = json.loads(htmlresponse.content)
@@ -32,6 +45,7 @@ def response(resp):

    return results


def getDetail(jsonresponse, wikidata_id, language):
    results = []
    urls = []
@@ -40,60 +54,103 @@ def getDetail(jsonresponse, wikidata_id, language):
    result = jsonresponse.get('entities', {}).get(wikidata_id, {})

    title = result.get('labels', {}).get(language, {}).get('value', None)
    if title == None:
    if title is None:
        title = result.get('labels', {}).get('en', {}).get('value', None)
    if title == None:
    if title is None:
        return results

    description = result.get('descriptions', {}).get(language, {}).get('value', None)
    if description == None:
        description = result.get('descriptions', {}).get('en', {}).get('value', '')
    description = result\
        .get('descriptions', {})\
        .get(language, {})\
        .get('value', None)

    if description is None:
        description = result\
            .get('descriptions', {})\
            .get('en', {})\
            .get('value', '')

    claims = result.get('claims', {})
    official_website = get_string(claims, 'P856', None)
    if official_website != None:
        urls.append({ 'title' : 'Official site', 'url': official_website })
        results.append({ 'title': title, 'url' : official_website })
    if official_website is not None:
        urls.append({'title': 'Official site', 'url': official_website})
        results.append({'title': title, 'url': official_website})

    wikipedia_link_count = 0
    if language != 'en':
        wikipedia_link_count += add_url(urls, 'Wikipedia (' + language + ')', get_wikilink(result, language + 'wiki'))
        wikipedia_link_count += add_url(urls,
                                        'Wikipedia (' + language + ')',
                                        get_wikilink(result, language +
                                                     'wiki'))
    wikipedia_en_link = get_wikilink(result, 'enwiki')
    wikipedia_link_count += add_url(urls, 'Wikipedia (en)', wikipedia_en_link)
    wikipedia_link_count += add_url(urls,
                                    'Wikipedia (en)',
                                    wikipedia_en_link)
    if wikipedia_link_count == 0:
        misc_language = get_wiki_firstlanguage(result, 'wiki')
        if misc_language != None:
            add_url(urls, 'Wikipedia (' + misc_language + ')', get_wikilink(result, misc_language + 'wiki'))
        if misc_language is not None:
            add_url(urls,
                    'Wikipedia (' + misc_language + ')',
                    get_wikilink(result, misc_language + 'wiki'))

    if language != 'en':
        add_url(urls, 'Wiki voyage (' + language + ')', get_wikilink(result, language + 'wikivoyage'))
    add_url(urls, 'Wiki voyage (en)', get_wikilink(result, 'enwikivoyage'))
        add_url(urls,
                'Wiki voyage (' + language + ')',
                get_wikilink(result, language + 'wikivoyage'))

    add_url(urls,
            'Wiki voyage (en)',
            get_wikilink(result, 'enwikivoyage'))

    if language != 'en':
        add_url(urls, 'Wikiquote (' + language + ')', get_wikilink(result, language + 'wikiquote'))
    add_url(urls, 'Wikiquote (en)', get_wikilink(result, 'enwikiquote'))
        add_url(urls,
                'Wikiquote (' + language + ')',
                get_wikilink(result, language + 'wikiquote'))

    add_url(urls, 'Commons wiki', get_wikilink(result, 'commonswiki'))
    add_url(urls,
            'Wikiquote (en)',
            get_wikilink(result, 'enwikiquote'))

    add_url(urls, 'Location', get_geolink(claims, 'P625', None))
    add_url(urls,
            'Commons wiki',
            get_wikilink(result, 'commonswiki'))

    add_url(urls, 'Wikidata', 'https://www.wikidata.org/wiki/' + wikidata_id + '?uselang='+ language)
    add_url(urls,
            'Location',
            get_geolink(claims, 'P625', None))

    add_url(urls,
            'Wikidata',
            'https://www.wikidata.org/wiki/'
            + wikidata_id + '?uselang=' + language)

    musicbrainz_work_id = get_string(claims, 'P435')
    if musicbrainz_work_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/work/' + musicbrainz_work_id)
    if musicbrainz_work_id is not None:
        add_url(urls,
                'MusicBrainz',
                'http://musicbrainz.org/work/'
                + musicbrainz_work_id)

    musicbrainz_artist_id = get_string(claims, 'P434')
    if musicbrainz_artist_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/artist/' + musicbrainz_artist_id)
    if musicbrainz_artist_id is not None:
        add_url(urls,
                'MusicBrainz',
                'http://musicbrainz.org/artist/'
                + musicbrainz_artist_id)

    musicbrainz_release_group_id = get_string(claims, 'P436')
    if musicbrainz_release_group_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/release-group/' + musicbrainz_release_group_id)
    if musicbrainz_release_group_id is not None:
        add_url(urls,
                'MusicBrainz',
                'http://musicbrainz.org/release-group/'
                + musicbrainz_release_group_id)

    musicbrainz_label_id = get_string(claims, 'P966')
    if musicbrainz_label_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/label/' + musicbrainz_label_id)
    if musicbrainz_label_id is not None:
        add_url(urls,
                'MusicBrainz',
                'http://musicbrainz.org/label/'
                + musicbrainz_label_id)

    # musicbrainz_area_id = get_string(claims, 'P982')
    # P1407 MusicBrainz series ID
@@ -102,42 +159,43 @@ def getDetail(jsonresponse, wikidata_id, language):
    # P1407 MusicBrainz series ID

    postal_code = get_string(claims, 'P281', None)
    if postal_code != None:
        attributes.append({'label' : 'Postal code(s)', 'value' : postal_code})
    if postal_code is not None:
        attributes.append({'label': 'Postal code(s)', 'value': postal_code})

    date_of_birth = get_time(claims, 'P569', None)
    if date_of_birth != None:
        attributes.append({'label' : 'Date of birth', 'value' : date_of_birth})
    if date_of_birth is not None:
        attributes.append({'label': 'Date of birth', 'value': date_of_birth})

    date_of_death = get_time(claims, 'P570', None)
    if date_of_death != None:
        attributes.append({'label' : 'Date of death', 'value' : date_of_death})
    if date_of_death is not None:
        attributes.append({'label': 'Date of death', 'value': date_of_death})

    if len(attributes)==0 and len(urls)==2 and len(description)==0:
    if len(attributes) == 0 and len(urls) == 2 and len(description) == 0:
        results.append({
                'url': urls[0]['url'],
                'title': title,
                'content': description
                })
                       'url': urls[0]['url'],
                       'title': title,
                       'content': description
                       })
    else:
        results.append({
                'infobox' : title,
                'id' : wikipedia_en_link,
                'content' : description,
                'attributes' : attributes,
                'urls' : urls
                })
                       'infobox': title,
                       'id': wikipedia_en_link,
                       'content': description,
                       'attributes': attributes,
                       'urls': urls
                       })

    return results


def add_url(urls, title, url):
    if url != None:
        urls.append({'title' : title, 'url' : url})
    if url is not None:
        urls.append({'title': title, 'url': url})
        return 1
    else:
        return 0


def get_mainsnak(claims, propertyName):
    propValue = claims.get(propertyName, {})
    if len(propValue) == 0:
@@ -157,7 +215,7 @@ def get_string(claims, propertyName, defaultValue=None):
        mainsnak = e.get('mainsnak', {})

        datavalue = mainsnak.get('datavalue', {})
        if datavalue != None:
        if datavalue is not None:
            result.append(datavalue.get('value', ''))

    if len(result) == 0:
@@ -177,7 +235,7 @@ def get_time(claims, propertyName, defaultValue=None):
        mainsnak = e.get('mainsnak', {})

        datavalue = mainsnak.get('datavalue', {})
        if datavalue != None:
        if datavalue is not None:
            value = datavalue.get('value', '')
            result.append(value.get('time', ''))

@@ -190,7 +248,7 @@ def get_time(claims, propertyName, defaultValue=None):
def get_geolink(claims, propertyName, defaultValue=''):
    mainsnak = get_mainsnak(claims, propertyName)

    if mainsnak == None:
    if mainsnak is None:
        return defaultValue

    datatype = mainsnak.get('datatype', '')
@@ -209,21 +267,25 @@ def get_geolink(claims, propertyName, defaultValue=''):
    # 1 --> 6
    # 0.016666666666667 --> 9
    # 0.00027777777777778 --> 19
    # wolframalpha : quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
    # wolframalpha :
    # quadratic fit { {13, 5}, {1, 6}, {0.0166666, 9}, {0.0002777777,19}}
    # 14.1186-8.8322 x+0.625447 x^2
    if precision < 0.0003:
        zoom = 19
    else:
        zoom = int(15 - precision*8.8322 + precision*precision*0.625447)

    url = urlMap.replace('{latitude}', str(value.get('latitude',0))).replace('{longitude}', str(value.get('longitude',0))).replace('{zoom}', str(zoom))
    url = url_map\
        .replace('{latitude}', str(value.get('latitude', 0)))\
        .replace('{longitude}', str(value.get('longitude', 0)))\
        .replace('{zoom}', str(zoom))

    return url


def get_wikilink(result, wikiid):
    url = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
    if url == None:
    if url is None:
        return url
    elif url.startswith('http://'):
        url = url.replace('http://', 'https://')
@@ -231,8 +293,9 @@ def get_wikilink(result, wikiid):
        url = 'https:' + url
    return url


def get_wiki_firstlanguage(result, wikipatternid):
    for k in result.get('sitelinks', {}).keys():
        if k.endswith(wikipatternid) and len(k)==(2+len(wikipatternid)):
        if k.endswith(wikipatternid) and len(k) == (2+len(wikipatternid)):
            return k[0:2]
    return None

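Not part of the diff: the `get_geolink()` hunk in wikidata.py maps a coordinate claim's `precision` to an OpenStreetMap zoom level via the quadratic fit quoted in its comments (14.1186 - 8.8322·x + 0.625447·x², with 15 used in place of the fitted 14.1186 and a hard cap of 19 for very precise coordinates). Plugging in sample points from the comment as a check (the helper name is ours, the formula is the engine's):

```python
def precision_to_zoom(precision):
    # very precise coordinates get the maximum zoom
    if precision < 0.0003:
        return 19
    return int(15 - precision * 8.8322 + precision * precision * 0.625447)

print(precision_to_zoom(13))  # 5  (matches the '13 --> 5' sample)
print(precision_to_zoom(1))   # 6  (matches the '1 --> 6' sample)
```
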
@@ -1,7 +1,8 @@
## Yacy (Web, Images, Videos, Music, Files)
#
# @website     http://yacy.net
# @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
# @provide-api yes
#              (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
#
# @using-api   yes
# @results     JSON
@@ -16,7 +17,7 @@ from urllib import urlencode
from dateutil import parser

# engine dependent config
categories = ['general', 'images'] #TODO , 'music', 'videos', 'files'
categories = ['general', 'images']  # TODO , 'music', 'videos', 'files'
paging = True
language_support = True
number_of_results = 5

@@ -1,7 +1,8 @@
## Yahoo (Web)
#
# @website     https://search.yahoo.com/web
# @provide-api yes (https://developer.yahoo.com/boss/search/), $0.80/1000 queries
# @provide-api yes (https://developer.yahoo.com/boss/search/),
#              $0.80/1000 queries
#
# @using-api   no (because pricing)
# @results     HTML (using search portal)
@@ -40,7 +41,7 @@ def parse_url(url_string):
        if endpos > -1:
            endpositions.append(endpos)

    if start==0 or len(endpositions) == 0:
    if start == 0 or len(endpositions) == 0:
        return url_string
    else:
        end = min(endpositions)