Merge pull request #104 from dalf/master
[enh] add infoboxes and answers, [fix] when two results are merged, really use the content with more text
This commit is contained in:
		
						commit
						67b69619ba
					
				| @ -38,16 +38,14 @@ def response(resp): | ||||
|     except: | ||||
|         return results | ||||
| 
 | ||||
|     title = '{0} {1} in {2} is {3}'.format( | ||||
|     answer = '{0} {1} = {2} {3} (1 {1} = {4} {3})'.format( | ||||
|         resp.search_params['ammount'], | ||||
|         resp.search_params['from'], | ||||
|         resp.search_params['ammount'] * conversion_rate, | ||||
|         resp.search_params['to'], | ||||
|         resp.search_params['ammount'] * conversion_rate | ||||
|         conversion_rate | ||||
|     ) | ||||
| 
 | ||||
|     content = '1 {0} is {1} {2}'.format(resp.search_params['from'], | ||||
|                                         conversion_rate, | ||||
|                                         resp.search_params['to']) | ||||
|     now_date = datetime.now().strftime('%Y%m%d') | ||||
|     url = 'http://finance.yahoo.com/currency/converter-results/{0}/{1}-{2}-to-{3}.html'  # noqa | ||||
|     url = url.format( | ||||
| @ -56,6 +54,7 @@ def response(resp): | ||||
|         resp.search_params['from'].lower(), | ||||
|         resp.search_params['to'].lower() | ||||
|     ) | ||||
|     results.append({'title': title, 'content': content, 'url': url}) | ||||
| 
 | ||||
|     results.append({'answer' : answer, 'url': url}) | ||||
| 
 | ||||
|     return results | ||||
|  | ||||
| @ -1,10 +1,25 @@ | ||||
| import json | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| from searx.engines.xpath import extract_text | ||||
| 
 | ||||
| url = 'http://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1' | ||||
| url = 'https://api.duckduckgo.com/?{query}&format=json&pretty=0&no_redirect=1&d=1' | ||||
| 
 | ||||
def result_to_text(url, text, htmlResult):
    """Return the text of the first link found in an HTML snippet,
    falling back to the plain *text* when the snippet has no link."""
    # TODO : remove result ending with "Meaning" or "Category"
    dom = html.fromstring(htmlResult)
    anchors = dom.xpath('//a')
    if not anchors:
        return text
    return extract_text(anchors[0])
| 
 | ||||
def html_to_text(htmlFragment):
    """Strip all markup from an HTML fragment and return its text."""
    return extract_text(html.fromstring(htmlFragment))
| 
 | ||||
def request(query, params):
    """Fill params['url'] with the duckduckgo instant-answer API url."""
    # TODO add kl={locale}
    query_string = urlencode({'q': query})
    params['url'] = url.format(query=query_string)
    return params
| 
 | ||||
| @ -12,12 +27,104 @@ def request(query, params): | ||||
def response(resp):
    """Parse a DuckDuckGo instant-answer API response.

    Returns a list of result dicts: plain link results, at most one
    {'answer': ...} entry and at most one {'infobox': ...} entry.
    """
    search_res = json.loads(resp.text)
    results = []

    content = ''
    heading = search_res.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one
    answer = search_res.get('Answer', '')
    if answer != '':
        results.append({'answer': html_to_text(answer)})

    # add infobox: a definition entry becomes both a normal result
    # (when it has a url) and part of the infobox content
    if 'Definition' in search_res:
        if search_res.get('AbstractURL'):
            res = {'title': search_res.get('Heading', ''),
                   'content': search_res.get('Definition', ''),
                   'url': search_res.get('AbstractURL', ''),
                   'class': 'definition_result'}
            results.append(res)
        content = content + search_res.get('Definition', '')

    if 'Abstract' in search_res:
        content = content + search_res.get('Abstract', '')

    # image
    image = search_res.get('Image', '')
    image = None if image == '' else image

    # attributes of the infobox table
    if 'Infobox' in search_res:
        infobox = search_res.get('Infobox', None)
        if 'content' in infobox:
            for info in infobox.get('content'):
                attributes.append({'label': info.get('label'),
                                   'value': info.get('value')})

    # urls: each external result also yields a plain search result
    for ddg_result in search_res.get('Results', []):
        if 'FirstURL' in ddg_result:
            firstURL = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title': text, 'url': firstURL})
            results.append({'title': heading, 'url': firstURL})

    # related topics
    # BUG FIX: default to [] -- the previous default of None raised a
    # TypeError whenever the API response had no 'RelatedTopics' key.
    for ddg_result in search_res.get('RelatedTopics', []):
        if 'FirstURL' in ddg_result:
            suggestion = result_to_text(ddg_result.get('FirstURL', None),
                                        ddg_result.get('Text', None),
                                        ddg_result.get('Result', None))
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            # nested group of topics; suggestions list is filled below
            suggestions = []
            relatedTopics.append({'name': ddg_result.get('Name', ''),
                                  'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics', []):
                suggestion = result_to_text(topic_result.get('FirstURL', None),
                                            topic_result.get('Text', None),
                                            topic_result.get('Result', None))
                if suggestion != heading:
                    suggestions.append(suggestion)

    # abstract
    abstractURL = search_res.get('AbstractURL', '')
    if abstractURL != '':
        # add as result ? problem always in english
        infobox_id = abstractURL
        urls.append({'title': search_res.get('AbstractSource'),
                     'url': abstractURL})

    # definition
    definitionURL = search_res.get('DefinitionURL', '')
    if definitionURL != '':
        # add as result ? as answer ? problem always in english
        infobox_id = definitionURL
        urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definitionURL})

    # entity type is forwarded as-is; see upstream TODO notes for
    # entity-specific enrichments (maps, musicbrainz, imdb, ...)
    entity = search_res.get('Entity', None)

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'
        results.append({'infobox': heading,
                        'id': infobox_id,
                        'entity': entity,
                        'content': content,
                        'img_src': image,
                        'attributes': attributes,
                        'urls': urls,
                        'relatedTopics': relatedTopics})

    return results
|  | ||||
							
								
								
									
										221
									
								
								searx/engines/wikidata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								searx/engines/wikidata.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,221 @@ | ||||
| import json | ||||
| from requests import get | ||||
| from urllib import urlencode | ||||
| from datetime import datetime | ||||
| 
 | ||||
| resultCount=2 | ||||
| urlSearch = 'https://www.wikidata.org/w/api.php?action=query&list=search&format=json&srnamespace=0&srprop=sectiontitle&{query}' | ||||
| urlDetail = 'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&props=labels%7Cinfo%7Csitelinks%7Csitelinks%2Furls%7Cdescriptions%7Cclaims&{query}' | ||||
| urlMap = 'https://www.openstreetmap.org/?lat={latitude}&lon={longitude}&zoom={zoom}&layers=M' | ||||
| 
 | ||||
def request(query, params):
    """Fill params['url'] with the wikidata search API url.

    BUG FIX: removed the leftover debug ``print params['url']`` which
    spammed stdout on every query.
    """
    params['url'] = urlSearch.format(
        query=urlencode({'srsearch': query, 'srlimit': resultCount}))
    return params
| 
 | ||||
| 
 | ||||
def response(resp):
    """Parse the wikidata search response, then fetch the entity
    details in one wbgetentities call and convert each entity with
    getDetail().

    BUG FIX: removed the leftover timing debug print
    (``print datetime.now() - before``) which spammed stdout.
    """
    results = []
    search_res = json.loads(resp.text)

    # collect the wikidata ids (Q...) returned by the search
    wikidata_ids = set()
    for r in search_res.get('query', {}).get('search', []):
        wikidata_ids.add(r.get('title', ''))

    # 'language' is e.g. 'fr_FR'; the API only wants the bare code
    language = resp.search_params['language'].split('_')[0]
    if language == 'all':
        language = 'en'

    url = urlDetail.format(query=urlencode({'ids': '|'.join(wikidata_ids),
                                            'languages': language + '|en'}))

    htmlresponse = get(url)
    jsonresponse = json.loads(htmlresponse.content)
    for wikidata_id in wikidata_ids:
        results = results + getDetail(jsonresponse, wikidata_id, language)

    return results
| 
 | ||||
def getDetail(jsonresponse, wikidata_id, language):
    """Build result dicts (an infobox plus optional link results) for
    one wikidata entity of a parsed wbgetentities json response.

    jsonresponse -- parsed wbgetentities answer containing 'entities'
    wikidata_id  -- entity id to extract, e.g. 'Q42'
    language     -- preferred language code; falls back to 'en'
    """
    result = jsonresponse.get('entities', {}).get(wikidata_id, {})

    # label in the requested language, falling back to english,
    # then to the raw wikidata id
    title = result.get('labels', {}).get(language, {}).get('value', None)
    if title == None:
        title = result.get('labels', {}).get('en', {}).get('value', wikidata_id)
    results = []
    urls = []
    attributes = []

    # description with the same language fallback as the title
    description = result.get('descriptions', {}).get(language, {}).get('value', '')
    if description == '':
        description = result.get('descriptions', {}).get('en', {}).get('value', '')

    claims = result.get('claims', {})
    # P856: official website -- shown in the infobox and as a result
    official_website = get_string(claims, 'P856', None)
    if official_website != None:
        urls.append({ 'title' : 'Official site', 'url': official_website })
        results.append({ 'title': title, 'url' : official_website })

    # sitelinks to the wikimedia projects: localized link first
    # (when language != 'en'), then the english one
    if language != 'en':
        add_url(urls, 'Wikipedia (' + language + ')', get_wikilink(result, language + 'wiki'))
    wikipedia_en_link = get_wikilink(result, 'enwiki')
    add_url(urls, 'Wikipedia (en)', wikipedia_en_link)

    if language != 'en':
        add_url(urls, 'Wiki voyage (' + language + ')', get_wikilink(result, language + 'wikivoyage'))
    add_url(urls, 'Wiki voyage (en)', get_wikilink(result, 'enwikivoyage'))

    if language != 'en':
        add_url(urls, 'Wikiquote (' + language + ')', get_wikilink(result, language + 'wikiquote'))
    add_url(urls, 'Wikiquote (en)', get_wikilink(result, 'enwikiquote'))

    add_url(urls, 'Commons wiki', get_wikilink(result, 'commonswiki'))

    # P625: coordinate location -> openstreetmap link
    add_url(urls, 'Location', get_geolink(claims, 'P625', None))

    add_url(urls, 'Wikidata', 'https://www.wikidata.org/wiki/' + wikidata_id + '?uselang='+ language)

    # musicbrainz identifiers: work / artist / release group / label
    musicbrainz_work_id = get_string(claims, 'P435')
    if musicbrainz_work_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/work/' + musicbrainz_work_id)

    musicbrainz_artist_id = get_string(claims, 'P434')
    if musicbrainz_artist_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/artist/' + musicbrainz_artist_id)

    musicbrainz_release_group_id = get_string(claims, 'P436')
    if musicbrainz_release_group_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/release-group/' + musicbrainz_release_group_id)

    musicbrainz_label_id = get_string(claims, 'P966')
    if musicbrainz_label_id != None:
        add_url(urls, 'MusicBrainz', 'http://musicbrainz.org/label/' + musicbrainz_label_id)

    # musicbrainz_area_id = get_string(claims, 'P982')
    # P1407 MusicBrainz series ID
    # P1004 MusicBrainz place ID
    # P1330 MusicBrainz instrument ID
    # P1407 MusicBrainz series ID

    # plain attributes shown in the infobox table
    postal_code = get_string(claims, 'P281', None)
    if postal_code != None:
        attributes.append({'label' : 'Postal code(s)', 'value' : postal_code})

    date_of_birth = get_time(claims, 'P569', None)
    if date_of_birth != None:
        attributes.append({'label' : 'Date of birth', 'value' : date_of_birth})

    date_of_death = get_time(claims, 'P570', None)
    if date_of_death != None:
        attributes.append({'label' : 'Date of death', 'value' : date_of_death})

    # the english wikipedia url doubles as the infobox id so that
    # infoboxes from different engines can be merged (see search.py)
    results.append({
            'infobox' : title,
            'id' : wikipedia_en_link,
            'content' : description,
            'attributes' : attributes,
            'urls' : urls
            })

    return results
| 
 | ||||
| 
 | ||||
def add_url(urls, title, url):
    """Append a {'title': ..., 'url': ...} entry unless url is None."""
    if url is not None:
        urls.append({'title': title, 'url': url})
| 
 | ||||
| 
 | ||||
def get_mainsnak(claims, propertyName):
    """Return the mainsnak of the first statement for propertyName,
    or None when the property has no statements."""
    statements = claims.get(propertyName, {})
    if not statements:
        return None
    return statements[0].get('mainsnak', None)
| 
 | ||||
| 
 | ||||
def get_string(claims, propertyName, defaultValue=None):
    """Return the comma-joined string values of a wikidata property,
    or defaultValue when the property has no statements.
    """
    propValue = claims.get(propertyName, {})
    if len(propValue) == 0:
        return defaultValue

    result = []
    for e in propValue:
        mainsnak = e.get('mainsnak', {})
        # removed unused 'datatype' local; keep the original permissive
        # behaviour: a statement without a value contributes ''
        datavalue = mainsnak.get('datavalue', {})
        if datavalue != None:
            result.append(datavalue.get('value', ''))

    if len(result) == 0:
        return defaultValue
    else:
        return ', '.join(result)
| 
 | ||||
| 
 | ||||
def get_time(claims, propertyName, defaultValue=None):
    """Return the comma-joined time values of a wikidata property,
    or defaultValue when the property yields no times.
    """
    propValue = claims.get(propertyName, {})
    if len(propValue) == 0:
        return defaultValue

    result = []
    for e in propValue:
        mainsnak = e.get('mainsnak', {})
        datavalue = mainsnak.get('datavalue', {})
        # BUG FIX: when a statement had no datavalue, 'value' defaulted
        # to the string '' and ''.get('time') raised AttributeError;
        # only dig into 'value' when it really is a dict.
        if datavalue != None:
            value = datavalue.get('value', '')
            if isinstance(value, dict):
                result.append(value.get('time', ''))

    if len(result) == 0:
        return defaultValue
    else:
        return ', '.join(result)
| 
 | ||||
| 
 | ||||
def get_geolink(claims, propertyName, defaultValue=''):
    """Return an openstreetmap url for a globe-coordinate claim, or
    defaultValue when the property is absent or not a coordinate."""
    mainsnak = get_mainsnak(claims, propertyName)
    if mainsnak is None:
        return defaultValue

    if mainsnak.get('datatype', '') != 'globe-coordinate':
        return defaultValue

    value = mainsnak.get('datavalue', {}).get('value', {})
    precision = value.get('precision', 0.0002)

    # there is no zoom information, deduce from precision (error prone)
    # samples : 13 --> 5 ; 1 --> 6 ; 0.016666666666667 --> 9 ;
    # 0.00027777777777778 --> 19
    # wolframalpha quadratic fit: 14.1186 - 8.8322 x + 0.625447 x^2
    if precision < 0.0003:
        zoom = 19
    else:
        zoom = int(15 - precision * 8.8322 + precision * precision * 0.625447)

    return urlMap \
        .replace('{latitude}', str(value.get('latitude', 0))) \
        .replace('{longitude}', str(value.get('longitude', 0))) \
        .replace('{zoom}', str(zoom))
| 
 | ||||
| 
 | ||||
def get_wikilink(result, wikiid):
    """Return the sitelink url for wikiid, normalized to https,
    or None when the entity has no such sitelink."""
    link = result.get('sitelinks', {}).get(wikiid, {}).get('url', None)
    if link is None:
        return None
    if link.startswith('http://'):
        return link.replace('http://', 'https://')
    if link.startswith('//'):
        return 'https:' + link
    return link
							
								
								
									
										113
									
								
								searx/search.py
									
									
									
									
									
								
							
							
						
						
									
										113
									
								
								searx/search.py
									
									
									
									
									
								
							| @ -16,6 +16,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| ''' | ||||
| 
 | ||||
| import grequests | ||||
| import re | ||||
| from itertools import izip_longest, chain | ||||
| from datetime import datetime | ||||
| from operator import itemgetter | ||||
| @ -38,17 +39,14 @@ def default_request_params(): | ||||
| 
 | ||||
| 
 | ||||
| # create a callback wrapper for the search engine results | ||||
| def make_callback(engine_name, results, suggestions, callback, params): | ||||
| def make_callback(engine_name, results, suggestions, answers, infoboxes, callback, params): | ||||
| 
 | ||||
|     # creating a callback wrapper for the search engine results | ||||
|     def process_callback(response, **kwargs): | ||||
|         cb_res = [] | ||||
|         response.search_params = params | ||||
| 
 | ||||
|         # update stats with current page-load-time | ||||
|         engines[engine_name].stats['page_load_time'] += \ | ||||
|             (datetime.now() - params['started']).total_seconds() | ||||
| 
 | ||||
|         # callback | ||||
|         try: | ||||
|             search_results = callback(response) | ||||
|         except Exception, e: | ||||
| @ -61,6 +59,7 @@ def make_callback(engine_name, results, suggestions, callback, params): | ||||
|                 engine_name, str(e)) | ||||
|             return | ||||
| 
 | ||||
|         # add results | ||||
|         for result in search_results: | ||||
|             result['engine'] = engine_name | ||||
| 
 | ||||
| @ -70,14 +69,37 @@ def make_callback(engine_name, results, suggestions, callback, params): | ||||
|                 suggestions.add(result['suggestion']) | ||||
|                 continue | ||||
| 
 | ||||
|             # if it is an answer, add it to list of answers | ||||
|             if 'answer' in result: | ||||
|                 answers.add(result['answer']) | ||||
|                 continue | ||||
| 
 | ||||
|             # if it is an infobox, add it to list of infoboxes | ||||
|             if 'infobox' in result: | ||||
|                 infoboxes.append(result) | ||||
|                 continue | ||||
| 
 | ||||
|             # append result | ||||
|             cb_res.append(result) | ||||
| 
 | ||||
|         results[engine_name] = cb_res | ||||
| 
 | ||||
|         # update stats with current page-load-time | ||||
|         engines[engine_name].stats['page_load_time'] += \ | ||||
|             (datetime.now() - params['started']).total_seconds() | ||||
| 
 | ||||
|     return process_callback | ||||
| 
 | ||||
| 
 | ||||
# return the meaningful length of the content for a result
def content_result_len(content):
    """Return the number of characters left in *content* after removing
    punctuation and whitespace; 0 for non-string content."""
    # basestring on Python 2, str on Python 3
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if isinstance(content, string_types):
        # BUG FIX: '-' moved to the end of the character class. In the
        # original pattern the unescaped ')-_' formed the range
        # 0x29-0x5F, which also stripped digits and uppercase letters
        # and thus grossly under-counted real content.
        content = re.sub(r'[,;:!?./\\ ()_-]', '', content)
        return len(content)
    else:
        return 0
| 
 | ||||
| 
 | ||||
| # score results and remove duplications | ||||
| def score_results(results): | ||||
|     # calculate scoring parameters | ||||
| @ -99,8 +121,13 @@ def score_results(results): | ||||
|             res['host'] = res['host'].replace('www.', '', 1) | ||||
| 
 | ||||
|         res['engines'] = [res['engine']] | ||||
| 
 | ||||
|         weight = 1.0 | ||||
| 
 | ||||
|         # strip multiple spaces and cariage returns from content | ||||
|         if 'content' in res: | ||||
|             res['content'] = re.sub(' +', ' ', res['content'].strip().replace('\n', '')) | ||||
| 
 | ||||
|         # get weight of this engine if possible | ||||
|         if hasattr(engines[res['engine']], 'weight'): | ||||
|             weight = float(engines[res['engine']].weight) | ||||
| @ -108,9 +135,8 @@ def score_results(results): | ||||
|         # calculate score for that engine | ||||
|         score = int((flat_len - i) / engines_len) * weight + 1 | ||||
| 
 | ||||
|         duplicated = False | ||||
| 
 | ||||
|         # check for duplicates | ||||
|         duplicated = False | ||||
|         for new_res in results: | ||||
|             # remove / from the end of the url if required | ||||
|             p1 = res['parsed_url'].path[:-1] if res['parsed_url'].path.endswith('/') else res['parsed_url'].path  # noqa | ||||
| @ -127,7 +153,7 @@ def score_results(results): | ||||
|         # merge duplicates together | ||||
|         if duplicated: | ||||
|             # using content with more text | ||||
|             if res.get('content') > duplicated.get('content'): | ||||
|             if content_result_len(res.get('content', '')) > content_result_len(duplicated.get('content', '')): | ||||
|                 duplicated['content'] = res['content'] | ||||
| 
 | ||||
|             # increase result-score | ||||
| @ -186,6 +212,64 @@ def score_results(results): | ||||
|     return gresults | ||||
| 
 | ||||
| 
 | ||||
| def merge_two_infoboxes(infobox1, infobox2): | ||||
|     if 'urls' in infobox2: | ||||
|         urls1 = infobox1.get('urls', None) | ||||
|         if urls1 == None: | ||||
|             urls1 = [] | ||||
|             infobox1.set('urls', urls1) | ||||
| 
 | ||||
|         urlSet = set() | ||||
|         for url in infobox1.get('urls', []): | ||||
|             urlSet.add(url.get('url', None)) | ||||
|          | ||||
|         for url in infobox2.get('urls', []): | ||||
|             if url.get('url', None) not in urlSet: | ||||
|                 urls1.append(url) | ||||
| 
 | ||||
|     if 'attributes' in infobox2: | ||||
|         attributes1 = infobox1.get('attributes', None) | ||||
|         if attributes1 == None: | ||||
|             attributes1 = [] | ||||
|             infobox1.set('attributes', attributes1) | ||||
| 
 | ||||
|         attributeSet = set() | ||||
|         for attribute in infobox1.get('attributes', []): | ||||
|             if attribute.get('label', None) not in attributeSet: | ||||
|                 attributeSet.add(attribute.get('label', None)) | ||||
|          | ||||
|         for attribute in infobox2.get('attributes', []): | ||||
|             attributes1.append(attribute) | ||||
| 
 | ||||
|     if 'content' in infobox2: | ||||
|         content1 = infobox1.get('content', None) | ||||
|         content2 = infobox2.get('content', '') | ||||
|         if content1 != None: | ||||
|             if content_result_len(content2) > content_result_len(content1): | ||||
|                 infobox1['content'] = content2 | ||||
|         else: | ||||
|             infobox1.set('content', content2) | ||||
| 
 | ||||
| 
 | ||||
def merge_infoboxes(infoboxes):
    """Collapse infoboxes that share the same non-None id into a single
    entry; infoboxes without an id are always kept separately."""
    merged = []
    position_by_id = {}
    for infobox in infoboxes:
        infobox_id = infobox.get('id', None)
        existing = None
        if infobox_id is not None:
            existing = position_by_id.get(infobox_id, None)

        if existing is not None:
            merge_two_infoboxes(merged[existing], infobox)
        else:
            merged.append(infobox)
            position_by_id[infobox_id] = len(merged) - 1

    return merged
| 
 | ||||
| 
 | ||||
| class Search(object): | ||||
| 
 | ||||
|     """Search information container""" | ||||
| @ -208,6 +292,8 @@ class Search(object): | ||||
| 
 | ||||
|         self.results = [] | ||||
|         self.suggestions = [] | ||||
|         self.answers = [] | ||||
|         self.infoboxes = [] | ||||
|         self.request_data = {} | ||||
| 
 | ||||
|         # set specific language if set | ||||
| @ -293,6 +379,8 @@ class Search(object): | ||||
|         requests = [] | ||||
|         results = {} | ||||
|         suggestions = set() | ||||
|         answers = set() | ||||
|         infoboxes = [] | ||||
| 
 | ||||
|         # increase number of searches | ||||
|         number_of_searches += 1 | ||||
| @ -337,6 +425,8 @@ class Search(object): | ||||
|                 selected_engine['name'], | ||||
|                 results, | ||||
|                 suggestions, | ||||
|                 answers, | ||||
|                 infoboxes, | ||||
|                 engine.response, | ||||
|                 request_params | ||||
|             ) | ||||
| @ -374,11 +464,14 @@ class Search(object): | ||||
|         # score results and remove duplications | ||||
|         results = score_results(results) | ||||
| 
 | ||||
|         # merge infoboxes according to their ids | ||||
|         infoboxes = merge_infoboxes(infoboxes) | ||||
| 
 | ||||
|         # update engine stats, using calculated score | ||||
|         for result in results: | ||||
|             for res_engine in result['engines']: | ||||
|                 engines[result['engine']]\ | ||||
|                     .stats['score_count'] += result['score'] | ||||
| 
 | ||||
|         # return results and suggestions | ||||
|         return results, suggestions | ||||
|         # return results, suggestions, answers and infoboxes | ||||
|         return results, suggestions, answers, infoboxes | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| server: | ||||
|     port : 8888 | ||||
|     secret_key : "ultrasecretkey" # change this! | ||||
|     debug : False # Debug mode, only for development | ||||
|     debug : True # Debug mode, only for development | ||||
|     request_timeout : 2.0 # seconds | ||||
|     base_url : False # Set custom base_url. Possible values: False or "https://your.custom.host/location/" | ||||
|     themes_path : "" # Custom ui themes path | ||||
| @ -44,6 +44,10 @@ engines: | ||||
|     engine : duckduckgo_definitions | ||||
|     shortcut : ddd | ||||
| 
 | ||||
|   - name : wikidata | ||||
|     engine : wikidata | ||||
|     shortcut : wd | ||||
| 
 | ||||
|   - name : duckduckgo | ||||
|     engine : duckduckgo | ||||
|     shortcut : ddg | ||||
|  | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @ -235,6 +235,17 @@ a { | ||||
| 		max-width: 54em; | ||||
| 		word-wrap:break-word; | ||||
| 		line-height: 1.24; | ||||
| 
 | ||||
| 		img { | ||||
| 		    float: left; | ||||
| 		    margin-right: 5px; | ||||
| 		    max-width: 200px; | ||||
| 		    max-height: 100px; | ||||
| 		} | ||||
| 		 | ||||
| 		br.last { | ||||
| 		    clear: both; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	.url { | ||||
| @ -384,15 +395,14 @@ tr { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| #suggestions { | ||||
| #suggestions, #answers { | ||||
| 
 | ||||
|     	margin-top: 20px; | ||||
| 
 | ||||
| 	span { | ||||
| 		display: inline; | ||||
| 		margin: 0 2px 2px 2px; | ||||
| 		padding: 0; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #suggestions, #answers, #infoboxes { | ||||
| 
 | ||||
| 	input { | ||||
| 		padding: 0; | ||||
| 		margin: 3px; | ||||
| @ -402,6 +412,7 @@ tr { | ||||
|         	color: @color-result-search-url-font; | ||||
| 		cursor: pointer; | ||||
| 	} | ||||
| 
 | ||||
|     	input[type="submit"] { | ||||
| 		text-decoration: underline; | ||||
|     	} | ||||
| @ -411,6 +422,53 @@ tr { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| #infoboxes { | ||||
| 	   position: absolute; | ||||
| 	   top: 220px; | ||||
| 	   right: 20px; | ||||
| 	   margin: 0px 2px 5px 5px; | ||||
| 	   padding: 0px 2px 2px; | ||||
| 	   max-width: 21em; | ||||
| 
 | ||||
| 	   .infobox { | ||||
| 	   	    margin: 10px 0 10px; | ||||
| 	   	    border: 1px solid #ddd; | ||||
| 		    padding: 5px; | ||||
| 	   	    font-size: 0.8em; | ||||
| 
 | ||||
| 	   	    img { | ||||
| 		    	max-width: 20em; | ||||
| 			max-heigt: 12em; | ||||
| 			display: block; | ||||
| 			margin: 5px; | ||||
| 			padding: 5px; | ||||
| 		    } | ||||
| 
 | ||||
| 		    h2 { | ||||
| 		       margin: 0; | ||||
| 		    } | ||||
| 
 | ||||
| 		    table { | ||||
| 		    	  width: auto; | ||||
| 
 | ||||
| 			  td { | ||||
| 		       	     vertical-align: top; | ||||
| 		    	  } | ||||
| 
 | ||||
| 		    } | ||||
| 
 | ||||
| 		    input { | ||||
| 		    	  font-size: 1em; | ||||
| 		    } | ||||
| 
 | ||||
| 		    br { | ||||
| 		       clear: both; | ||||
| 		    } | ||||
| 
 | ||||
| 	   } | ||||
| } | ||||
| 
 | ||||
| #search_url { | ||||
| 	margin-top: 8px; | ||||
| 
 | ||||
| @ -453,16 +511,6 @@ tr { | ||||
| 
 | ||||
| @media screen and (max-width: @results-width) { | ||||
| 
 | ||||
| 	#categories { | ||||
| 		font-size: 90%; | ||||
| 		clear: both; | ||||
| 
 | ||||
| 		.checkbox_container { | ||||
| 			margin-top: 2px; | ||||
| 			margin: auto;  | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|     #results { | ||||
|         margin: auto; | ||||
|         padding: 0; | ||||
| @ -483,7 +531,33 @@ tr { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| @media screen and (max-width: 70em) { | ||||
| @media screen and (max-width: 75em) { | ||||
| 
 | ||||
|        #infoboxes { | ||||
| 	   position: inherit; | ||||
| 	   max-width: inherit; | ||||
| 	    | ||||
| 	   .infobox { | ||||
| 	   	    clear:both; | ||||
| 	    | ||||
| 	   	   img { | ||||
| 	   	       float: left; | ||||
| 	       	       max-width: 10em; | ||||
| 	   	   } | ||||
| 	   } | ||||
| 
 | ||||
|        } | ||||
| 
 | ||||
| 	#categories { | ||||
| 		font-size: 90%; | ||||
| 		clear: both; | ||||
| 
 | ||||
| 		.checkbox_container { | ||||
| 			margin-top: 2px; | ||||
| 			margin: auto;  | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	.right { | ||||
| 		display: none; | ||||
| 		postion: fixed !important; | ||||
| @ -515,12 +589,6 @@ tr { | ||||
| 	.result { | ||||
| 		border-top: 1px solid @color-result-top-border; | ||||
| 		margin: 7px 0 6px 0; | ||||
| 
 | ||||
| 		img { | ||||
| 			max-width: 90%; | ||||
| 			width: auto; | ||||
| 			height: auto | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
|  | ||||
							
								
								
									
										44
									
								
								searx/templates/default/infobox.html
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								searx/templates/default/infobox.html
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | ||||
| <div class="infobox"> | ||||
|   <h2>{{ infobox.infobox }}</h2> | ||||
|   {% if infobox.img_src %}<img src="{{ infobox.img_src }}" />{% endif %} | ||||
|   <p>{{ infobox.entity }}</p> | ||||
|   <p>{{ infobox.content }}</p> | ||||
|   {% if infobox.attributes %} | ||||
|   <div class="attributes"> | ||||
|     <table> | ||||
|       {% for attribute in infobox.attributes %} | ||||
|       <tr><td>{{ attribute.label }}</td><td>{{ attribute.value }}</td></tr> | ||||
|       {% endfor %} | ||||
|     </table> | ||||
|   </div> | ||||
|   {% endif %} | ||||
| 
 | ||||
|   {% if infobox.urls %} | ||||
|   <div class="urls"> | ||||
|     <ul> | ||||
|       {% for url in infobox.urls %} | ||||
|       <li class="url"><a href="{{ url.url }}">{{ url.title }}</a></li> | ||||
|       {% endfor %} | ||||
|     </ul> | ||||
|   </div> | ||||
|   {% endif %} | ||||
| 
 | ||||
|   {% if infobox.relatedTopics %} | ||||
|   <div class="relatedTopics"> | ||||
|       {% for topic in infobox.relatedTopics %} | ||||
|       <div> | ||||
| 	<h3>{{ topic.name }}</h3> | ||||
| 	{% for suggestion in topic.suggestions %} | ||||
| 	<form method="{{ method or 'POST' }}" action="{{ url_for('index') }}"> | ||||
|             <input type="hidden" name="q" value="{{ suggestion }}"> | ||||
|             <input type="submit" value="{{ suggestion }}" /> | ||||
|         </form> | ||||
| 	{% endfor %} | ||||
|       </div> | ||||
|       {% endfor %} | ||||
|   </div> | ||||
|   {% endif %} | ||||
| 
 | ||||
|   <br /> | ||||
|    | ||||
| </div> | ||||
| @ -8,6 +8,6 @@ | ||||
|     <h3 class="result_title"><a href="{{ result.url }}">{{ result.title|safe }}</a></h3> | ||||
|     <p class="url">{{ result.pretty_url }} <a class="cache_link" href="https://web.archive.org/web/{{ result.url }}">cached</a></p> | ||||
| 	{% if result.publishedDate %}<p class="published_date">{{ result.publishedDate }}</p>{% endif %} | ||||
|     <p class="content">{% if result.content %}{{ result.content|safe }}<br />{% endif %}</p> | ||||
|     <p class="content">{% if result.img_src %}<img src="{{ result.img_src|safe }}" class="image" />{% endif %}{% if result.content %}{{ result.content|safe }}<br class="last"/>{% endif %}</p> | ||||
|   </div> | ||||
| </div> | ||||
|  | ||||
| @ -30,6 +30,14 @@ | ||||
|         </div> | ||||
|     </div> | ||||
| 
 | ||||
|     {% if answers %} | ||||
|     <div id="answers"><span>{{ _('Answers') }}</span> | ||||
|         {% for answer in answers %} | ||||
|         <span>{{ answer }}</span> | ||||
|         {% endfor %} | ||||
|     </div> | ||||
|     {% endif %} | ||||
| 
 | ||||
|     {% if suggestions %} | ||||
|     <div id="suggestions"><span>{{ _('Suggestions') }}</span> | ||||
|         {% for suggestion in suggestions %} | ||||
| @ -41,6 +49,14 @@ | ||||
|     </div> | ||||
|     {% endif %} | ||||
| 
 | ||||
|     {% if infoboxes %} | ||||
|     <div id="infoboxes"> | ||||
|       {% for infobox in infoboxes %} | ||||
|          {% include 'default/infobox.html' %} | ||||
|       {% endfor %} | ||||
|     </div> | ||||
|     {% endif %}     | ||||
| 
 | ||||
|     {% for result in results %} | ||||
|         {% if result['template'] %} | ||||
|             {% include 'default/result_templates/'+result['template'] %} | ||||
|  | ||||
| @ -43,6 +43,8 @@ class ViewsTestCase(SearxTestCase): | ||||
|     def test_index_html(self, search): | ||||
|         search.return_value = ( | ||||
|             self.test_results, | ||||
|             set(), | ||||
|             set(), | ||||
|             set() | ||||
|         ) | ||||
|         result = self.app.post('/', data={'q': 'test'}) | ||||
| @ -51,7 +53,7 @@ class ViewsTestCase(SearxTestCase): | ||||
|             result.data | ||||
|         ) | ||||
|         self.assertIn( | ||||
|             '<p class="content">first <span class="highlight">test</span> content<br /></p>',  # noqa | ||||
|             '<p class="content">first <span class="highlight">test</span> content<br class="last"/></p>',  # noqa | ||||
|             result.data | ||||
|         ) | ||||
| 
 | ||||
| @ -59,6 +61,8 @@ class ViewsTestCase(SearxTestCase): | ||||
|     def test_index_json(self, search): | ||||
|         search.return_value = ( | ||||
|             self.test_results, | ||||
|             set(), | ||||
|             set(), | ||||
|             set() | ||||
|         ) | ||||
|         result = self.app.post('/', data={'q': 'test', 'format': 'json'}) | ||||
| @ -75,6 +79,8 @@ class ViewsTestCase(SearxTestCase): | ||||
|     def test_index_csv(self, search): | ||||
|         search.return_value = ( | ||||
|             self.test_results, | ||||
|             set(), | ||||
|             set(), | ||||
|             set() | ||||
|         ) | ||||
|         result = self.app.post('/', data={'q': 'test', 'format': 'csv'}) | ||||
| @ -90,6 +96,8 @@ class ViewsTestCase(SearxTestCase): | ||||
|     def test_index_rss(self, search): | ||||
|         search.return_value = ( | ||||
|             self.test_results, | ||||
|             set(), | ||||
|             set(), | ||||
|             set() | ||||
|         ) | ||||
|         result = self.app.post('/', data={'q': 'test', 'format': 'rss'}) | ||||
|  | ||||
| @ -199,7 +199,7 @@ def index(): | ||||
|             'index.html', | ||||
|         ) | ||||
| 
 | ||||
|     search.results, search.suggestions = search.search(request) | ||||
|     search.results, search.suggestions, search.answers, search.infoboxes = search.search(request) | ||||
| 
 | ||||
|     for result in search.results: | ||||
| 
 | ||||
| @ -292,6 +292,8 @@ def index(): | ||||
|         pageno=search.pageno, | ||||
|         base_url=get_base_url(), | ||||
|         suggestions=search.suggestions, | ||||
|         answers=search.answers, | ||||
|         infoboxes=search.infoboxes, | ||||
|         theme=get_current_theme_name() | ||||
|     ) | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user