Merge pull request #1705 from dalf/template_paper
Theme: add a paper.html template and update the science engines
This commit is contained in: commit fc389f009d
docs/dev/engine_overview.rst
| @ -311,3 +311,88 @@ the parameter ``template`` must be set to the desired type. | ||||
|    address.postcode          postcode of object | ||||
|    address.country           country of object | ||||
|    ========================= ===================================================== | ||||
| 
 | ||||
| .. _BibTeX format: https://www.bibtex.com/g/bibtex-format/ | ||||
| .. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types | ||||
| 
 | ||||
| .. list-table:: Parameters of the **paper** media type / | ||||
|                 see `BibTeX field types`_ and `BibTeX format`_ | ||||
|    :header-rows: 2 | ||||
|    :width: 100% | ||||
| 
 | ||||
|    * - result-parameter | ||||
|      - Python type | ||||
|      - information | ||||
| 
 | ||||
|    * - template | ||||
|      - :py:class:`str` | ||||
|      - is set to ``paper.html`` | ||||
| 
 | ||||
|    * - title | ||||
|      - :py:class:`str` | ||||
|      - title of the result | ||||
| 
 | ||||
|    * - content | ||||
|      - :py:class:`str` | ||||
|      - abstract | ||||
| 
 | ||||
|    * - comments | ||||
|      - :py:class:`str` | ||||
|      - free text displayed in italics below the content | ||||
| 
 | ||||
|    * - tags | ||||
|      - :py:class:`List <list>`\ [\ :py:class:`str`\ ] | ||||
|      - free tag list | ||||
| 
 | ||||
|    * - publishedDate | ||||
|      - :py:class:`datetime <datetime.datetime>` | ||||
|      - last publication date | ||||
| 
 | ||||
|    * - authors | ||||
|      - :py:class:`List <list>`\ [\ :py:class:`str`\ ] | ||||
|      - list of authors of the work (note the plural key name) | ||||
| 
 | ||||
|    * - editor | ||||
|      - :py:class:`str` | ||||
|      - list of editors of a book | ||||
| 
 | ||||
|    * - publisher | ||||
|      - :py:class:`str` | ||||
|      - name of the publisher | ||||
| 
 | ||||
|    * - journal | ||||
|      - :py:class:`str` | ||||
|      - name of the journal or magazine the article was | ||||
|        published in | ||||
| 
 | ||||
|    * - volume | ||||
|      - :py:class:`str` | ||||
|      - volume number | ||||
| 
 | ||||
|    * - pages | ||||
|      - :py:class:`str` | ||||
|      - page range of the article | ||||
| 
 | ||||
|    * - number | ||||
|      - :py:class:`str` | ||||
|      - number of the report or the issue number for a journal article | ||||
| 
 | ||||
|    * - doi | ||||
|      - :py:class:`str` | ||||
|      - DOI number (like ``10.1038/d41586-018-07848-2``) | ||||
| 
 | ||||
|    * - issn | ||||
|      - :py:class:`List <list>`\ [\ :py:class:`str`\ ] | ||||
|      - ISSN number(s) like ``1476-4687`` | ||||
| 
 | ||||
|    * - isbn | ||||
|      - :py:class:`List <list>`\ [\ :py:class:`str`\ ] | ||||
|      - ISBN number(s) like ``9780201896831`` | ||||
| 
 | ||||
|    * - pdf_url | ||||
|      - :py:class:`str` | ||||
|      - URL to the full article, the PDF version | ||||
| 
 | ||||
|    * - html_url | ||||
|      - :py:class:`str` | ||||
|      - URL to the full article, HTML version | ||||
|  | ||||
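To make the table concrete, a minimal result dict for this media type could look like the sketch below. All values are made-up placeholders; only the keys and their Python types come from the table above.

    from datetime import datetime

    # hypothetical paper.html result (illustration only)
    result = {
        'template': 'paper.html',
        'url': 'https://example.org/paper/42',
        'title': 'An Example Paper Title',
        'content': 'The abstract, shown as the result content.',
        'comments': 'Shown in italics below the content.',
        'tags': ['computer science', 'information retrieval'],
        'publishedDate': datetime(2022, 7, 1),
        'authors': ['A. Author', 'B. Author'],
        'editor': 'C. Editor',
        'publisher': 'Example Press',
        'journal': 'Journal of Examples',
        'volume': '7',
        'pages': '11-22',
        'number': '3',
        'doi': '10.1038/d41586-018-07848-2',  # the example DOI from the table
        'issn': ['1476-4687'],
        'isbn': ['9780201896831'],
        'pdf_url': 'https://example.org/paper/42.pdf',
        'html_url': 'https://example.org/paper/42',
    }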
searx/engines/arxiv.py
| @ -3,9 +3,10 @@ | ||||
|  ArXiV (Scientific preprints) | ||||
| """ | ||||
| 
 | ||||
| -from lxml import html | ||||
| +from lxml import etree | ||||
| +from lxml.etree import XPath | ||||
| from datetime import datetime | ||||
| -from searx.utils import eval_xpath_list, eval_xpath_getindex | ||||
| +from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -17,7 +18,7 @@ about = { | ||||
|     "results": 'XML-RSS', | ||||
| } | ||||
| 
 | ||||
| -categories = ['science'] | ||||
| +categories = ['science', 'scientific publications'] | ||||
| paging = True | ||||
| 
 | ||||
| base_url = ( | ||||
| @ -27,6 +28,23 @@ base_url = ( | ||||
| # engine dependent config | ||||
| number_of_results = 10 | ||||
| 
 | ||||
| # xpaths | ||||
| arxiv_namespaces = { | ||||
|     "atom": "http://www.w3.org/2005/Atom", | ||||
|     "arxiv": "http://arxiv.org/schemas/atom", | ||||
| } | ||||
| xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces) | ||||
| xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces) | ||||
| xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces) | ||||
| xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces) | ||||
| xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces) | ||||
| xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces) | ||||
| xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces) | ||||
| xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces) | ||||
| xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces) | ||||
| xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces) | ||||
| xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces) | ||||
| 
 | ||||
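The namespace map above is what makes these precompiled expressions match: the Atom feed uses the default Atom namespace plus an arxiv extension namespace, so elements must be addressed through prefixes bound via the namespaces= argument. A self-contained sketch of the pattern (the feed snippet is invented, not real arXiv output):

    from lxml import etree
    from lxml.etree import XPath

    ns = {"atom": "http://www.w3.org/2005/Atom"}
    feed = etree.fromstring(
        b'<feed xmlns="http://www.w3.org/2005/Atom">'
        b'<entry><title>Example entry</title></entry></feed>'
    )
    # compiled once at import time, then called like a function per element
    xpath_entry = XPath('//atom:entry', namespaces=ns)
    xpath_title = XPath('.//atom:title', namespaces=ns)
    for entry in xpath_entry(feed):
        print(xpath_title(entry)[0].text)  # -> Example entry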
| 
 | ||||
| def request(query, params): | ||||
|     # basic search | ||||
| @ -41,30 +59,50 @@ def request(query, params): | ||||
| 
 | ||||
| def response(resp): | ||||
|     results = [] | ||||
| - | ||||
| -    dom = html.fromstring(resp.content) | ||||
| - | ||||
| -    for entry in eval_xpath_list(dom, '//entry'): | ||||
| -        title = eval_xpath_getindex(entry, './/title', 0).text | ||||
| - | ||||
| -        url = eval_xpath_getindex(entry, './/id', 0).text | ||||
| - | ||||
| -        content_string = '{doi_content}{abstract_content}' | ||||
| - | ||||
| -        abstract = eval_xpath_getindex(entry, './/summary', 0).text | ||||
| - | ||||
| -        #  If a doi is available, add it to the snippet | ||||
| -        doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None) | ||||
| -        doi_content = doi_element.text if doi_element is not None else '' | ||||
| -        content = content_string.format(doi_content=doi_content, abstract_content=abstract) | ||||
| - | ||||
| -        if len(content) > 300: | ||||
| -            content = content[0:300] + "..." | ||||
| -        # TODO: center snippet on query term | ||||
| - | ||||
| -        publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') | ||||
| - | ||||
| -        res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} | ||||
| +    dom = etree.fromstring(resp.content) | ||||
| +    for entry in eval_xpath_list(dom, xpath_entry): | ||||
| +        title = eval_xpath_getindex(entry, xpath_title, 0).text | ||||
| + | ||||
| +        url = eval_xpath_getindex(entry, xpath_id, 0).text | ||||
| +        abstract = eval_xpath_getindex(entry, xpath_summary, 0).text | ||||
| + | ||||
| +        authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)] | ||||
| + | ||||
| +        #  doi | ||||
| +        doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None) | ||||
| +        doi = None if doi_element is None else doi_element.text | ||||
| + | ||||
| +        # pdf | ||||
| +        pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None) | ||||
| +        pdf_url = None if pdf_element is None else pdf_element.attrib.get('href') | ||||
| + | ||||
| +        # journal | ||||
| +        journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None) | ||||
| +        journal = None if journal_element is None else journal_element.text | ||||
| + | ||||
| +        # tags | ||||
| +        tag_elements = eval_xpath(entry, xpath_category) | ||||
| +        tags = [str(tag) for tag in tag_elements] | ||||
| + | ||||
| +        # comments | ||||
| +        comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None) | ||||
| +        comments = None if comments_elements is None else comments_elements.text | ||||
| + | ||||
| +        publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ') | ||||
| + | ||||
| +        res_dict = { | ||||
| +            'template': 'paper.html', | ||||
| +            'url': url, | ||||
| +            'title': title, | ||||
| +            'publishedDate': publishedDate, | ||||
| +            'content': abstract, | ||||
| +            'doi': doi, | ||||
| +            'authors': authors, | ||||
| +            'journal': journal, | ||||
| +            'tags': tags, | ||||
| +            'comments': comments, | ||||
| +            'pdf_url': pdf_url, | ||||
| +        } | ||||
| 
 | ||||
|         results.append(res_dict) | ||||
| 
 | ||||
|  | ||||

searx/engines/crossref.py  (new file, 59 lines)
							| @ -0,0 +1,59 @@ | ||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | ||||
| # lint: pylint | ||||
| """Semantic Scholar (Science) | ||||
| """ | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from searx.utils import html_to_text | ||||
| 
 | ||||
| about = { | ||||
|     "website": 'https://www.crossref.org/', | ||||
|     "wikidata_id": 'Q5188229', | ||||
|     "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc', | ||||
|     "use_official_api": False, | ||||
|     "require_api_key": False, | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['science', 'scientific publications'] | ||||
| paging = True | ||||
| search_url = 'https://api.crossref.org/works' | ||||
| 
 | ||||
| 
 | ||||
| def request(query, params): | ||||
|     params['url'] = search_url + '?' + urlencode(dict(query=query, offset=20 * (params['pageno'] - 1))) | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| def response(resp): | ||||
|     res = resp.json() | ||||
|     results = [] | ||||
|     for record in res['message']['items']: | ||||
|         record_type = record['type'] | ||||
|         if record_type == 'book-chapter': | ||||
|             title = record['container-title'][0] | ||||
|             if record['title'][0].lower().strip() != title.lower().strip(): | ||||
|                 title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')' | ||||
|             journal = None | ||||
|         else: | ||||
|             title = html_to_text(record['title'][0]) | ||||
|             journal = record.get('container-title', [None])[0] | ||||
|         url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL'] | ||||
|         authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])] | ||||
|         isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])] | ||||
|         results.append( | ||||
|             { | ||||
|                 'template': 'paper.html', | ||||
|                 'url': url, | ||||
|                 'title': title, | ||||
|                 'journal': journal, | ||||
|                 'volume': record.get('volume'), | ||||
|                 'type': record['type'], | ||||
|                 'content': html_to_text(record.get('abstract', '')), | ||||
|                 'publisher': record.get('publisher'), | ||||
|                 'authors': authors, | ||||
|                 'doi': record['DOI'], | ||||
|                 'isbn': isbn, | ||||
|             } | ||||
|         ) | ||||
|     return results | ||||
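request() pages through Crossref's works endpoint with a plain offset of 20 results per page. A sketch of the URL the code above builds (the endpoint itself is described in the REST API documentation linked in about):

    from urllib.parse import urlencode

    search_url = 'https://api.crossref.org/works'
    query, pageno = 'climate', 2
    url = search_url + '?' + urlencode(dict(query=query, offset=20 * (pageno - 1)))
    # -> https://api.crossref.org/works?query=climate&offset=20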
searx/engines/google_scholar.py
| @ -13,10 +13,12 @@ Definitions`_. | ||||
| 
 | ||||
| from urllib.parse import urlencode | ||||
| from datetime import datetime | ||||
| from typing import Optional | ||||
| from lxml import html | ||||
| 
 | ||||
| from searx.utils import ( | ||||
|     eval_xpath, | ||||
|     eval_xpath_getindex, | ||||
|     eval_xpath_list, | ||||
|     extract_text, | ||||
| ) | ||||
| @ -46,7 +48,7 @@ about = { | ||||
| } | ||||
| 
 | ||||
| # engine dependent config | ||||
| -categories = ['science'] | ||||
| +categories = ['science', 'scientific publications'] | ||||
| paging = True | ||||
| language_support = True | ||||
| use_locale_domain = True | ||||
| @ -99,7 +101,43 @@ def request(query, params): | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| -def response(resp): | ||||
| +def parse_gs_a(text: Optional[str]): | ||||
|     """Parse the text written in green. | ||||
| 
 | ||||
|     Possible formats: | ||||
|     * "{authors} - {journal}, {year} - {publisher}" | ||||
|     * "{authors} - {year} - {publisher}" | ||||
|     * "{authors} - {publisher}" | ||||
|     """ | ||||
|     if text is None or text == "": | ||||
|         return None, None, None, None | ||||
| 
 | ||||
|     s_text = text.split(' - ') | ||||
|     authors = s_text[0].split(', ') | ||||
|     publisher = s_text[-1] | ||||
|     if len(s_text) != 3: | ||||
|         return authors, None, publisher, None | ||||
| 
 | ||||
|     # the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}" | ||||
|     # get journal and year | ||||
|     journal_year = s_text[1].split(', ') | ||||
|     # journal is optional and may contain commas | ||||
|     if len(journal_year) > 1: | ||||
|         journal = ', '.join(journal_year[0:-1]) | ||||
|         if journal == '…': | ||||
|             journal = None | ||||
|     else: | ||||
|         journal = None | ||||
|     # year | ||||
|     year = journal_year[-1] | ||||
|     try: | ||||
|         publishedDate = datetime.strptime(year.strip(), '%Y') | ||||
|     except ValueError: | ||||
|         publishedDate = None | ||||
|     return authors, journal, publisher, publishedDate | ||||
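Applied to the three formats listed in the docstring, the parser behaves as follows (the input strings are invented examples, not captured Google Scholar markup):

    from datetime import datetime

    parse_gs_a('J Doe, A Roe - Nature, 2021 - nature.com')
    # -> (['J Doe', 'A Roe'], 'Nature', 'nature.com', datetime(2021, 1, 1))
    parse_gs_a('J Doe - 2021 - nature.com')
    # -> (['J Doe'], None, 'nature.com', datetime(2021, 1, 1))
    parse_gs_a('J Doe - nature.com')
    # -> (['J Doe'], None, 'nature.com', None)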
| 
 | ||||
| 
 | ||||
| +def response(resp):  # pylint: disable=too-many-locals | ||||
|     """Get response from google's search request""" | ||||
|     results = [] | ||||
| 
 | ||||
| @ -112,30 +150,53 @@ def response(resp): | ||||
|     dom = html.fromstring(resp.text) | ||||
| 
 | ||||
|     # parse results | ||||
| -    for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'): | ||||
| +    for result in eval_xpath_list(dom, '//div[@data-cid]'): | ||||
 | ||||
| -        title = extract_text(eval_xpath(result, './h3[1]//a')) | ||||
| +        title = extract_text(eval_xpath(result, './/h3[1]//a')) | ||||
 | ||||
|         if not title: | ||||
|             # this is a [ZITATION] block | ||||
|             continue | ||||
 | ||||
| -        url = eval_xpath(result, './h3[1]//a/@href')[0] | ||||
| -        content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or '' | ||||
| - | ||||
| -        pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]')) | ||||
| -        if pub_info: | ||||
| -            content += "[%s]" % pub_info | ||||
 | ||||
|         pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) | ||||
|         if pub_type: | ||||
| -            title = title + " " + pub_type | ||||
| +            pub_type = pub_type[1:-1].lower() | ||||
| + | ||||
| +        url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0) | ||||
| +        content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]')) | ||||
| +        authors, journal, publisher, publishedDate = parse_gs_a( | ||||
| +            extract_text(eval_xpath(result, './/div[@class="gs_a"]')) | ||||
| +        ) | ||||
| +        if publisher and publisher in url: | ||||
| +            publisher = None | ||||
| + | ||||
| +        # cited by | ||||
| +        comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]')) | ||||
| + | ||||
| +        # link to the html or pdf document | ||||
| +        html_url = None | ||||
| +        pdf_url = None | ||||
| +        doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None) | ||||
| +        doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) | ||||
| +        if doc_type == "[PDF]": | ||||
| +            pdf_url = doc_url | ||||
| +        else: | ||||
| +            html_url = doc_url | ||||
| + | ||||
| +        results.append( | ||||
| +            { | ||||
| +                'template': 'paper.html', | ||||
| +                'type': pub_type, | ||||
| +                'url': url, | ||||
| +                'title': title, | ||||
| +                'authors': authors, | ||||
| +                'publisher': publisher, | ||||
| +                'journal': journal, | ||||
| +                'publishedDate': publishedDate, | ||||
| +                'content': content, | ||||
| +                'comments': comments, | ||||
| +                'html_url': html_url, | ||||
| +                'pdf_url': pdf_url, | ||||
| +            } | ||||
| +        ) | ||||
| 
 | ||||
|  | ||||
searx/engines/pubmed.py
| @ -3,11 +3,15 @@ | ||||
|  PubMed (Scholar publications) | ||||
| """ | ||||
| 
 | ||||
| -from flask_babel import gettext | ||||
| from lxml import etree | ||||
| from datetime import datetime | ||||
| from urllib.parse import urlencode | ||||
| from searx.network import get | ||||
| +from searx.utils import ( | ||||
| +    eval_xpath_getindex, | ||||
| +    eval_xpath_list, | ||||
| +    extract_text, | ||||
| +) | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -22,7 +26,7 @@ about = { | ||||
|     "results": 'XML', | ||||
| } | ||||
| 
 | ||||
| -categories = ['science'] | ||||
| +categories = ['science', 'scientific publications'] | ||||
| 
 | ||||
| base_url = ( | ||||
|     'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + '?db=pubmed&{query}&retstart={offset}&retmax={hits}' | ||||
| @ -63,46 +67,61 @@ def response(resp): | ||||
| 
 | ||||
|     retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) | ||||
| 
 | ||||
| -    search_results_xml = get(retrieve_url_encoded).content | ||||
| -    search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') | ||||
| - | ||||
| -    for entry in search_results: | ||||
| -        title = entry.xpath('.//Article/ArticleTitle')[0].text | ||||
| - | ||||
| -        pmid = entry.xpath('.//PMID')[0].text | ||||
| +    search_results_response = get(retrieve_url_encoded).content | ||||
| +    search_results = etree.XML(search_results_response) | ||||
| +    for entry in eval_xpath_list(search_results, '//PubmedArticle'): | ||||
| +        medline = eval_xpath_getindex(entry, './MedlineCitation', 0) | ||||
| + | ||||
| +        title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text | ||||
| +        pmid = eval_xpath_getindex(medline, './/PMID', 0).text | ||||
|         url = pubmed_url + pmid | ||||
| - | ||||
| -        try: | ||||
| -            content = entry.xpath('.//Abstract/AbstractText')[0].text | ||||
| -        except: | ||||
| -            content = gettext('No abstract is available for this publication.') | ||||
| - | ||||
| -        #  If a doi is available, add it to the snippet | ||||
| -        try: | ||||
| -            doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text | ||||
| -            content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) | ||||
| -        except: | ||||
| -            pass | ||||
| - | ||||
| -        if len(content) > 300: | ||||
| -            content = content[0:300] + "..." | ||||
| -        # TODO: center snippet on query term | ||||
| - | ||||
| -        res_dict = {'url': url, 'title': title, 'content': content} | ||||
| - | ||||
| -        try: | ||||
| -            publishedDate = datetime.strptime( | ||||
| -                entry.xpath('.//DateCreated/Year')[0].text | ||||
| -                + '-' | ||||
| -                + entry.xpath('.//DateCreated/Month')[0].text | ||||
| -                + '-' | ||||
| -                + entry.xpath('.//DateCreated/Day')[0].text, | ||||
| -                '%Y-%m-%d', | ||||
| -            ) | ||||
| -            res_dict['publishedDate'] = publishedDate | ||||
| -        except: | ||||
| -            pass | ||||
| +        content = extract_text( | ||||
| +            eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True | ||||
| +        ) | ||||
| +        doi = extract_text( | ||||
| +            eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True | ||||
| +        ) | ||||
| +        journal = extract_text( | ||||
| +            eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True | ||||
| +        ) | ||||
| +        issn = extract_text( | ||||
| +            eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True | ||||
| +        ) | ||||
| +        authors = [] | ||||
| +        for author in eval_xpath_list(medline, './Article/AuthorList/Author'): | ||||
| +            f = eval_xpath_getindex(author, './ForeName', 0, default=None) | ||||
| +            l = eval_xpath_getindex(author, './LastName', 0, default=None) | ||||
| +            f = '' if f is None else f.text | ||||
| +            l = '' if l is None else l.text | ||||
| +            authors.append((f + ' ' + l).strip()) | ||||
| + | ||||
| +        res_dict = { | ||||
| +            'template': 'paper.html', | ||||
| +            'url': url, | ||||
| +            'title': title, | ||||
| +            'content': content, | ||||
| +            'journal': journal, | ||||
| +            'issn': [issn], | ||||
| +            'authors': authors, | ||||
| +            'doi': doi, | ||||
| +        } | ||||
| + | ||||
| +        accepted_date = eval_xpath_getindex( | ||||
| +            entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None | ||||
| +        ) | ||||
| +        if accepted_date is not None: | ||||
| +            year = eval_xpath_getindex(accepted_date, './Year', 0) | ||||
| +            month = eval_xpath_getindex(accepted_date, './Month', 0) | ||||
| +            day = eval_xpath_getindex(accepted_date, './Day', 0) | ||||
| +            try: | ||||
| +                publishedDate = datetime.strptime( | ||||
| +                    year.text + '-' + month.text + '-' + day.text, | ||||
| +                    '%Y-%m-%d', | ||||
| +                ) | ||||
| +                res_dict['publishedDate'] = publishedDate | ||||
| +            except Exception as e: | ||||
| +                print(e) | ||||
 | ||||
|         results.append(res_dict) | ||||
 | ||||
| -        return results | ||||
| +    return results | ||||
|  | ||||
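For context, pubmed_retrieve_api_url (defined above this hunk) points at the second step of NCBI's two-step E-utilities flow: esearch returns matching PMIDs, and a follow-up call retrieves the full records that response() parses. Roughly (URLs per the public E-utilities documentation; the PMIDs are made up):

    from urllib.parse import urlencode

    eutils = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils'
    # step 1: search for matching PMIDs
    search_url = eutils + '/esearch.fcgi?db=pubmed&' + urlencode({'term': 'aspirin'})
    # step 2: retrieve the full records for those PMIDs
    pmids = ['31452104', '29462900']  # hypothetical result of step 1
    fetch_url = eutils + '/efetch.fcgi?db=pubmed&retmode=xml&' + urlencode({'id': ','.join(pmids)})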
searx/engines/semantic_scholar.py
| @ -6,6 +6,8 @@ | ||||
| from json import dumps, loads | ||||
| from datetime import datetime | ||||
| 
 | ||||
| from flask_babel import gettext | ||||
| 
 | ||||
| about = { | ||||
|     "website": 'https://www.semanticscholar.org/', | ||||
|     "wikidata_id": 'Q22908627', | ||||
| @ -15,6 +17,7 @@ about = { | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| categories = ['science', 'scientific publications'] | ||||
| paging = True | ||||
| search_url = 'https://www.semanticscholar.org/api/1/search' | ||||
| paper_url = 'https://www.semanticscholar.org/paper' | ||||
| @ -45,11 +48,7 @@ def request(query, params): | ||||
| def response(resp): | ||||
|     res = loads(resp.text) | ||||
|     results = [] | ||||
| 
 | ||||
|     for result in res['results']: | ||||
| -        item = {} | ||||
| -        metadata = [] | ||||
| 
 | ||||
|         url = result.get('primaryPaperLink', {}).get('url') | ||||
|         if not url and result.get('links'): | ||||
|             url = result.get('links')[0] | ||||
| @ -60,22 +59,47 @@ def response(resp): | ||||
|         if not url: | ||||
|             url = paper_url + '/%s' % result['id'] | ||||
| 
 | ||||
| -        item['url'] = url | ||||
| +        # publishedDate | ||||
| +        if 'pubDate' in result: | ||||
| +            publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d") | ||||
| +        else: | ||||
| +            publishedDate = None | ||||
 | ||||
| -        item['title'] = result['title']['text'] | ||||
| -        item['content'] = result['paperAbstract']['text'] | ||||
| +        # authors | ||||
| +        authors = [author[0]['name'] for author in result.get('authors', [])] | ||||
 | ||||
| -        metadata = result.get('fieldsOfStudy') or [] | ||||
| -        venue = result.get('venue', {}).get('text') | ||||
| -        if venue: | ||||
| -            metadata.append(venue) | ||||
| -        if metadata: | ||||
| -            item['metadata'] = ', '.join(metadata) | ||||
| +        # pick the first alternate link that is not from the crawler | ||||
| +        pdf_url = None | ||||
| +        for doc in result.get('alternatePaperLinks', []): | ||||
| +            if doc['linkType'] not in ('crawler', 'doi'): | ||||
| +                pdf_url = doc['url'] | ||||
| +                break | ||||
 | ||||
| -        pubDate = result.get('pubDate') | ||||
| -        if pubDate: | ||||
| -            item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") | ||||
| +        # comments | ||||
| +        comments = None | ||||
| +        if 'citationStats' in result: | ||||
| +            comments = gettext( | ||||
| +                '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}' | ||||
| +            ).format( | ||||
| +                numCitations=result['citationStats']['numCitations'], | ||||
| +                firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'], | ||||
| +                lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'], | ||||
| +            ) | ||||
 | ||||
| -        results.append(item) | ||||
| +        results.append( | ||||
| +            { | ||||
| +                'template': 'paper.html', | ||||
| +                'url': url, | ||||
| +                'title': result['title']['text'], | ||||
| +                'content': result['paperAbstract']['text'], | ||||
| +                'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'), | ||||
| +                'doi': result.get('doiInfo', {}).get('doi'), | ||||
| +                'tags': result.get('fieldsOfStudy'), | ||||
| +                'authors': authors, | ||||
| +                'pdf_url': pdf_url, | ||||
| +                'publishedDate': publishedDate, | ||||
| +                'comments': comments, | ||||
| +            } | ||||
| +        ) | ||||
| 
 | ||||
|     return results | ||||
|  | ||||
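The comments string interpolates three counters from the result's citationStats object. With made-up numbers it renders like this:

    stats = {
        'numCitations': 12,
        'firstCitationVelocityYear': 2018,
        'lastCitationVelocityYear': 2022,
    }
    msg = '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}'
    print(msg.format(**stats))
    # -> 12 citations from the year 2018 to 2022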
searx/engines/springer.py
| @ -19,7 +19,7 @@ about = { | ||||
|     "results": 'JSON', | ||||
| } | ||||
| 
 | ||||
| -categories = ['science'] | ||||
| +categories = ['science', 'scientific publications'] | ||||
| paging = True | ||||
| nb_per_page = 10 | ||||
| api_key = 'unset' | ||||
| @ -41,32 +41,29 @@ def response(resp): | ||||
|     json_data = loads(resp.text) | ||||
| 
 | ||||
|     for record in json_data['records']: | ||||
| -        content = record['abstract'][0:500] | ||||
| -        if len(record['abstract']) > len(content): | ||||
| -            content += "..." | ||||
| +        content = record['abstract'] | ||||
|         published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') | ||||
| 
 | ||||
| -        metadata = [ | ||||
| -            record[x] | ||||
| -            for x in [ | ||||
| -                'publicationName', | ||||
| -                'identifier', | ||||
| -                'contentType', | ||||
| -            ] | ||||
| -            if record.get(x) is not None | ||||
| -        ] | ||||
| - | ||||
| -        metadata = ' / '.join(metadata) | ||||
| -        if record.get('startingPage') and record.get('endingPage') is not None: | ||||
| -            metadata += " (%(startingPage)s-%(endingPage)s)" % record | ||||
 | ||||
| +        authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']] | ||||
| +        tags = record.get('genre') | ||||
| +        if isinstance(tags, str): | ||||
| +            tags = [tags] | ||||
|         results.append( | ||||
|             { | ||||
|                 'template': 'paper.html', | ||||
|                 'title': record['title'], | ||||
|                 'url': record['url'][0]['value'].replace('http://', 'https://', 1), | ||||
|                 'type': record.get('contentType'), | ||||
|                 'content': content, | ||||
|                 'publishedDate': published, | ||||
|                 'metadata': metadata, | ||||
|                 'authors': authors, | ||||
|                 'doi': record.get('doi'), | ||||
|                 'journal': record.get('publicationName'), | ||||
|                 'pages': (record['start_page'] + '-' + record['end_page']) if record.get('start_page') and record.get('end_page') else None, | ||||
|                 'tags': tags, | ||||
|                 'issn': [record.get('issn')], | ||||
|                 'isbn': [record.get('isbn')], | ||||
|                 'volume': record.get('volume') or None, | ||||
|                 'number': record.get('number') or None, | ||||
|             } | ||||
|         ) | ||||
|     return results | ||||
|  | ||||
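Springer reports creators as "Last, First"; the list comprehension above flips each name into display order. A sketch with an invented record:

    record = {'creators': [{'creator': 'Doe, John'}, {'creator': 'Roe, Ann'}]}
    authors = [' '.join(a['creator'].split(', ')[::-1]) for a in record['creators']]
    print(authors)  # -> ['John Doe', 'Ann Roe']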
searx/plugins/oa_doi_rewrite.py
| @ -42,4 +42,6 @@ def on_result(request, search, result): | ||||
|                 doi = doi[: -len(suffix)] | ||||
|         result['url'] = get_doi_resolver(request.preferences) + doi | ||||
|         result['parsed_url'] = urlparse(result['url']) | ||||
|         if 'doi' not in result: | ||||
|             result['doi'] = doi | ||||
|     return True | ||||
|  | ||||
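The two added lines make the plugin store the bare DOI on the result, so the paper template can render it alongside the rewritten URL. Schematically (the resolver URL is an assumption standing in for the user's configured choice):

    doi = '10.1038/d41586-018-07848-2'
    doi_resolver = 'https://oadoi.org/'  # e.g. value from get_doi_resolver()
    result = {'url': doi_resolver + doi}
    if 'doi' not in result:
        result['doi'] = doi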
| @ -43,6 +43,7 @@ CATEGORY_GROUPS = { | ||||
|     'REPOS': 'repos', | ||||
|     'SOFTWARE_WIKIS': 'software wikis', | ||||
|     'WEB': 'web', | ||||
|     'SCIENTIFIC PUBLICATIONS': 'scientific publications', | ||||
| } | ||||
| 
 | ||||
| STYLE_NAMES = { | ||||
|  | ||||
searx/settings.yml
| @ -319,7 +319,6 @@ engines: | ||||
|   - name: arxiv | ||||
|     engine: arxiv | ||||
|     shortcut: arx | ||||
| -    categories: science | ||||
|     timeout: 4.0 | ||||
| 
 | ||||
|   # tmp suspended:  dh key too small | ||||
| @ -411,23 +410,10 @@ engines: | ||||
|   #   api_key: 'unset' | ||||
| 
 | ||||
|   - name: crossref | ||||
| -    engine: json_engine | ||||
| -    paging: true | ||||
| -    search_url: https://search.crossref.org/dois?q={query}&page={pageno} | ||||
| -    url_query: doi | ||||
| -    title_query: title | ||||
| -    title_html_to_text: true | ||||
| -    content_query: fullCitation | ||||
| -    content_html_to_text: true | ||||
| -    categories: science | ||||
| +    engine: crossref | ||||
|     shortcut: cr | ||||
| -    about: | ||||
| -      website: https://www.crossref.org/ | ||||
| -      wikidata_id: Q5188229 | ||||
| -      official_api_documentation: https://github.com/CrossRef/rest-api-doc | ||||
| -      use_official_api: false | ||||
| -      require_api_key: false | ||||
| -      results: JSON | ||||
| +    timeout: 30 | ||||
| +    disabled: true | ||||
| 
 | ||||
|   - name: yep | ||||
|     engine: json_engine | ||||
| @ -1068,7 +1054,7 @@ engines: | ||||
|     title_query: metadata/oaf:entity/oaf:result/title/$ | ||||
|     content_query: metadata/oaf:entity/oaf:result/description/$ | ||||
|     content_html_to_text: true | ||||
| -    categories: science | ||||
| +    categories: "science" | ||||
|     shortcut: oad | ||||
|     timeout: 5.0 | ||||
|     about: | ||||
| @ -1198,7 +1184,6 @@ engines: | ||||
|   - name: pubmed | ||||
|     engine: pubmed | ||||
|     shortcut: pub | ||||
| -    categories: science | ||||
|     timeout: 3.0 | ||||
| 
 | ||||
|   - name: pypi | ||||
| @ -1346,7 +1331,6 @@ engines: | ||||
|     engine: semantic_scholar | ||||
|     disabled: true | ||||
|     shortcut: se | ||||
| -    categories: science | ||||
| 
 | ||||
|   # Spotify needs API credentials | ||||
|   # - name: spotify | ||||
| @ -1372,8 +1356,7 @@ engines: | ||||
|   #   # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" | ||||
|   #   api_key: 'unset' | ||||
|   #   shortcut: springer | ||||
| -  #   categories: science | ||||
| -  #   timeout: 6.0 | ||||
| +  #   timeout: 15.0 | ||||
| 
 | ||||
|   - name: startpage | ||||
|     engine: startpage | ||||
|  | ||||

searx/static/themes/simple/css/searxng-rtl.min.css  (vendored binary, not shown)
searx/static/themes/simple/css/searxng.min.css  (vendored binary, not shown)
searx/static/themes/simple/src/less/style.less
| @ -302,6 +302,49 @@ article[data-vim-selected].category-social { | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| .result-paper { | ||||
|   .attributes { | ||||
|     display: table; | ||||
|     border-spacing: 0.125rem; | ||||
| 
 | ||||
|     div { | ||||
|       display: table-row; | ||||
| 
 | ||||
|       span { | ||||
|         font-size: 0.9rem; | ||||
|         margin-top: 0.25rem; | ||||
|         display: table-cell; | ||||
| 
 | ||||
|         time { | ||||
|           font-size: 0.9rem; | ||||
|         } | ||||
|       } | ||||
| 
 | ||||
|       span:first-child { | ||||
|         color: var(--color-base-font); | ||||
|         min-width: 10rem; | ||||
|       } | ||||
| 
 | ||||
|       span:nth-child(2) { | ||||
|         color: var(--color-result-publishdate-font); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   .content { | ||||
|     margin-top: 0.25rem; | ||||
|   } | ||||
| 
 | ||||
|   .comments { | ||||
|     font-size: 0.9rem; | ||||
|     margin: 0.25rem 0 0 0; | ||||
|     padding: 0; | ||||
|     word-wrap: break-word; | ||||
|     line-height: 1.24; | ||||
|     font-style: italic; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| .template_group_images { | ||||
|   display: flex; | ||||
|   flex-wrap: wrap; | ||||
| @ -955,6 +998,28 @@ article[data-vim-selected].category-social { | ||||
|     border: none !important; | ||||
|     background-color: var(--color-sidebar-background); | ||||
|   } | ||||
| 
 | ||||
|   .result-paper { | ||||
|     .attributes { | ||||
|       display: block; | ||||
| 
 | ||||
|       div { | ||||
|         display: block; | ||||
| 
 | ||||
|         span { | ||||
|           display: inline; | ||||
|         } | ||||
| 
 | ||||
|         span:first-child { | ||||
|           font-weight: bold; | ||||
|         } | ||||
| 
 | ||||
|         span:nth-child(2) { | ||||
|           .ltr-margin-left(0.5rem); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| /* | ||||
|  | ||||

searx/templates/simple/result_templates/paper.html  (new file, 44 lines)
							| @ -0,0 +1,44 @@ | ||||
| {% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %} | ||||
| 
 | ||||
| {{ result_header(result, favicons, image_proxify) -}} | ||||
| <div class="attributes"> | ||||
|   {%- if result.publishedDate %}<div class="result_publishedDate"><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>{% endif -%} | ||||
|   {%- if result.authors %}<div class="result_authors"><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%} | ||||
|   {%- if result.journal -%} | ||||
|     <div class="result_journal"> | ||||
|       <span>{{- _("Journal") }}:</span><span>{{ result.journal -}} | ||||
|       {%- if result.volume -%} | ||||
|          {{- result.volume -}} | ||||
|         {%- if result.number -%} | ||||
|           .{{- result.number -}} | ||||
|         {%- endif -%} | ||||
|       {%- endif -%} | ||||
|       {%- if result.pages -%} | ||||
|          {{- result.pages -}} | ||||
|       {%- endif -%} | ||||
|       </span> | ||||
|     </div> | ||||
|   {%- endif %} | ||||
|   {%- if result.editor %}<div class="result_editor"><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%} | ||||
|   {%- if result.publisher %}<div class="result_publisher"><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%} | ||||
|   {%- if result.type %}<div class="result_type"><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%} | ||||
|   {%- if result.tags %}<div class="result_tags"><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%} | ||||
|   {%- if result.doi %}<div class="result_doi"><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%} | ||||
|   {%- if result.issn %}<div class="result_issn"><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%} | ||||
|   {%- if result.isbn %}<div class="result_isbn"><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%} | ||||
| </div> | ||||
| {%- if result.content -%}<p class="content">{{- result.content | safe -}}</p>{%- endif -%} | ||||
| {%- if result.comments -%}<p class="comments">{{- result.comments -}}</p>{%- endif -%} | ||||
| <p class="altlink"> | ||||
|   {%- if result.pdf_url -%} | ||||
|     <a href="{{ result.pdf_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('PDF') }}</a> | ||||
|   {%- endif -%} | ||||
|   {%- if result.html_url -%} | ||||
|       <a href="{{ result.html_url }}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>{{ _('HTML') }}</a> | ||||
|   {%- endif -%} | ||||
|   {%- if result.doi %} | ||||
|     <a href="https://www.altmetric.com/details/doi/{{result.doi}}" {% if results_on_new_tab %}target="_blank" rel="noopener noreferrer"{% else %}rel="noreferrer"{% endif %}>Altmetric</a> | ||||
|   {% endif -%} | ||||
| </p> | ||||
| {{- result_sub_footer(result, proxify) -}} | ||||
| {{- result_footer(result) }} | ||||
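Note that issn and isbn are rendered through Jinja's join filter, which is why the engines above wrap single values in lists. A quick check of that behavior:

    from jinja2 import Template

    t = Template('{{ result.issn | join(", ") }}')
    print(t.render(result={'issn': ['1476-4687']}))  # -> 1476-4687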
searx/webapp.py
| @ -12,7 +12,6 @@ import os | ||||
| import sys | ||||
| import base64 | ||||
| 
 | ||||
| -from datetime import datetime, timedelta | ||||
| from timeit import default_timer | ||||
| from html import escape | ||||
| from io import StringIO | ||||
| @ -45,7 +44,6 @@ from flask.json import jsonify | ||||
| from flask_babel import ( | ||||
|     Babel, | ||||
|     gettext, | ||||
| -    format_date, | ||||
|     format_decimal, | ||||
| ) | ||||
| 
 | ||||
| @ -79,6 +77,7 @@ from searx.webutils import ( | ||||
|     is_hmac_of, | ||||
|     is_flask_run_cmdline, | ||||
|     group_engines_in_tab, | ||||
|     searxng_l10n_timespan, | ||||
| ) | ||||
| from searx.webadapter import ( | ||||
|     get_search_query_from_webapp, | ||||
| @ -718,25 +717,13 @@ def search(): | ||||
|         if 'url' in result: | ||||
|             result['pretty_url'] = prettify_url(result['url']) | ||||
| 
 | ||||
| -        # TODO, check if timezone is calculated right  # pylint: disable=fixme | ||||
|         if result.get('publishedDate'):  # do not try to get a date from an empty string or a None type | ||||
|             try:  # test if publishedDate >= 1900 (datetime module bug) | ||||
|                 result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') | ||||
|             except ValueError: | ||||
|                 result['publishedDate'] = None | ||||
|             else: | ||||
| -                if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): | ||||
| -                    timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None) | ||||
| -                    minutes = int((timedifference.seconds / 60) % 60) | ||||
| -                    hours = int(timedifference.seconds / 60 / 60) | ||||
| -                    if hours == 0: | ||||
| -                        result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) | ||||
| -                    else: | ||||
| -                        result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( | ||||
| -                            hours=hours, minutes=minutes | ||||
| -                        ) | ||||
| -                else: | ||||
| -                    result['publishedDate'] = format_date(result['publishedDate']) | ||||
| +                result['publishedDate'] = searxng_l10n_timespan(result['publishedDate']) | ||||
| 
 | ||||
|         # set result['open_group'] = True when the template changes from the previous result | ||||
|         # set result['close_group'] = True when the template changes on the next result | ||||
|  | ||||
searx/webutils.py
| @ -7,11 +7,14 @@ import hmac | ||||
| import re | ||||
| import inspect | ||||
| import itertools | ||||
| from datetime import datetime, timedelta | ||||
| from typing import Iterable, List, Tuple, Dict | ||||
| 
 | ||||
| from io import StringIO | ||||
| from codecs import getincrementalencoder | ||||
| 
 | ||||
| from flask_babel import gettext, format_date | ||||
| 
 | ||||
| from searx import logger, settings | ||||
| from searx.engines import Engine, OTHER_CATEGORY | ||||
| 
 | ||||
| @ -138,6 +141,28 @@ def highlight_content(content, query): | ||||
|     return content | ||||
| 
 | ||||
| 
 | ||||
| def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name | ||||
|     """Returns a human-readable and translated string indicating how long ago | ||||
|     a date was in the past / the time span of the date to the present. | ||||
| 
 | ||||
|     On January 1st at midnight, only the year of the date is returned. | ||||
|     """ | ||||
|     # TODO, check if timezone is calculated right  # pylint: disable=fixme | ||||
|     d = dt.date() | ||||
|     t = dt.time() | ||||
|     if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0: | ||||
|         return str(d.year) | ||||
|     if dt.replace(tzinfo=None) >= datetime.now() - timedelta(days=1): | ||||
|         timedifference = datetime.now() - dt.replace(tzinfo=None) | ||||
|         minutes = int((timedifference.seconds / 60) % 60) | ||||
|         hours = int(timedifference.seconds / 60 / 60) | ||||
|         if hours == 0: | ||||
|             return gettext('{minutes} minute(s) ago').format(minutes=minutes) | ||||
|         return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) | ||||
|     return format_date(dt) | ||||
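The three branches, in order: a bare year for dates at exactly January 1st, midnight; a relative phrase for anything within the last 24 hours; otherwise a locale-formatted date. A sketch of each:

    from datetime import datetime, timedelta

    searxng_l10n_timespan(datetime(2020, 1, 1))
    # -> '2020'  (publication dates known only by year)
    searxng_l10n_timespan(datetime.now() - timedelta(hours=2))
    # -> '2 hour(s), 0 minute(s) ago'  (translated via gettext)
    searxng_l10n_timespan(datetime(2021, 6, 15, 12, 30))
    # -> a date string from flask_babel's format_date()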
| 
 | ||||
| 
 | ||||
| def is_flask_run_cmdline(): | ||||
|     """Check if the application was started using "flask run" command line | ||||
| 
 | ||||
|  | ||||