From a96f503d7b4866e6eb352afd759433b3aad0a3f5 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 26 Aug 2022 16:04:50 +0000 Subject: [PATCH 1/7] Add searx.webutils.searxng_format_date * Move the datetime to str code from searx.webapp.search to searx.webutils.searxng_format_date * When the month, day, hour, day and second are zero, the function returns only the year. --- searx/webapp.py | 17 ++--------------- searx/webutils.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/searx/webapp.py b/searx/webapp.py index bd76cc534..e6bda42be 100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -12,7 +12,6 @@ import os import sys import base64 -from datetime import datetime, timedelta from timeit import default_timer from html import escape from io import StringIO @@ -45,7 +44,6 @@ from flask.json import jsonify from flask_babel import ( Babel, gettext, - format_date, format_decimal, ) @@ -79,6 +77,7 @@ from searx.webutils import ( is_hmac_of, is_flask_run_cmdline, group_engines_in_tab, + searxng_format_date, ) from searx.webadapter import ( get_search_query_from_webapp, @@ -718,25 +717,13 @@ def search(): if 'url' in result: result['pretty_url'] = prettify_url(result['url']) - # TODO, check if timezone is calculated right # pylint: disable=fixme if result.get('publishedDate'): # do not try to get a date from an empty string or a None type try: # test if publishedDate >= 1900 (datetime module bug) result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') except ValueError: result['publishedDate'] = None else: - if result['publishedDate'].replace(tzinfo=None) >= datetime.now() - timedelta(days=1): - timedifference = datetime.now() - result['publishedDate'].replace(tzinfo=None) - minutes = int((timedifference.seconds / 60) % 60) - hours = int(timedifference.seconds / 60 / 60) - if hours == 0: - result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) - else: - result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format( - hours=hours, minutes=minutes - ) - else: - result['publishedDate'] = format_date(result['publishedDate']) + result['publishedDate'] = searxng_format_date(result['publishedDate']) # set result['open_group'] = True when the template changes from the previous result # set result['close_group'] = True when the template changes on the next result diff --git a/searx/webutils.py b/searx/webutils.py index b18fd5c6a..f084fe9d3 100644 --- a/searx/webutils.py +++ b/searx/webutils.py @@ -7,11 +7,14 @@ import hmac import re import inspect import itertools +from datetime import datetime, timedelta from typing import Iterable, List, Tuple, Dict from io import StringIO from codecs import getincrementalencoder +from flask_babel import gettext, format_date + from searx import logger, settings from searx.engines import Engine, OTHER_CATEGORY @@ -138,6 +141,22 @@ def highlight_content(content, query): return content +def searxng_format_date(dt: datetime): # pylint: disable=invalid-name + # TODO, check if timezone is calculated right # pylint: disable=fixme + d = dt.date() + t = dt.time() + if d.month == 1 and d.day == 1 and t.hour == 0 and t.minute == 0 and t.second == 0: + return str(d.year) + if dt.replace(tzinfo=None) >= datetime.now() - timedelta(days=1): + timedifference = datetime.now() - dt.replace(tzinfo=None) + minutes = int((timedifference.seconds / 60) % 60) + hours = int(timedifference.seconds / 60 / 60) + if hours == 0: + return gettext('{minutes} minute(s) 
ago').format(minutes=minutes) + return gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) + return format_date(dt) + + def is_flask_run_cmdline(): """Check if the application was started using "flask run" command line From 5ba831d6a88bca617d984593f6710d0c18bae120 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 26 Aug 2022 16:07:18 +0000 Subject: [PATCH 2/7] Add paper.html result template --- .../static/themes/simple/src/less/style.less | 65 +++++++++++++++++++ .../simple/result_templates/paper.html | 44 +++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 searx/templates/simple/result_templates/paper.html diff --git a/searx/static/themes/simple/src/less/style.less b/searx/static/themes/simple/src/less/style.less index 11d2ef58d..dd8e8a596 100644 --- a/searx/static/themes/simple/src/less/style.less +++ b/searx/static/themes/simple/src/less/style.less @@ -302,6 +302,49 @@ article[data-vim-selected].category-social { } } +.result-paper { + .attributes { + display: table; + border-spacing: 0.125rem; + + div { + display: table-row; + + span { + font-size: 0.9rem; + margin-top: 0.25rem; + display: table-cell; + + time { + font-size: 0.9rem; + } + } + + span:first-child { + color: var(--color-base-font); + min-width: 10rem; + } + + span:nth-child(2) { + color: var(--color-result-publishdate-font); + } + } + } + + .content { + margin-top: 0.25rem; + } + + .comments { + font-size: 0.9rem; + margin: 0.25rem 0 0 0; + padding: 0; + word-wrap: break-word; + line-height: 1.24; + font-style: italic; + } +} + .template_group_images { display: flex; flex-wrap: wrap; @@ -955,6 +998,28 @@ article[data-vim-selected].category-social { border: none !important; background-color: var(--color-sidebar-background); } + + .result-paper { + .attributes { + display: block; + + div { + display: block; + + span { + display: inline; + } + + span:first-child { + font-weight: bold; + } + + span:nth-child(2) { + .ltr-margin-left(0.5rem); + } + } + } + } } /* diff --git a/searx/templates/simple/result_templates/paper.html b/searx/templates/simple/result_templates/paper.html new file mode 100644 index 000000000..3ede1b250 --- /dev/null +++ b/searx/templates/simple/result_templates/paper.html @@ -0,0 +1,44 @@ +{% from 'simple/macros.html' import result_header, result_sub_header, result_sub_footer, result_footer with context %} + +{{ result_header(result, favicons, image_proxify) -}} +
+<div class="attributes">
+  {%- if result.publishedDate %}
+  <div><span>{{ _("Published date") }}:</span><span><time class="published_date" datetime="{{ result.pubdate }}" >{{ result.publishedDate }}</time></span></div>
+  {% endif -%}
+  {%- if result.authors %}<div><span>{{ _("Author") }}:</span><span>{{ result.authors | join(", ") }}</span></div>{% endif -%}
+  {%- if result.journal -%}
+  <div><span>{{- _("Journal") }}:</span><span>{{ result.journal -}}
+    {%- if result.volume -%}
+      &nbsp;{{- result.volume -}}
+      {%- if result.number -%}
+        .{{- result.number -}}
+      {%- endif -%}
+    {%- endif -%}
+    {%- if result.start_page -%}
+      &nbsp;{{- result.start_page -}} / {{- result.end_page -}}
+    {%- endif -%}
+    </span>
+  </div>
+  {%- endif %}
+  {%- if result.editor %}<div><span>{{ _("Editor") }}:</span><span>{{ result.editor }}</span></div>{% endif -%}
+  {%- if result.publisher %}<div><span>{{ _("Publisher") }}:</span><span>{{ result.publisher }}</span></div>{% endif -%}
+  {%- if result.type %}<div><span>{{ _("Type") }}:</span><span>{{ result.type }}</span></div>{% endif -%}
+  {%- if result.tags %}<div><span>{{ _("Tags") }}:</span><span>{{ result.tags | join(", ")}}</span></div>{%- endif -%}
+  {%- if result.doi %}<div><span>{{ _("DOI") }}:</span><span>{{- result.doi -}}</span></div>{% endif -%}
+  {%- if result.issn %}<div><span>{{ _("ISSN") }}:</span><span>{{ result.issn | join(", ") }}</span></div>{% endif -%}
+  {%- if result.isbn %}<div><span>{{ _("ISBN") }}:</span><span>{{ result.isbn | join(", ") }}</span></div>{% endif -%}
+</div>
+{%- if result.content -%}
+<p class="content">
+  {{- result.content | safe -}}
+</p>
+{%- endif -%}
+{%- if result.comments -%}
+<p class="comments">
+  {{- result.comments -}}
+</p>
{%- endif -%} + +{{- result_sub_footer(result, proxify) -}} +{{- result_footer(result) }} From 593026ad9cd024fd7b3182d48f274aa41b374c74 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 26 Aug 2022 16:07:38 +0000 Subject: [PATCH 3/7] oa_doi_rewrite: add the doi to the result when it is found. Currentty, when oa_doi_rewrite find a DOI in the result URL, it replace the URL. In this commit, the plugin adds the key "doi" to the result, so the paper.html can show it. --- searx/plugins/oa_doi_rewrite.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/searx/plugins/oa_doi_rewrite.py b/searx/plugins/oa_doi_rewrite.py index 54d28bc9a..f0e07735d 100644 --- a/searx/plugins/oa_doi_rewrite.py +++ b/searx/plugins/oa_doi_rewrite.py @@ -42,4 +42,6 @@ def on_result(request, search, result): doi = doi[: -len(suffix)] result['url'] = get_doi_resolver(request.preferences) + doi result['parsed_url'] = urlparse(result['url']) + if 'doi' not in result: + result['doi'] = doi return True From e36f85b8365e5d6a9263dd78242a10a305a9000c Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 26 Aug 2022 16:10:12 +0000 Subject: [PATCH 4/7] Science category: update the engines * use the paper.html template * fetch more data from the engines * add crossref.py --- searx/engines/arxiv.py | 74 ++++++++++++++++++------ searx/engines/crossref.py | 59 +++++++++++++++++++ searx/engines/google_scholar.py | 85 +++++++++++++++++++++++---- searx/engines/pubmed.py | 95 ++++++++++++++++++------------- searx/engines/semantic_scholar.py | 57 +++++++++++++------ searx/engines/springer.py | 38 ++++++------- searx/searxng.msg | 1 + searx/settings.yml | 26 ++------- 8 files changed, 309 insertions(+), 126 deletions(-) create mode 100644 searx/engines/crossref.py diff --git a/searx/engines/arxiv.py b/searx/engines/arxiv.py index a1a58172d..a4811ebd5 100644 --- a/searx/engines/arxiv.py +++ b/searx/engines/arxiv.py @@ -3,9 +3,10 @@ ArXiV (Scientific preprints) """ -from lxml import html +from lxml import etree +from lxml.etree import XPath from datetime import datetime -from searx.utils import eval_xpath_list, eval_xpath_getindex +from searx.utils import eval_xpath, eval_xpath_list, eval_xpath_getindex # about about = { @@ -17,7 +18,7 @@ about = { "results": 'XML-RSS', } -categories = ['science'] +categories = ['science', 'scientific publications'] paging = True base_url = ( @@ -27,6 +28,23 @@ base_url = ( # engine dependent config number_of_results = 10 +# xpaths +arxiv_namespaces = { + "atom": "http://www.w3.org/2005/Atom", + "arxiv": "http://arxiv.org/schemas/atom", +} +xpath_entry = XPath('//atom:entry', namespaces=arxiv_namespaces) +xpath_title = XPath('.//atom:title', namespaces=arxiv_namespaces) +xpath_id = XPath('.//atom:id', namespaces=arxiv_namespaces) +xpath_summary = XPath('.//atom:summary', namespaces=arxiv_namespaces) +xpath_author_name = XPath('.//atom:author/atom:name', namespaces=arxiv_namespaces) +xpath_doi = XPath('.//arxiv:doi', namespaces=arxiv_namespaces) +xpath_pdf = XPath('.//atom:link[@title="pdf"]', namespaces=arxiv_namespaces) +xpath_published = XPath('.//atom:published', namespaces=arxiv_namespaces) +xpath_journal = XPath('.//arxiv:journal_ref', namespaces=arxiv_namespaces) +xpath_category = XPath('.//atom:category/@term', namespaces=arxiv_namespaces) +xpath_comment = XPath('./arxiv:comment', namespaces=arxiv_namespaces) + def request(query, params): # basic search @@ -41,30 +59,50 @@ def request(query, params): def response(resp): results = [] + dom = etree.fromstring(resp.content) + for entry in 
eval_xpath_list(dom, xpath_entry): + title = eval_xpath_getindex(entry, xpath_title, 0).text - dom = html.fromstring(resp.content) + url = eval_xpath_getindex(entry, xpath_id, 0).text + abstract = eval_xpath_getindex(entry, xpath_summary, 0).text - for entry in eval_xpath_list(dom, '//entry'): - title = eval_xpath_getindex(entry, './/title', 0).text + authors = [author.text for author in eval_xpath_list(entry, xpath_author_name)] - url = eval_xpath_getindex(entry, './/id', 0).text + # doi + doi_element = eval_xpath_getindex(entry, xpath_doi, 0, default=None) + doi = None if doi_element is None else doi_element.text - content_string = '{doi_content}{abstract_content}' + # pdf + pdf_element = eval_xpath_getindex(entry, xpath_pdf, 0, default=None) + pdf_url = None if pdf_element is None else pdf_element.attrib.get('href') - abstract = eval_xpath_getindex(entry, './/summary', 0).text + # journal + journal_element = eval_xpath_getindex(entry, xpath_journal, 0, default=None) + journal = None if journal_element is None else journal_element.text - # If a doi is available, add it to the snipppet - doi_element = eval_xpath_getindex(entry, './/link[@title="doi"]', 0, default=None) - doi_content = doi_element.text if doi_element is not None else '' - content = content_string.format(doi_content=doi_content, abstract_content=abstract) + # tags + tag_elements = eval_xpath(entry, xpath_category) + tags = [str(tag) for tag in tag_elements] - if len(content) > 300: - content = content[0:300] + "..." - # TODO: center snippet on query term + # comments + comments_elements = eval_xpath_getindex(entry, xpath_comment, 0, default=None) + comments = None if comments_elements is None else comments_elements.text - publishedDate = datetime.strptime(eval_xpath_getindex(entry, './/published', 0).text, '%Y-%m-%dT%H:%M:%SZ') + publishedDate = datetime.strptime(eval_xpath_getindex(entry, xpath_published, 0).text, '%Y-%m-%dT%H:%M:%SZ') - res_dict = {'url': url, 'title': title, 'publishedDate': publishedDate, 'content': content} + res_dict = { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'publishedDate': publishedDate, + 'content': abstract, + 'doi': doi, + 'authors': authors, + 'journal': journal, + 'tags': tags, + 'comments': comments, + 'pdf_url': pdf_url, + } results.append(res_dict) diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py new file mode 100644 index 000000000..d61318146 --- /dev/null +++ b/searx/engines/crossref.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: AGPL-3.0-or-later +# lint: pylint +"""Semantic Scholar (Science) +""" + +from urllib.parse import urlencode +from searx.utils import html_to_text + +about = { + "website": 'https://www.crossref.org/', + "wikidata_id": 'Q5188229', + "official_api_documentation": 'https://github.com/CrossRef/rest-api-doc', + "use_official_api": False, + "require_api_key": False, + "results": 'JSON', +} + +categories = ['science', 'scientific publications'] +paging = True +search_url = 'https://api.crossref.org/works' + + +def request(query, params): + params['url'] = search_url + '?' 
+ urlencode(dict(query=query, offset=20 * (params['pageno'] - 1))) + return params + + +def response(resp): + res = resp.json() + results = [] + for record in res['message']['items']: + record_type = record['type'] + if record_type == 'book-chapter': + title = record['container-title'][0] + if record['title'][0].lower().strip() != title.lower().strip(): + title = title + ' (' + record['title'][0] + ')' + journal = None + else: + title = record['title'][0] + journal = record.get('container-title', [None])[0] + url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL'] + authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])] + isbn = record.get('isbn') or [i['value'] for i in record.get('isbn-type', [])] + results.append( + { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'journal': journal, + 'volume': record.get('volume'), + 'type': record['type'], + 'content': html_to_text(record.get('abstract', '')), + 'publisher': record.get('publisher'), + 'authors': authors, + 'doi': record['DOI'], + 'isbn': isbn, + } + ) + return results diff --git a/searx/engines/google_scholar.py b/searx/engines/google_scholar.py index 41c62886b..c07cd4cea 100644 --- a/searx/engines/google_scholar.py +++ b/searx/engines/google_scholar.py @@ -13,10 +13,12 @@ Definitions`_. from urllib.parse import urlencode from datetime import datetime +from typing import Optional from lxml import html from searx.utils import ( eval_xpath, + eval_xpath_getindex, eval_xpath_list, extract_text, ) @@ -46,7 +48,7 @@ about = { } # engine dependent config -categories = ['science'] +categories = ['science', 'scientific publications'] paging = True language_support = True use_locale_domain = True @@ -99,7 +101,43 @@ def request(query, params): return params -def response(resp): +def parse_gs_a(text: Optional[str]): + """Parse the text written in green. 
+ + Possible formats: + * "{authors} - {journal}, {year} - {publisher}" + * "{authors} - {year} - {publisher}" + * "{authors} - {publisher}" + """ + if text is None or text == "": + return None, None, None, None + + s_text = text.split(' - ') + authors = s_text[0].split(', ') + publisher = s_text[-1] + if len(s_text) != 3: + return authors, None, publisher, None + + # the format is "{authors} - {journal}, {year} - {publisher}" or "{authors} - {year} - {publisher}" + # get journal and year + journal_year = s_text[1].split(', ') + # journal is optional and may contains some coma + if len(journal_year) > 1: + journal = ', '.join(journal_year[0:-1]) + if journal == '…': + journal = None + else: + journal = None + # year + year = journal_year[-1] + try: + publishedDate = datetime.strptime(year.strip(), '%Y') + except ValueError: + publishedDate = None + return authors, journal, publisher, publishedDate + + +def response(resp): # pylint: disable=too-many-locals """Get response from google's search request""" results = [] @@ -112,30 +150,53 @@ def response(resp): dom = html.fromstring(resp.text) # parse results - for result in eval_xpath_list(dom, '//div[@class="gs_ri"]'): + for result in eval_xpath_list(dom, '//div[@data-cid]'): - title = extract_text(eval_xpath(result, './h3[1]//a')) + title = extract_text(eval_xpath(result, './/h3[1]//a')) if not title: # this is a [ZITATION] block continue - url = eval_xpath(result, './h3[1]//a/@href')[0] - content = extract_text(eval_xpath(result, './div[@class="gs_rs"]')) or '' - - pub_info = extract_text(eval_xpath(result, './div[@class="gs_a"]')) - if pub_info: - content += "[%s]" % pub_info - pub_type = extract_text(eval_xpath(result, './/span[@class="gs_ct1"]')) if pub_type: - title = title + " " + pub_type + pub_type = pub_type[1:-1].lower() + + url = eval_xpath_getindex(result, './/h3[1]//a/@href', 0) + content = extract_text(eval_xpath(result, './/div[@class="gs_rs"]')) + authors, journal, publisher, publishedDate = parse_gs_a( + extract_text(eval_xpath(result, './/div[@class="gs_a"]')) + ) + if publisher in url: + publisher = None + + # cited by + comments = extract_text(eval_xpath(result, './/div[@class="gs_fl"]/a[starts-with(@href,"/scholar?cites=")]')) + + # link to the html or pdf document + html_url = None + pdf_url = None + doc_url = eval_xpath_getindex(result, './/div[@class="gs_or_ggsm"]/a/@href', 0, default=None) + doc_type = extract_text(eval_xpath(result, './/span[@class="gs_ctg2"]')) + if doc_type == "[PDF]": + pdf_url = doc_url + else: + html_url = doc_url results.append( { + 'template': 'paper.html', + 'type': pub_type, 'url': url, 'title': title, + 'authors': authors, + 'publisher': publisher, + 'journal': journal, + 'publishedDate': publishedDate, 'content': content, + 'comments': comments, + 'html_url': html_url, + 'pdf_url': pdf_url, } ) diff --git a/searx/engines/pubmed.py b/searx/engines/pubmed.py index 27444ae24..02e282d5f 100644 --- a/searx/engines/pubmed.py +++ b/searx/engines/pubmed.py @@ -3,11 +3,15 @@ PubMed (Scholar publications) """ -from flask_babel import gettext from lxml import etree from datetime import datetime from urllib.parse import urlencode from searx.network import get +from searx.utils import ( + eval_xpath_getindex, + eval_xpath_list, + extract_text, +) # about about = { @@ -22,7 +26,7 @@ about = { "results": 'XML', } -categories = ['science'] +categories = ['science', 'scientific publications'] base_url = ( 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi' + 
'?db=pubmed&{query}&retstart={offset}&retmax={hits}' @@ -63,46 +67,61 @@ def response(resp): retrieve_url_encoded = pubmed_retrieve_api_url.format(**retrieve_notice_args) - search_results_xml = get(retrieve_url_encoded).content - search_results = etree.XML(search_results_xml).xpath('//PubmedArticleSet/PubmedArticle/MedlineCitation') + search_results_response = get(retrieve_url_encoded).content + search_results = etree.XML(search_results_response) + for entry in eval_xpath_list(search_results, '//PubmedArticle'): + medline = eval_xpath_getindex(entry, './MedlineCitation', 0) - for entry in search_results: - title = entry.xpath('.//Article/ArticleTitle')[0].text - - pmid = entry.xpath('.//PMID')[0].text + title = eval_xpath_getindex(medline, './/Article/ArticleTitle', 0).text + pmid = eval_xpath_getindex(medline, './/PMID', 0).text url = pubmed_url + pmid + content = extract_text( + eval_xpath_getindex(medline, './/Abstract/AbstractText//text()', 0, default=None), allow_none=True + ) + doi = extract_text( + eval_xpath_getindex(medline, './/ELocationID[@EIdType="doi"]/text()', 0, default=None), allow_none=True + ) + journal = extract_text( + eval_xpath_getindex(medline, './Article/Journal/Title/text()', 0, default=None), allow_none=True + ) + issn = extract_text( + eval_xpath_getindex(medline, './Article/Journal/ISSN/text()', 0, default=None), allow_none=True + ) + authors = [] + for author in eval_xpath_list(medline, './Article/AuthorList/Author'): + f = eval_xpath_getindex(author, './ForeName', 0, default=None) + l = eval_xpath_getindex(author, './LastName', 0, default=None) + f = '' if f is None else f.text + l = '' if l is None else l.text + authors.append((f + ' ' + l).strip()) - try: - content = entry.xpath('.//Abstract/AbstractText')[0].text - except: - content = gettext('No abstract is available for this publication.') + res_dict = { + 'template': 'paper.html', + 'url': url, + 'title': title, + 'content': content, + 'journal': journal, + 'issn': [issn], + 'authors': authors, + 'doi': doi, + } - # If a doi is available, add it to the snipppet - try: - doi = entry.xpath('.//ELocationID[@EIdType="doi"]')[0].text - content = 'DOI: {doi} Abstract: {content}'.format(doi=doi, content=content) - except: - pass - - if len(content) > 300: - content = content[0:300] + "..." 
- # TODO: center snippet on query term - - res_dict = {'url': url, 'title': title, 'content': content} - - try: - publishedDate = datetime.strptime( - entry.xpath('.//DateCreated/Year')[0].text - + '-' - + entry.xpath('.//DateCreated/Month')[0].text - + '-' - + entry.xpath('.//DateCreated/Day')[0].text, - '%Y-%m-%d', - ) - res_dict['publishedDate'] = publishedDate - except: - pass + accepted_date = eval_xpath_getindex( + entry, './PubmedData/History//PubMedPubDate[@PubStatus="accepted"]', 0, default=None + ) + if accepted_date is not None: + year = eval_xpath_getindex(accepted_date, './Year', 0) + month = eval_xpath_getindex(accepted_date, './Month', 0) + day = eval_xpath_getindex(accepted_date, './Day', 0) + try: + publishedDate = datetime.strptime( + year.text + '-' + month.text + '-' + day.text, + '%Y-%m-%d', + ) + res_dict['publishedDate'] = publishedDate + except Exception as e: + print(e) results.append(res_dict) - return results + return results diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index bda731047..b2701c333 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -6,6 +6,8 @@ from json import dumps, loads from datetime import datetime +from flask_babel import gettext + about = { "website": 'https://www.semanticscholar.org/', "wikidata_id": 'Q22908627', @@ -15,6 +17,7 @@ about = { "results": 'JSON', } +categories = ['science', 'scientific publications'] paging = True search_url = 'https://www.semanticscholar.org/api/1/search' paper_url = 'https://www.semanticscholar.org/paper' @@ -47,9 +50,6 @@ def response(resp): results = [] for result in res['results']: - item = {} - metadata = [] - url = result.get('primaryPaperLink', {}).get('url') if not url and result.get('links'): url = result.get('links')[0] @@ -60,22 +60,47 @@ def response(resp): if not url: url = paper_url + '/%s' % result['id'] - item['url'] = url + # publishedDate + if 'pubDate' in result: + publishedDate = datetime.strptime(result['pubDate'], "%Y-%m-%d") + else: + publishedDate = None - item['title'] = result['title']['text'] - item['content'] = result['paperAbstract']['text'] + # authors + authors = [author[0]['name'] for author in result.get('authors', [])] - metadata = result.get('fieldsOfStudy') or [] - venue = result.get('venue', {}).get('text') - if venue: - metadata.append(venue) - if metadata: - item['metadata'] = ', '.join(metadata) + # pick for the first alternate link, but not from the crawler + pdf_url = None + for doc in result.get('alternatePaperLinks', []): + if doc['linkType'] != 'crawler': + pdf_url = doc['url'] + break - pubDate = result.get('pubDate') - if pubDate: - item['publishedDate'] = datetime.strptime(pubDate, "%Y-%m-%d") + # comments + comments = None + if 'citationStats' in result: + comments = gettext( + '{numCitations} citations from the year {firstCitationVelocityYear} to {lastCitationVelocityYear}' + ).format( + numCitations=result['citationStats']['numCitations'], + firstCitationVelocityYear=result['citationStats']['firstCitationVelocityYear'], + lastCitationVelocityYear=result['citationStats']['lastCitationVelocityYear'], + ) - results.append(item) + results.append( + { + 'template': 'paper.html', + 'url': url, + 'title': result['title']['text'], + 'content': result['paperAbstract']['text'], + 'journal': result.get('venue', {}).get('text') or result.get('journal', {}).get('name'), + 'doi': result.get('doiInfo', {}).get('doi'), + 'tags': result.get('fieldsOfStudy'), + 'authors': authors, + 'pdf_url': pdf_url, 
+ 'publishedDate': publishedDate, + 'comments': comments, + } + ) return results diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 512d71e5e..2711fa807 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -19,7 +19,7 @@ about = { "results": 'JSON', } -categories = ['science'] +categories = ['science', 'scientific publications'] paging = True nb_per_page = 10 api_key = 'unset' @@ -41,32 +41,30 @@ def response(resp): json_data = loads(resp.text) for record in json_data['records']: - content = record['abstract'][0:500] - if len(record['abstract']) > len(content): - content += "..." + content = record['abstract'] published = datetime.strptime(record['publicationDate'], '%Y-%m-%d') - - metadata = [ - record[x] - for x in [ - 'publicationName', - 'identifier', - 'contentType', - ] - if record.get(x) is not None - ] - - metadata = ' / '.join(metadata) - if record.get('startingPage') and record.get('endingPage') is not None: - metadata += " (%(startingPage)s-%(endingPage)s)" % record - + authors = [" ".join(author['creator'].split(', ')[::-1]) for author in record['creators']] + tags = record.get('genre') + if isinstance(tags, str): + tags = [tags] results.append( { + 'template': 'paper.html', 'title': record['title'], 'url': record['url'][0]['value'].replace('http://', 'https://', 1), + 'type': record.get('contentType'), 'content': content, 'publishedDate': published, - 'metadata': metadata, + 'authors': authors, + 'doi': record.get('doi'), + 'journal': record.get('publicationName'), + 'start_page': record.get('start_page'), + 'end_page': record.get('end_page'), + 'tags': tags, + 'issn': [record.get('issn')], + 'isbn': [record.get('isbn')], + 'volume': record.get('volume') or None, + 'number': record.get('number') or None, } ) return results diff --git a/searx/searxng.msg b/searx/searxng.msg index 3b876f96d..c37240f83 100644 --- a/searx/searxng.msg +++ b/searx/searxng.msg @@ -43,6 +43,7 @@ CATEGORY_GROUPS = { 'REPOS': 'repos', 'SOFTWARE_WIKIS': 'software wikis', 'WEB': 'web', + 'SCIENTIFIC PUBLICATIONS': 'scientific publications', } STYLE_NAMES = { diff --git a/searx/settings.yml b/searx/settings.yml index 3f07bb2dd..ba38e694a 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -319,7 +319,6 @@ engines: - name: arxiv engine: arxiv shortcut: arx - categories: science timeout: 4.0 # tmp suspended: dh key too small @@ -411,23 +410,9 @@ engines: # api_key: 'unset' - name: crossref - engine: json_engine - paging: true - search_url: https://search.crossref.org/dois?q={query}&page={pageno} - url_query: doi - title_query: title - title_html_to_text: true - content_query: fullCitation - content_html_to_text: true - categories: science + engine: crossref shortcut: cr - about: - website: https://www.crossref.org/ - wikidata_id: Q5188229 - official_api_documentation: https://github.com/CrossRef/rest-api-doc - use_official_api: false - require_api_key: false - results: JSON + timeout: 10 - name: yep engine: json_engine @@ -1068,7 +1053,7 @@ engines: title_query: metadata/oaf:entity/oaf:result/title/$ content_query: metadata/oaf:entity/oaf:result/description/$ content_html_to_text: true - categories: science + categories: "science" shortcut: oad timeout: 5.0 about: @@ -1198,7 +1183,6 @@ engines: - name: pubmed engine: pubmed shortcut: pub - categories: science timeout: 3.0 - name: pypi @@ -1346,7 +1330,6 @@ engines: engine: semantic_scholar disabled: true shortcut: se - categories: science # Spotify needs API credentials # - name: spotify @@ -1372,8 +1355,7 
@@ engines: # # working API key, for test & debug: "a69685087d07eca9f13db62f65b8f601" # api_key: 'unset' # shortcut: springer - # categories: science - # timeout: 6.0 + # timeout: 15.0 - name: startpage engine: startpage From fe43b6e8211c972700bc2602e8f16018c72bb269 Mon Sep 17 00:00:00 2001 From: Alexandre FLAMENT Date: Fri, 26 Aug 2022 17:25:45 +0000 Subject: [PATCH 5/7] [build] /static --- .../themes/simple/css/searxng-rtl.min.css | Bin 68358 -> 69227 bytes .../themes/simple/css/searxng-rtl.min.css.map | Bin 110492 -> 111870 bytes .../static/themes/simple/css/searxng.min.css | Bin 67261 -> 68129 bytes .../themes/simple/css/searxng.min.css.map | Bin 108805 -> 110183 bytes 4 files changed, 0 insertions(+), 0 deletions(-) diff --git a/searx/static/themes/simple/css/searxng-rtl.min.css b/searx/static/themes/simple/css/searxng-rtl.min.css index 450f5d96d9d178aa8c54d05c0db09b23c9b0e51b..1462d0d5e32f8deb93d33388701732479bd60972 100644 GIT binary patch delta 806 zcma))Jxc>I7{?JxT?-Y&LAXU|hs0~8h`GhDlDnk6U@jMu)I&?T&B@6FCqIPX;Og41 z;MB3=;vmlEdac?!T1(dF`91%a+}zjRA8L=YX{x9maswomR5=hEqnJN5RF4SLlH)66 zu+ORI$BIw|w8Q}u0|YzWo}$qhu3L8qyIe&86(`mCba^jda4cSo`mr!TvuldrnZ4mf zSPhr}CYA`!uX-!eKtMUKf-79du!xr7W4O`5ToCFrr40x|#z}@>4$r;F%1Ib8A?mpQ zrcf}12@682+qRJA-R*An)@r2fvhu*Xr^1XXuB(GlWCwNG(ju=#R*suLPhvvy-dr!S zMS?_OiF`$I_W8P>eVpt)&8E$xbZr(tP5L|zF3L)NKg~+(Tg(=PFY8R1;7riX_(~;( i|AewZXwa$aZnOTkjb+k`)pG*uWyh0)=SF?!tM&~xSqz-jO#R#O*;B{&zjqNx{WY)D)cV*lNiiwFLX6?1T&hFaT zUB_udi3+D84xkZCL_r9UIB=+d93n0V{s6=Q4jfPrZ~_Tc91vV6%*=XYw}>0|!RznG z``&xses6t0bo=L_k8YHAl$;ckQ6?)bBZQ;^${nO&&=R_CLpfQBmqJxQl-K(u!0+hvJ#Cm)po7MHLlUsRz51le&UHe_#wjzz_w0#0drzpM=%2{xK?&ZC;IbeiP$b611$b@`&tKCw=&V;Zzg zze=feO){vn3aL9qJj2#d~V>7P+O;aFUUg6fUrcTE0;VKHDPiRJMb0 zTfX1{6>-ESr>ma^+iXLg{AKKP$gOHPm~>gFXKB4dCbi!S)+TEdo)+-NCSxG>qwv>2 ztU^)eyFN!|!&aZ;9FO-nu5U5mgiOBEv*hjC_IPrONfoFHj%>+!X_>?y3hg~!{|rcW z!sn$Rwlg6~!48kkoKp?{D>ka&nFN{A>*xP3isW1Ui~nNwd2<#>$s}Vf_!O|7l2^nr zJuT(A=>^qD^|MZjJZL;Q7Ej4-**7$mBG-)7)p`cy4f)PHWS2B>eTDw@iAyV@rEKrA zgz!8P=cLX?IJ$7Sh&yNGScd#$oINs|d?{@1hfVN)Xvfx^j^RezxUJc6d)0z)8oREo zs~uN!ya0q%-Dw;6avHi=HM9j@1)psH{B0Cz(~ z?N(RR(WgRSlDu#*Nk5_B*rE@>@%iV@!O@Cl7>DpY@9L)3p8+9x{@^(MV-bwguSUV} z7~^K8Yr5^Y(Mm%%=;##y$=`pDE%06lP2Gxi^{U%Q!+6YN?j^4J5+D>1mfH{j)zdRm zG;jr+B8%_cN3T5wj?q8>j0{g#T9&I>ENCy)C9PfOn|3d&%Dws*l%b3kuTao@CRzvGZxSJUeaH_cuFqssVu gra|~`m_g%w9Qtt$JaP}KrmuYf9@u~VCb$>;13dlrxBvhE delta 773 zcmb_aF>ljA6qXq>AYxR}3MzF(T{slSjXNUU1&6wpDN!3zhgQ|uO`SOBIEfRt0x=;K zm601Vl#LxBd5D-0Li`3K7FM!WVgzw_wpGN;!*gRbu1y*X~yfXxvL8W3O}>G-&r`1@y4@x5=8E{+-(s4xSH+Q;BLd2HDXK%r<3NxL!8_-vW&*V~wHdCT491iG>_6P0h zh&6~s*0IWC;Y_$SAR`cwen@yKyTEWnrK6u4ClMnPofcLirmDfmn!G9B&Lr2()-$=( z?Q=ZSJ(yfM`sq&|=|8LpD&@(|HUGZr*a@zp|=EuLj%KOBx!We0RY*CBa^y oTZQ!q(cAsXmGat)R)5y&OK+E^w_Zw1vmd9@$2+r&M7k#Z0r7(Fh5!Hn diff --git a/searx/static/themes/simple/css/searxng.min.css b/searx/static/themes/simple/css/searxng.min.css index 09d26b534d5bf5b07d88ada94eb61c147bcb9184..90820978fda154ec09abd7db111fd7c3f4432231 100644 GIT binary patch delta 796 zcma)4&q~8E97Y@pUK}zJyx5Dd9a6T5f+>QJ;47p}wg%HQB$-9a=o5H~;Mwyof@eV= zLIm|NkDjJ)V7q_1mQBg^%lG^K-Cve&u1dFW{X`PgVHyY|h_q~^wWMxG6BT2s1ViUg zBbO1!4J9TLD1ki6+t6+__azCs_Pp;Frf0JhK!#BfpG@v$AFPy~$E}cSpy&mG_F;O% z36N}44sJ-q77NCO^WtVmqowKNS#p1cbdIv5IqEh?1;o==%+JXMT%SUYxtR8d5+ z_F<#`XAmPAuKghit~P7tmo1+QlQFP8s|06%BpA25eVa~ zx4ILhMS@5o0bNOuIU1~*!AseE?p4hC`{vzOe|a~Vo5{3MmxbPGUeV8ZX4?M;c2xYN s)&t5Jlx*Xv*#VmoOLopsj1SjI#J+_+ZMS$lET*~+{&u@=B-`>h!NhigS1QW722O$(QFabrCo#vnn#drrf&%TWW%RIl6nPaCc ztKro-mebyylY(M^ekUn0%!(~ach)VRofpe9P)y`)te${rg=fg3Lpz%k+gyO#5Pv%3 
zyB_F%&+R9(&K%6K(>zm?W+$5fZ|1+~ujHUuYvQXtR)>5l|RPQh2U@{ zpA->~Kq{Ei$jHcVeO_1xUQ;<`r6Ledn#wJENYm9|AR6_<3o>8*9>#V`;gC-&X(dn#;5>!?Mz-zKri^zQAPL z6-CEh>4=(OaYxkB3T28)_^~GA+ZR{gimlMuI86oxR-}|Thr>q(w_y1P&}Bt2(0oIU zC@qQ1C^~!dJ4>TT6bH2OD?}XqetNszOHqSXwY>ugLZgOE#wG~lt`{66@=Y)_?bfqu_Uvh* z28*h$@1hWdre>%%iA(dY2aI+zZUU#l5V<-A21)fE=)tuY-6=mUwuuG7(9B?4D>WCV zgmB1KZ9|t``L@O>v$+^7E^2z|Sj4~6>&h4yCO^Iodhv^|4gf#-brTE_@f>)qZ@gGF zo2uC)56*z0(at~)Erkt(!VO`Syj%l^1`K4CHFHV`jq_X~B!mcRQ|W&;!O1Q<*3+wC zc>CZb@FYM^-2(f1oqHhPY=BYn{W|Ew#-qMB+`*{lH_m}U7W28WJ!b#C0gk-6YepNi x-MZ5PqsN#F|H5D40zo%{?*;-ry`gb-?S69DLruAK0qi5$+n{IrZF1MJd;cja@moSr^m}G=OsHrO!rPK1Tw*naZSnsGQl?aCgvt@z8=rU#8;H= z6&9Wv-C(l+Z$#L?7dLXep$H^C} zBdGu+4)soP&g47E({ff~WadM6qb16>db4jxCC4czKFz0xK z;tFb|5lGa?C8-QVCxWd`OgEVTc)#PV6juPTHbcGJVpop?KTaJSG2cB JuVv(61OStm-@O0; From 08b88597052dfdf17e947289d79510fdadad51e3 Mon Sep 17 00:00:00 2001 From: Markus Heiser Date: Sun, 18 Sep 2022 14:52:54 +0200 Subject: [PATCH 6/7] [doc] paper.html result template Signed-off-by: Markus Heiser --- docs/dev/engine_overview.rst | 89 ++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index e950ae667..731e2f86a 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -311,3 +311,92 @@ the parameter ``template`` must be set to the desired type. address.postcode postcode of object address.country country of object ========================= ===================================================== + +.. _BibTeX format: https://www.bibtex.com/g/bibtex-format/ +.. _BibTeX field types: https://en.wikipedia.org/wiki/BibTeX#Field_types + +.. list-table:: Parameter of the **paper** media type / + see `BibTeX field types`_ and `BibTeX format`_ + :header-rows: 2 + :width: 100% + + * - result-parameter + - Python type + - information + + * - template + - :py:class:`str` + - is set to ``paper.html`` + + * - title + - :py:class:`str` + - title of the result + + * - content + - :py:class:`str` + - abstract + + * - comments + - :py:class:`str` + - free text display in italic below the content + + * - tags + - :py:class:`List `\ [\ :py:class:`str`\ ] + - free tag list + + * - publishedDate + - :py:class:`datetime ` + - last publication date + + * - authors + - :py:class:`List `\ [\ :py:class:`str`\ ] + - list of authors of the work (authors with a "s") + + * - editor + - :py:class:`str` + - list of editors of a book + + * - publisher + - :py:class:`str` + - name of the publisher + + * - journal + - :py:class:`str` + - name of the journal or magazine the article was + published in + + * - volume + - :py:class:`str` + - volume number + + * - start_page + - :py:class:`int` + - page number where the article starts + + * - end_page + - :py:class:`int` + - page number where the article ends + + * - number + - :py:class:`str` + - number of the report or the issue number for a journal article + + * - doi + - :py:class:`str` + - DOI number (like ``10.1038/d41586-018-07848-2``) + + * - issn + - :py:class:`str` + - ISSN number like ``1476-4687`` + + * - isbn + - :py:class:`str` + - ISBN number like ``9780201896831`` + + * - pdf_url + - :py:class:`str` + - URL to the full article, the PDF version + + * - html_url + - :py:class:`str` + - URL to full article, HTML version From d6446be38f3f858c09887a89c8fc490a3c300b95 Mon Sep 17 00:00:00 2001 From: Alexandre Flament Date: Fri, 23 Sep 2022 19:58:14 +0200 Subject: [PATCH 7/7] [mod] science category: various update of about PR 1705 --- 
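Note for reviewers (placed below the "---" marker, so `git am` drops it): a
quick sketch of the renamed helper's observable behavior. Illustrative only —
the exact strings come from the flask_babel translations of the active locale,
and the calls assume a running Flask/Babel application context.

    from datetime import datetime, timedelta
    from searx.webutils import searxng_l10n_timespan

    searxng_l10n_timespan(datetime(2022, 1, 1))                   # -> '2022' (January 1st, midnight)
    searxng_l10n_timespan(datetime.now() - timedelta(minutes=5))  # -> '5 minute(s) ago'
    searxng_l10n_timespan(datetime.now() - timedelta(hours=3))    # -> '3 hour(s), 0 minute(s) ago'
    searxng_l10n_timespan(datetime(2021, 6, 15, 12, 30))          # -> locale date via format_date, e.g. 'Jun 15, 2021'
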
docs/dev/engine_overview.rst | 10 +++------- searx/engines/crossref.py | 4 ++-- searx/engines/semantic_scholar.py | 3 +-- searx/engines/springer.py | 3 +-- searx/settings.yml | 3 ++- searx/templates/simple/result_templates/paper.html | 4 ++-- searx/webapp.py | 4 ++-- searx/webutils.py | 8 +++++++- 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/docs/dev/engine_overview.rst b/docs/dev/engine_overview.rst index 731e2f86a..7d94b83f1 100644 --- a/docs/dev/engine_overview.rst +++ b/docs/dev/engine_overview.rst @@ -369,13 +369,9 @@ the parameter ``template`` must be set to the desired type. - :py:class:`str` - volume number - * - start_page - - :py:class:`int` - - page number where the article starts - - * - end_page - - :py:class:`int` - - page number where the article ends + * - pages + - :py:class:`str` + - page range where the article is * - number - :py:class:`str` diff --git a/searx/engines/crossref.py b/searx/engines/crossref.py index d61318146..fbe2f0c2a 100644 --- a/searx/engines/crossref.py +++ b/searx/engines/crossref.py @@ -33,10 +33,10 @@ def response(resp): if record_type == 'book-chapter': title = record['container-title'][0] if record['title'][0].lower().strip() != title.lower().strip(): - title = title + ' (' + record['title'][0] + ')' + title = html_to_text(title) + ' (' + html_to_text(record['title'][0]) + ')' journal = None else: - title = record['title'][0] + title = html_to_text(record['title'][0]) journal = record.get('container-title', [None])[0] url = record.get('resource', {}).get('primary', {}).get('URL') or record['URL'] authors = [author.get('given', '') + ' ' + author.get('family', '') for author in record.get('author', [])] diff --git a/searx/engines/semantic_scholar.py b/searx/engines/semantic_scholar.py index b2701c333..7a1b5b231 100644 --- a/searx/engines/semantic_scholar.py +++ b/searx/engines/semantic_scholar.py @@ -48,7 +48,6 @@ def request(query, params): def response(resp): res = loads(resp.text) results = [] - for result in res['results']: url = result.get('primaryPaperLink', {}).get('url') if not url and result.get('links'): @@ -72,7 +71,7 @@ def response(resp): # pick for the first alternate link, but not from the crawler pdf_url = None for doc in result.get('alternatePaperLinks', []): - if doc['linkType'] != 'crawler': + if doc['linkType'] not in ('crawler', 'doi'): pdf_url = doc['url'] break diff --git a/searx/engines/springer.py b/searx/engines/springer.py index 2711fa807..e5255b794 100644 --- a/searx/engines/springer.py +++ b/searx/engines/springer.py @@ -58,8 +58,7 @@ def response(resp): 'authors': authors, 'doi': record.get('doi'), 'journal': record.get('publicationName'), - 'start_page': record.get('start_page'), - 'end_page': record.get('end_page'), + 'pages': record.get('start_page') + '-' + record.get('end_page'), 'tags': tags, 'issn': [record.get('issn')], 'isbn': [record.get('isbn')], diff --git a/searx/settings.yml b/searx/settings.yml index ba38e694a..9e9b2f9e6 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -412,7 +412,8 @@ engines: - name: crossref engine: crossref shortcut: cr - timeout: 10 + timeout: 30 + disable: true - name: yep engine: json_engine diff --git a/searx/templates/simple/result_templates/paper.html b/searx/templates/simple/result_templates/paper.html index 3ede1b250..54704c866 100644 --- a/searx/templates/simple/result_templates/paper.html +++ b/searx/templates/simple/result_templates/paper.html @@ -13,8 +13,8 @@ .{{- result.number -}} {%- endif -%} {%- endif -%} - {%- if result.start_page -%} 
-      &nbsp;{{- result.start_page -}} / {{- result.end_page -}}
+    {%- if result.pages -%}
+      &nbsp;{{- result.pages -}}
     {%- endif -%}
     </span>
   </div>
diff --git a/searx/webapp.py b/searx/webapp.py
index e6bda42be..44500911a 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -77,7 +77,7 @@ from searx.webutils import (
     is_hmac_of,
     is_flask_run_cmdline,
     group_engines_in_tab,
-    searxng_format_date,
+    searxng_l10n_timespan,
 )
 from searx.webadapter import (
     get_search_query_from_webapp,
@@ -723,7 +723,7 @@ def search():
             except ValueError:
                 result['publishedDate'] = None
             else:
-                result['publishedDate'] = searxng_format_date(result['publishedDate'])
+                result['publishedDate'] = searxng_l10n_timespan(result['publishedDate'])
 
         # set result['open_group'] = True when the template changes from the previous result
         # set result['close_group'] = True when the template changes on the next result
diff --git a/searx/webutils.py b/searx/webutils.py
index f084fe9d3..a5ed27c2c 100644
--- a/searx/webutils.py
+++ b/searx/webutils.py
@@ -141,7 +141,13 @@ def highlight_content(content, query):
     return content
 
 
-def searxng_format_date(dt: datetime):  # pylint: disable=invalid-name
+def searxng_l10n_timespan(dt: datetime) -> str:  # pylint: disable=invalid-name
+    """Return a human-readable and translated string that describes the time
+    span between a date in the past and the present.
+
+    If the date is January 1st at midnight, the returned string contains only
+    the year.
+    """
     # TODO, check if timezone is calculated right  # pylint: disable=fixme
     d = dt.date()
     t = dt.time()
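
Reviewer note on PATCH 4: the new google_scholar.parse_gs_a() helper splits the
green byline into (authors, journal, publisher, publishedDate). A hand-traced
sketch of the three byline shapes it handles — the inputs are made up:

    from datetime import datetime
    from searx.engines.google_scholar import parse_gs_a

    parse_gs_a('J Doe, R Roe - Nature, 2019 - nature.com')
    # -> (['J Doe', 'R Roe'], 'Nature', 'nature.com', datetime(2019, 1, 1))

    parse_gs_a('J Doe - 2019 - nature.com')
    # -> (['J Doe'], None, 'nature.com', datetime(2019, 1, 1))

    parse_gs_a('J Doe - nature.com')
    # -> (['J Doe'], None, 'nature.com', None)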
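
Reviewer note on the series as a whole: after PATCH 6 and 7, an engine opts into
the new template by returning results shaped like the sketch below. The field
names come from the table added to docs/dev/engine_overview.rst; every value
here is invented for illustration, and real engines fill only the fields they
can source.

    from datetime import datetime

    def response(resp):
        # minimal, hypothetical paper.html result
        return [{
            'template': 'paper.html',
            'url': 'https://example.org/paper/42',
            'title': 'An Example Paper',
            'content': 'Abstract of the paper ...',
            'authors': ['A. Author', 'B. Author'],
            'journal': 'Journal of Examples',
            'volume': '7',
            'number': '2',
            'pages': '101-110',                    # PATCH 7 merges start_page/end_page into 'pages'
            'publishedDate': datetime(2021, 6, 15),
            'doi': '10.1038/d41586-018-07848-2',   # DOI format example taken from the docs table
            'pdf_url': 'https://example.org/paper/42.pdf',
        }]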