| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | """CORE (science)
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from datetime import datetime | 
					
						
							|  |  |  | from urllib.parse import urlencode | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | from searx.exceptions import SearxEngineAPIException | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://core.ac.uk', | 
					
						
							|  |  |  |     "wikidata_id": 'Q22661180', | 
					
						
							|  |  |  |     "official_api_documentation": 'https://core.ac.uk/documentation/api/', | 
					
						
							|  |  |  |     "use_official_api": True, | 
					
						
							|  |  |  |     "require_api_key": True, | 
					
						
							|  |  |  |     "results": 'JSON', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-24 16:16:22 +02:00
										 |  |  | categories = ['science', 'scientific publications'] | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | paging = True | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | nb_per_page = 10 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | api_key = 'unset' | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | base_url = 'https://core.ac.uk:443/api-v2/search/' | 
					
						
							|  |  |  | search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | def request(query, params): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     if api_key == 'unset': | 
					
						
							|  |  |  |         raise SearxEngineAPIException('missing CORE API key') | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     search_path = search_string.format( | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         query=urlencode({'q': query}), | 
					
						
							|  |  |  |         nb_per_page=nb_per_page, | 
					
						
							|  |  |  |         page=params['pageno'], | 
					
						
							|  |  |  |         apikey=api_key, | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |     params['url'] = base_url + search_path | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     return params | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  |     json_data = resp.json() | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |     for result in json_data['data']: | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |         source = result['_source'] | 
					
						
							| 
									
										
										
										
											2022-09-24 14:26:07 +02:00
										 |  |  |         url = None | 
					
						
							|  |  |  |         if source.get('urls'): | 
					
						
							|  |  |  |             url = source['urls'][0].replace('http://', 'https://', 1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if url is None and source.get('doi'): | 
					
						
							|  |  |  |             # use the DOI reference | 
					
						
							|  |  |  |             url = 'https://doi.org/' + source['doi'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if url is None and source.get('downloadUrl'): | 
					
						
							|  |  |  |             # use the downloadUrl | 
					
						
							|  |  |  |             url = source['downloadUrl'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if url is None and source.get('identifiers'): | 
					
						
							|  |  |  |             # try to find an ark id, see | 
					
						
							|  |  |  |             # https://www.wikidata.org/wiki/Property:P8091 | 
					
						
							|  |  |  |             # and https://en.wikipedia.org/wiki/Archival_Resource_Key | 
					
						
							|  |  |  |             arkids = [ | 
					
						
							|  |  |  |                 identifier[5:]  # 5 is the length of "ark:/" | 
					
						
							|  |  |  |                 for identifier in source.get('identifiers') | 
					
						
							|  |  |  |                 if isinstance(identifier, str) and identifier.startswith('ark:/') | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |             if len(arkids) > 0: | 
					
						
							|  |  |  |                 url = 'https://n2t.net/' + arkids[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if url is None: | 
					
						
							| 
									
										
										
											
												[fix] ERROR searx.engines.core.ac.uk: list index out of range
Some result items from core.ac.uk do not have an URL::
  Traceback (most recent call last):
  File "searx/search/processors/online.py", line 154, in search
    search_results = self._search_basic(query, params)
  File "searx/search/processors/online.py", line 142, in _search_basic
    return self.engine.response(response)
  File "SearXNG/searx/engines/core.py", line 73, in response
    'url': source['urls'][0].replace('http://', 'https://', 1),
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
											
										 
											2022-09-24 11:54:12 +02:00
										 |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-25 15:46:29 +02:00
										 |  |  |         publishedDate = None | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |         time = source['publishedDate'] or source['depositedDate'] | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         if time: | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  |             publishedDate = datetime.fromtimestamp(time / 1000) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-09-24 14:26:07 +02:00
										 |  |  |         # sometimes the 'title' is None / filter None values | 
					
						
							|  |  |  |         journals = [j['title'] for j in (source.get('journals') or []) if j['title']] | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         publisher = source['publisher'] | 
					
						
							|  |  |  |         if publisher: | 
					
						
							|  |  |  |             publisher = source['publisher'].strip("'") | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |         results.append( | 
					
						
							|  |  |  |             { | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  |                 'template': 'paper.html', | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |                 'title': source['title'], | 
					
						
							| 
									
										
										
										
											2022-09-24 14:26:07 +02:00
										 |  |  |                 'url': url, | 
					
						
							|  |  |  |                 'content': source['description'] or '', | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  |                 # 'comments': '', | 
					
						
							|  |  |  |                 'tags': source['topics'], | 
					
						
							|  |  |  |                 'publishedDate': publishedDate, | 
					
						
							|  |  |  |                 'type': (source['types'] or [None])[0], | 
					
						
							|  |  |  |                 'authors': source['authors'], | 
					
						
							|  |  |  |                 'editor': ', '.join(source['contributors'] or []), | 
					
						
							|  |  |  |                 'publisher': publisher, | 
					
						
							|  |  |  |                 'journal': ', '.join(journals), | 
					
						
							|  |  |  |                 # 'volume': '', | 
					
						
							|  |  |  |                 # 'pages' : '', | 
					
						
							|  |  |  |                 # 'number': '', | 
					
						
							|  |  |  |                 'doi': source['doi'], | 
					
						
							| 
									
										
										
										
											2022-09-25 15:46:29 +02:00
										 |  |  |                 'issn': [x for x in [source.get('issn')] if x], | 
					
						
							|  |  |  |                 'isbn': [x for x in [source.get('isbn')] if x],  # exists in the rawRecordXml | 
					
						
							| 
									
										
										
										
											2022-09-24 13:17:01 +02:00
										 |  |  |                 'pdf_url': source.get('repositoryDocument', {}).get('pdfOrigin'), | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  |             } | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |