| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2021-04-26 20:18:20 +02:00
										 |  |  | # lint: pylint | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | """CORE (science)
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from json import loads | 
					
						
							|  |  |  | from datetime import datetime | 
					
						
							|  |  |  | from urllib.parse import urlencode | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | from searx.exceptions import SearxEngineAPIException | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://core.ac.uk', | 
					
						
							|  |  |  |     "wikidata_id": 'Q22661180', | 
					
						
							|  |  |  |     "official_api_documentation": 'https://core.ac.uk/documentation/api/', | 
					
						
							|  |  |  |     "use_official_api": True, | 
					
						
							|  |  |  |     "require_api_key": True, | 
					
						
							|  |  |  |     "results": 'JSON', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | categories = ['science'] | 
					
						
							|  |  |  | paging = True | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | nb_per_page = 10 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | api_key = 'unset' | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | base_url = 'https://core.ac.uk:443/api-v2/search/' | 
					
						
							|  |  |  | search_string = '{query}?page={page}&pageSize={nb_per_page}&apiKey={apikey}' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     if api_key == 'unset': | 
					
						
							|  |  |  |         raise SearxEngineAPIException('missing CORE API key') | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     search_path = search_string.format( | 
					
						
							|  |  |  |         query = urlencode({'q': query}), | 
					
						
							|  |  |  |         nb_per_page = nb_per_page, | 
					
						
							|  |  |  |         page = params['pageno'], | 
					
						
							|  |  |  |         apikey = api_key, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |     params['url'] = base_url + search_path | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |     logger.debug("query_url --> %s", params['url']) | 
					
						
							|  |  |  |     return params | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  |     json_data = loads(resp.text) | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |     for result in json_data['data']: | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         source = result['_source'] | 
					
						
							|  |  |  |         time = source['publishedDate'] or source['depositedDate'] | 
					
						
							|  |  |  |         if time : | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |             date = datetime.fromtimestamp(time / 1000) | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |         else: | 
					
						
							|  |  |  |             date = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         metadata = [] | 
					
						
							|  |  |  |         if source['publisher'] and len(source['publisher']) > 3: | 
					
						
							|  |  |  |             metadata.append(source['publisher']) | 
					
						
							|  |  |  |         if source['topics']: | 
					
						
							|  |  |  |             metadata.append(source['topics'][0]) | 
					
						
							|  |  |  |         if source['doi']: | 
					
						
							|  |  |  |             metadata.append(source['doi']) | 
					
						
							|  |  |  |         metadata = ' / '.join(metadata) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  |         results.append({ | 
					
						
							| 
									
										
										
										
											2021-04-04 12:48:24 +02:00
										 |  |  |             'url': source['urls'][0].replace('http://', 'https://', 1), | 
					
						
							|  |  |  |             'title': source['title'], | 
					
						
							|  |  |  |             'content': source['description'], | 
					
						
							|  |  |  |             'publishedDate': date, | 
					
						
							|  |  |  |             'metadata' : metadata, | 
					
						
							|  |  |  |         }) | 
					
						
							| 
									
										
										
										
											2021-03-26 12:22:49 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results |