| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  | """Public domain image archive""" | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl | 
					
						
							|  |  |  | from json import dumps | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  | from searx.network import get | 
					
						
							|  |  |  | from searx.utils import extr | 
					
						
							|  |  |  | from searx.exceptions import SearxEngineAccessDeniedException, SearxEngineException | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  | 
 | 
					
						
# Query string appended to a cleaned image url to request a 360px thumbnail
# from the Imgix CDN (see response()).
THUMBNAIL_SUFFIX = "?fit=max&h=360&w=360"
"""
Example thumbnail urls (from requests & html):
- https://the-public-domain-review.imgix.net
  /shop/nov-2023-prints-00043.jpg
  ?fit=max&h=360&w=360
- https://the-public-domain-review.imgix.net
  /collections/the-history-of-four-footed-beasts-and-serpents-1658/
  8616383182_5740fa7851_o.jpg
  ?fit=max&h=360&w=360

Example full image urls (from html)
- https://the-public-domain-review.imgix.net/shop/
  nov-2023-prints-00043.jpg
  ?fit=clip&w=970&h=800&auto=format,compress
- https://the-public-domain-review.imgix.net/collections/
  the-history-of-four-footed-beasts-and-serpents-1658/8616383182_5740fa7851_o.jpg
  ?fit=clip&w=310&h=800&auto=format,compress

The thumbnail url from the request will be cleaned (query string dropped) to
obtain the full image link.  The cleaned thumbnail url then has
THUMBNAIL_SUFFIX appended to it, replacing the original thumbnail parameters.
"""
					
						
							|  |  |  | 
 | 
					
						
# about -- engine metadata (standard searx engine `about` dict; `website`
# is also used in response() to build result page urls)
about = {
    "website": 'https://pdimagearchive.org',
    "use_official_api": False,
    "require_api_key": False,
    "results": 'JSON',
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
# Algolia application endpoint the search POST requests are sent to
base_url = 'https://oqi2j6v4iz-dsn.algolia.net'
# PDIA frontend config script; the Algolia API key is scraped out of it
# by _get_algolia_api_key()
pdia_config_url = 'https://pdimagearchive.org/_astro/config.BiNvrvzG.js'
categories = ['images']
page_size = 20
paging = True


# lazily populated by _get_algolia_api_key(), reset by _clear_cached_api_key()
# when the upstream rejects the key (HTTP 403 in response())
__CACHED_API_KEY = None
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _clean_url(url): | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |     parsed = urlparse(url) | 
					
						
							|  |  |  |     query = [(k, v) for (k, v) in parse_qsl(parsed.query) if k not in ['ixid', 's']] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return urlunparse((parsed.scheme, parsed.netloc, parsed.path, parsed.params, urlencode(query), parsed.fragment)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  | def _get_algolia_api_key(): | 
					
						
							|  |  |  |     global __CACHED_API_KEY  # pylint:disable=global-statement | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if __CACHED_API_KEY: | 
					
						
							|  |  |  |         return __CACHED_API_KEY | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     resp = get(pdia_config_url) | 
					
						
							|  |  |  |     if resp.status_code != 200: | 
					
						
							|  |  |  |         raise LookupError("Failed to obtain Algolia API key for PDImageArchive") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     api_key = extr(resp.text, 'r="', '"', default=None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if api_key is None: | 
					
						
							|  |  |  |         raise LookupError("Couldn't obtain Algolia API key for PDImageArchive") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     __CACHED_API_KEY = api_key | 
					
						
							|  |  |  |     return api_key | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def _clear_cached_api_key(): | 
					
						
							|  |  |  |     global __CACHED_API_KEY  # pylint:disable=global-statement | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     __CACHED_API_KEY = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  |     api_key = _get_algolia_api_key() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = { | 
					
						
							|  |  |  |         'x-algolia-api-key': api_key, | 
					
						
							|  |  |  |         'x-algolia-application-id': 'OQI2J6V4IZ', | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     params['url'] = f"{base_url}/1/indexes/*/queries?{urlencode(args)}" | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |     params["method"] = "POST" | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |     request_params = { | 
					
						
							|  |  |  |         "page": params["pageno"] - 1, | 
					
						
							|  |  |  |         "query": query, | 
					
						
							|  |  |  |         "highlightPostTag": "__ais-highlight__", | 
					
						
							|  |  |  |         "highlightPreTag": "__ais-highlight__", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     data = { | 
					
						
							|  |  |  |         "requests": [ | 
					
						
							|  |  |  |             {"indexName": "prod_all-images", "params": urlencode(request_params)}, | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     params["data"] = dumps(data) | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # http errors are handled manually to be able to reset the api key | 
					
						
							|  |  |  |     params['raise_for_httperror'] = False | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  |     json_data = resp.json() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  |     if resp.status_code == 403: | 
					
						
							|  |  |  |         _clear_cached_api_key() | 
					
						
							|  |  |  |         raise SearxEngineAccessDeniedException() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if resp.status_code != 200: | 
					
						
							|  |  |  |         raise SearxEngineException() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |     if 'results' not in json_data: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in json_data['results'][0]['hits']: | 
					
						
							|  |  |  |         content = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if "themes" in result: | 
					
						
							|  |  |  |             content.append("Themes: " + result['themes']) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if "encompassingWork" in result: | 
					
						
							|  |  |  |             content.append("Encompassing work: " + result['encompassingWork']) | 
					
						
							|  |  |  |         content = "\n".join(content) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         base_image_url = result['thumbnail'].split("?")[0] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results.append( | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'template': 'images.html', | 
					
						
							| 
									
										
										
										
											2025-01-19 22:55:09 +01:00
										 |  |  |                 'url': _clean_url(f"{about['website']}/images/{result['objectID']}"), | 
					
						
							|  |  |  |                 'img_src': _clean_url(base_image_url), | 
					
						
							|  |  |  |                 'thumbnail_src': _clean_url(base_image_url + THUMBNAIL_SUFFIX), | 
					
						
							| 
									
										
										
										
											2025-01-10 18:12:16 +01:00
										 |  |  |                 'title': f"{result['title'].strip()} by {result['artist']} {result.get('displayYear', '')}", | 
					
						
							|  |  |  |                 'content': content, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |