| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
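"""Wikimedia Commons (images, videos, audio and other files)

The kind of media searched for is selected by the ``search_type`` setting,
which must be one of the keys of ``search_types``.

"""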
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  | import datetime | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-11-27 14:56:14 +01:00
										 |  |  | from searx.utils import html_to_text, humanize_bytes | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  | # about | 
					
						
							|  |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://commons.wikimedia.org/', | 
					
						
							|  |  |  |     "wikidata_id": 'Q565', | 
					
						
							|  |  |  |     "official_api_documentation": 'https://commons.wikimedia.org/w/api.php', | 
					
						
							|  |  |  |     "use_official_api": True, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'JSON', | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  | categories = ['images'] | 
					
						
							|  |  |  | search_type = 'images' | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | base_url = "https://commons.wikimedia.org" | 
					
						
							|  |  |  | search_prefix = ( | 
					
						
							|  |  |  |     '?action=query' | 
					
						
							|  |  |  |     '&format=json' | 
					
						
							|  |  |  |     '&generator=search' | 
					
						
							|  |  |  |     '&gsrnamespace=6' | 
					
						
							|  |  |  |     '&gsrprop=snippet' | 
					
						
							|  |  |  |     '&prop=info|imageinfo' | 
					
						
							|  |  |  |     '&iiprop=url|size|mime' | 
					
						
							|  |  |  |     '&iiurlheight=180'  # needed for the thumb url | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | number_of_results = 10 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  | search_types = { | 
					
						
							|  |  |  |     'images': 'bitmap|drawing', | 
					
						
							|  |  |  |     'videos': 'video', | 
					
						
							|  |  |  |     'audio': 'audio', | 
					
						
							|  |  |  |     'files': 'multimedia|office|archive|3d', | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     language = 'en' | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         language = params['language'].split('-')[0] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  |     if search_type not in search_types: | 
					
						
							|  |  |  |         raise ValueError(f"Unsupported search type: {search_type}") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     filetype = search_types[search_type] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  |     args = { | 
					
						
							|  |  |  |         'uselang': language, | 
					
						
							|  |  |  |         'gsrlimit': number_of_results, | 
					
						
							|  |  |  |         'gsroffset': number_of_results * (params["pageno"] - 1), | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  |         'gsrsearch': f"filetype:{filetype} {query}", | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-03 09:32:10 +02:00
										 |  |  |     params["url"] = f"{base_url}/w/api.php{search_prefix}&{urlencode(args, safe=':|')}" | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  |     json = resp.json() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not json.get("query", {}).get("pages"): | 
					
						
							|  |  |  |         return results | 
					
						
							|  |  |  |     for item in json["query"]["pages"].values(): | 
					
						
							|  |  |  |         imageinfo = item["imageinfo"][0] | 
					
						
							|  |  |  |         title = item["title"].replace("File:", "").rsplit('.', 1)[0] | 
					
						
							|  |  |  |         result = { | 
					
						
							|  |  |  |             'url': imageinfo["descriptionurl"], | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2024-11-27 14:56:14 +01:00
										 |  |  |             'content': html_to_text(item["snippet"]), | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if search_type == "images": | 
					
						
							|  |  |  |             result['template'] = 'images.html' | 
					
						
							|  |  |  |             result['img_src'] = imageinfo["url"] | 
					
						
							|  |  |  |             result['thumbnail_src'] = imageinfo["thumburl"] | 
					
						
							|  |  |  |             result['resolution'] = f'{imageinfo["width"]} x {imageinfo["height"]}' | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             result['thumbnail'] = imageinfo["thumburl"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if search_type == "videos": | 
					
						
							|  |  |  |             result['template'] = 'videos.html' | 
					
						
							|  |  |  |             if imageinfo.get('duration'): | 
					
						
							|  |  |  |                 result['length'] = datetime.timedelta(seconds=int(imageinfo['duration'])) | 
					
						
							|  |  |  |             result['iframe_src'] = imageinfo['url'] | 
					
						
							|  |  |  |         elif search_type == "files": | 
					
						
							|  |  |  |             result['template'] = 'files.html' | 
					
						
							|  |  |  |             result['metadata'] = imageinfo['mime'] | 
					
						
							| 
									
										
										
										
											2024-11-27 14:56:14 +01:00
										 |  |  |             result['size'] = humanize_bytes(imageinfo['size']) | 
					
						
							| 
									
										
										
										
											2024-04-25 18:21:47 +02:00
										 |  |  |         elif search_type == "audio": | 
					
						
							|  |  |  |             result['iframe_src'] = imageinfo['url'] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-08-03 18:33:10 +02:00
										 |  |  |         results.append(result) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |