| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import collections | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # set path | 
					
						
							| 
									
										
										
										
											2021-02-25 17:42:52 +01:00
										 |  |  | from os.path import join | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from searx import searx_dir | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  | from searx.engines import wikidata, set_loggers | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  | set_loggers(wikidata, 'wikidata') | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
# The response contains duplicate ?item rows with different ?symbol values.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
# even if an ?item has several ?symbol values of the same rank.
# A deterministic result matters because only the first ?symbol returned for
# each ?item is kept.
# see:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
#   see the result for https://www.wikidata.org/wiki/Q11582
#   there are multiple symbols with the same rank
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
# SPARQL query passed to wikidata.send_wikidata_query() by get_data().
# It selects every ?item matching "wdt:P31/wdt:P279 wd:Q47574" together with
# the English-language values of its P5061 statements (?symbol), and sorts
# the rows by ?item, then DESC(?rank), then ?symbol.
# NOTE(review): the constant name is presumably a typo for SPARQL_REQUEST;
# it is kept as-is because get_data() refers to it by this name.
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_data(): | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
										 |  |  |     results = collections.OrderedDict() | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  |     response = wikidata.send_wikidata_query(SARQL_REQUEST) | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
										 |  |  |     for unit in response['results']['bindings']: | 
					
						
							|  |  |  |         name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '') | 
					
						
							|  |  |  |         unit = unit['symbol']['value'] | 
					
						
							|  |  |  |         if name not in results: | 
					
						
							|  |  |  |             # ignore duplicate: always use the first one | 
					
						
							|  |  |  |             results[name] = unit | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_wikidata_units_filename(): | 
					
						
							|  |  |  |     return join(join(searx_dir, "data"), "wikidata_units.json") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | with open(get_wikidata_units_filename(), 'w') as f: | 
					
						
							|  |  |  |     json.dump(get_data(), f, indent=4, ensure_ascii=False) |