| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | #!/usr/bin/env python | 
					
						
							| 
									
										
										
										
											2021-10-03 15:12:09 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | # lint: pylint | 
					
						
							|  |  |  | # pylint: disable=missing-module-docstring | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | """Fetch units from :origin:`searx/engines/wikidata.py` engine.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | Output file: :origin:`searx/data/wikidata_units.json` (:origin:`CI Update data | 
					
						
							|  |  |  | ...  <.github/workflows/data-update.yml>`). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | import json | 
					
						
							|  |  |  | import collections | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # set path | 
					
						
							| 
									
										
										
										
											2021-02-25 17:42:52 +01:00
										 |  |  | from os.path import join | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from searx import searx_dir | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  | from searx.engines import wikidata, set_loggers | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  | set_loggers(wikidata, 'wikidata') | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
# The response contains duplicate ?item rows, each with a different ?symbol.
# "ORDER BY ?item DESC(?rank) ?symbol" provides a deterministic result
# even if an ?item has several ?symbol values of the same rank.
#
# See:
# * https://www.wikidata.org/wiki/Help:Ranking
# * https://www.mediawiki.org/wiki/Wikibase/Indexing/RDF_Dump_Format ("Statement representation" section)
# * https://w.wiki/32BT
#   see the result for https://www.wikidata.org/wiki/Q11582
#   there are multiple symbols with the same rank
SARQL_REQUEST = """
SELECT DISTINCT ?item ?symbol
WHERE
{
  ?item wdt:P31/wdt:P279 wd:Q47574 .
  ?item p:P5061 ?symbolP .
  ?symbolP ps:P5061 ?symbol ;
           wikibase:rank ?rank .
  FILTER(LANG(?symbol) = "en").
}
ORDER BY ?item DESC(?rank) ?symbol
"""
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_data(): | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
										 |  |  |     results = collections.OrderedDict() | 
					
						
							| 
									
										
										
										
											2021-09-19 11:10:02 +02:00
										 |  |  |     response = wikidata.send_wikidata_query(SARQL_REQUEST) | 
					
						
							| 
									
										
										
										
											2021-02-23 13:10:38 +01:00
										 |  |  |     for unit in response['results']['bindings']: | 
					
						
							|  |  |  |         name = unit['item']['value'].replace('http://www.wikidata.org/entity/', '') | 
					
						
							|  |  |  |         unit = unit['symbol']['value'] | 
					
						
							|  |  |  |         if name not in results: | 
					
						
							|  |  |  |             # ignore duplicate: always use the first one | 
					
						
							|  |  |  |             results[name] = unit | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2020-10-26 19:19:18 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_wikidata_units_filename(): | 
					
						
							|  |  |  |     return join(join(searx_dir, "data"), "wikidata_units.json") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     with open(get_wikidata_units_filename(), 'w', encoding="utf8") as f: | 
					
						
							|  |  |  |         json.dump(get_data(), f, indent=4, ensure_ascii=False) |