| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | Jisho (the Japanese-English dictionary) | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from urllib.parse import urlencode, urljoin | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # about | 
					
						
							|  |  |  | about = { | 
					
						
							|  |  |  |     "website": 'https://jisho.org', | 
					
						
							|  |  |  |     "wikidata_id": 'Q24568389', | 
					
						
							|  |  |  |     "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api", | 
					
						
							|  |  |  |     "use_official_api": True, | 
					
						
							|  |  |  |     "require_api_key": False, | 
					
						
							|  |  |  |     "results": 'JSON', | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |     "language": 'ja', | 
					
						
							| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | categories = ['dictionaries'] | 
					
						
							|  |  |  | paging = False | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | URL = 'https://jisho.org' | 
					
						
							|  |  |  | BASE_URL = 'https://jisho.org/word/' | 
					
						
							|  |  |  | SEARCH_URL = URL + '/api/v1/search/words?{query}' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     query = urlencode({'keyword': query}) | 
					
						
							|  |  |  |     params['url'] = SEARCH_URL.format(query=query) | 
					
						
							|  |  |  |     logger.debug(f"query_url --> {params['url']}") | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |     first_result = True | 
					
						
							| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |     search_results = resp.json() | 
					
						
							| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |     for page in search_results.get('data', []): | 
					
						
							| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  |         # Entries that are purely from Wikipedia are excluded. | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |         parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech') | 
					
						
							|  |  |  |         if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition': | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |             pass | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |         # Process alternative forms | 
					
						
							|  |  |  |         alt_forms = [] | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |         for title_raw in page['japanese']: | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |             if 'word' not in title_raw: | 
					
						
							|  |  |  |                 alt_forms.append(title_raw['reading']) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 title = title_raw['word'] | 
					
						
							|  |  |  |                 if 'reading' in title_raw: | 
					
						
							|  |  |  |                     title += ' (' + title_raw['reading'] + ')' | 
					
						
							|  |  |  |                 alt_forms.append(title) | 
					
						
							|  |  |  |          | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |         # | 
					
						
							|  |  |  |         result_url = urljoin(BASE_URL, page['slug']) | 
					
						
							|  |  |  |         definitions = get_definitions(page) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |         # For results, we'll return the URL, all alternative forms (as title), | 
					
						
							|  |  |  |         # and all definitions (as description) truncated to 300 characters. | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |         content = " ".join(f"{engdef}." for _, engdef, _ in definitions) | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |         results.append({ | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |             'url': result_url, | 
					
						
							| 
									
										
										
										
											2022-04-01 15:18:19 +02:00
										 |  |  |             'title': ", ".join(alt_forms), | 
					
						
							|  |  |  |             'content': content[:300] + (content[300:] and '...') | 
					
						
							|  |  |  |         }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Like Wordnik, we'll return the first result in an infobox too. | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  |         if first_result: | 
					
						
							|  |  |  |             first_result = False | 
					
						
							|  |  |  |             results.append(get_infobox(alt_forms, result_url, definitions)) | 
					
						
							| 
									
										
										
										
											2022-03-31 20:45:39 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2022-04-02 15:21:58 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_definitions(page): | 
					
						
							|  |  |  |     # Process definitions | 
					
						
							|  |  |  |     definitions = [] | 
					
						
							|  |  |  |     for defn_raw in page['senses']: | 
					
						
							|  |  |  |         extra = [] | 
					
						
							|  |  |  |         # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions. | 
					
						
							|  |  |  |         if defn_raw.get('tags'): | 
					
						
							|  |  |  |             if defn_raw.get('info'): | 
					
						
							|  |  |  |                 # "usually written as kana: <kana>" | 
					
						
							|  |  |  |                 extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ') | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 # abbreviation, archaism, etc. | 
					
						
							|  |  |  |                 extra.append(', '.join(defn_raw['tags']) + '. ') | 
					
						
							|  |  |  |         elif defn_raw.get('info'): | 
					
						
							|  |  |  |             # inconsistent | 
					
						
							|  |  |  |             extra.append(', '.join(defn_raw['info']).capitalize() + '. ') | 
					
						
							|  |  |  |         if defn_raw.get('restrictions'): | 
					
						
							|  |  |  |             extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ') | 
					
						
							|  |  |  |         definitions.append(( | 
					
						
							|  |  |  |             ', '.join(defn_raw['parts_of_speech']), | 
					
						
							|  |  |  |             '; '.join(defn_raw['english_definitions']), | 
					
						
							|  |  |  |             ''.join(extra)[:-1], | 
					
						
							|  |  |  |         )) | 
					
						
							|  |  |  |     return definitions | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_infobox(alt_forms, result_url, definitions): | 
					
						
							|  |  |  |     infobox_content = [] | 
					
						
							|  |  |  |     # title & alt_forms | 
					
						
							|  |  |  |     infobox_title = alt_forms[0] | 
					
						
							|  |  |  |     if len(alt_forms) > 1: | 
					
						
							|  |  |  |         infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # definitions | 
					
						
							|  |  |  |     infobox_content.append('''
 | 
					
						
							|  |  |  |         <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>  | 
					
						
							|  |  |  |         and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>  | 
					
						
							|  |  |  |         by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small> | 
					
						
							|  |  |  |         <ul> | 
					
						
							|  |  |  |     ''')
 | 
					
						
							|  |  |  |     for pos, engdef, extra in definitions: | 
					
						
							|  |  |  |         if pos == 'Wikipedia definition': | 
					
						
							|  |  |  |             infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>') | 
					
						
							|  |  |  |         pos = f'<i>{pos}</i>: ' if pos else '' | 
					
						
							|  |  |  |         extra = f' ({extra})' if extra else '' | 
					
						
							|  |  |  |         infobox_content.append(f'<li>{pos}{engdef}{extra}</li>') | 
					
						
							|  |  |  |     infobox_content.append('</ul>') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # | 
					
						
							|  |  |  |     return { | 
					
						
							|  |  |  |         'infobox': infobox_title, | 
					
						
							|  |  |  |         'content': ''.join(infobox_content), | 
					
						
							|  |  |  |         'urls': [ | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 'title': 'Jisho.org', | 
					
						
							|  |  |  |                 'url': result_url, | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     } |