[doc] improved docs of implementations for language detection
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									6748e8e2d5
								
							
						
					
					
						commit
						0b1444b61e
					
				| @ -641,38 +641,56 @@ def _get_fasttext_model() -> "fasttext.FastText._FastText": | ||||
| 
 | ||||
| 
 | ||||
| def detect_language(text: str, threshold: float = 0.3, only_search_languages: bool = False) -> Optional[str]: | ||||
|     """Detect the language of the text parameter | ||||
|     """Detect the language of the ``text`` parameter. | ||||
| 
 | ||||
|     Args: | ||||
|         * text (str): the string whose language is to be detected. | ||||
|         * threshold (float): threshold filters the returned labels by a threshold on probability. | ||||
|           A choice of 0.3 will return labels with at least 0.3 probability. | ||||
|         * only_search_languages (bool): if True, returns only supported SearXNG search languages. | ||||
|           see :py:obj:`searx.languages` | ||||
|     :param str text: The string whose language is to be detected. | ||||
| 
 | ||||
|     :param float threshold: Threshold filters the returned labels by a threshold | ||||
|         on probability.  A choice of 0.3 will return labels with at least 0.3 | ||||
|         probability. | ||||
| 
 | ||||
|     Raises: | ||||
|         * ValueError: if text is not a string | ||||
|     :param bool only_search_languages: If ``True``, returns only supported | ||||
|         SearXNG search languages.  See :py:obj:`searx.languages` | ||||
| 
 | ||||
|     Returns: | ||||
|         * result (str, None): the detected language code or None. See below. | ||||
|     :rtype: str, None | ||||
|     :returns: | ||||
|         The detected language code or ``None``. See below. | ||||
| 
 | ||||
|     The language detection is done by using `a fork`_ of the fastText_ library (`python | ||||
|     fasttext`_). fastText_ distributes the `language identification model`_, for | ||||
|     reference: | ||||
|     :raises ValueError: If ``text`` is not a string. | ||||
| 
 | ||||
|     The language detection is done by using `a fork`_ of the fastText_ library | ||||
|     (`python fasttext`_). fastText_ distributes the `language identification | ||||
|     model`_, for reference: | ||||
| 
 | ||||
|     - `FastText.zip: Compressing text classification models`_ | ||||
|     - `Bag of Tricks for Efficient Text Classification`_ | ||||
| 
 | ||||
|     The `language identification model`_ support the language codes (ISO-639-3):: | ||||
|     af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr | ||||
|     ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa | ||||
|     fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io | ||||
|     is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv | ||||
|     mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn | ||||
|     no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd | ||||
|     sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep | ||||
|     vi vls vo wa war wuu xal xmf yi yo yue zh | ||||
|     The `language identification model`_ supports the language codes | ||||
|     (ISO-639-3):: | ||||
| 
 | ||||
|         af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs | ||||
|         bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es | ||||
|         et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia | ||||
|         id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li | ||||
|         lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah | ||||
|         nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru | ||||
|         rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl | ||||
|         tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh | ||||
| 
 | ||||
|     By using ``only_search_languages=True`` the `language identification model`_ | ||||
|     is harmonized with SearXNG's language (locale) model.  General | ||||
|     conditions of SearXNG's locale model are: | ||||
| 
 | ||||
|     a. SearXNG's locale of a query is passed to the | ||||
|        :py:obj:`searx.locales.get_engine_locale` to get a language and/or region | ||||
|        code that is used by an engine. | ||||
| 
 | ||||
|     b. Most of SearXNG's engines do not support all the languages from `language | ||||
|        identification model`_ and there is also a discrepancy in the ISO-639-3 | ||||
|        (fastText) and ISO-639-2 (SearXNG) handling.  Furthermore, in SearXNG the | ||||
|        locales like ``zh-TW`` (``zh-CN``) are mapped to ``zh_Hant`` | ||||
|        (``zh_Hans``) while the `language identification model`_ reduces both to | ||||
|        ``zh``. | ||||
| 
 | ||||
|     .. _a fork: https://github.com/searxng/fasttext-predict | ||||
|     .. _fastText: https://fasttext.cc/ | ||||
| @ -680,6 +698,7 @@ def detect_language(text: str, threshold: float = 0.3, only_search_languages: bo | ||||
|     .. _language identification model: https://fasttext.cc/docs/en/language-identification.html | ||||
|     .. _Bag of Tricks for Efficient Text Classification: https://arxiv.org/abs/1607.01759 | ||||
|     .. _`FastText.zip: Compressing text classification models`: https://arxiv.org/abs/1612.03651 | ||||
| 
 | ||||
|     """ | ||||
|     if not isinstance(text, str): | ||||
|         raise ValueError('text must a str') | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user