Merge pull request #165 from return42/patch-google
improve & document google engine
This commit is contained in:
		
						commit
						f3e56836d6
					
				
							
								
								
									
										55
									
								
								docs/src/searx.engines.google.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										55
									
								
								docs/src/searx.engines.google.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,55 @@ | |||||||
|  | .. _google engines: | ||||||
|  | 
 | ||||||
|  | ============== | ||||||
|  | Google Engines | ||||||
|  | ============== | ||||||
|  | 
 | ||||||
|  | .. contents:: Contents | ||||||
|  |    :depth: 2 | ||||||
|  |    :local: | ||||||
|  |    :backlinks: entry | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _google API: | ||||||
|  | 
 | ||||||
|  | google API | ||||||
|  | ========== | ||||||
|  | 
 | ||||||
|  | .. _Query Parameter Definitions: | ||||||
|  |    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions | ||||||
|  | 
 | ||||||
|  | For a detailed description of the *REST-full* API see: `Query Parameter | ||||||
|  | Definitions`_.  Not all parameters can be applied and some engines are *special* | ||||||
|  | (e.g. :ref:`google news engine`). | ||||||
|  | 
 | ||||||
|  | .. _google web engine: | ||||||
|  | 
 | ||||||
|  | Google WEB | ||||||
|  | ========== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.google | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | .. _google images engine: | ||||||
|  | 
 | ||||||
|  | Google Images | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.google_images | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | .. _google videos engine: | ||||||
|  | 
 | ||||||
|  | Google Videos | ||||||
|  | ============= | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.google_videos | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
|  | .. _google news engine: | ||||||
|  | 
 | ||||||
|  | Google News | ||||||
|  | =========== | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.engines.google_news | ||||||
|  |   :members: | ||||||
| @ -1,12 +1,28 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| # lint: pylint | # lint: pylint | ||||||
| """Google (Web) | """This is the implementation of the google WEB engine.  Some of this | ||||||
|  | implementation is shared by other engines: | ||||||
| 
 | 
 | ||||||
| For detailed description of the *REST-full* API see: `Query Parameter | - :ref:`google images engine` | ||||||
| Definitions`_. | - :ref:`google news engine` | ||||||
|  | - :ref:`google videos engine` | ||||||
|  | 
 | ||||||
|  | The google WEB engine itself has a special setup option: | ||||||
|  | 
 | ||||||
|  | .. code:: yaml | ||||||
|  | 
 | ||||||
|  |   - name: google | ||||||
|  |     ... | ||||||
|  |     use_mobile_ui: true | ||||||
|  | 
 | ||||||
|  | ``use_mobile_ui``: (default: ``true``) | ||||||
|  |   Enables use of the *mobile endpoint* to bypass the google blocking (see | ||||||
|  |   :issue:`159`).  On the mobile UI of Google Search, the button :guilabel:`More | ||||||
|  |   results` is not affected by Google rate limiting and we can still do requests | ||||||
|  |   while actively blocked by the original Google search.  By activating | ||||||
|  |   ``use_mobile_ui`` this behavior is simulated by adding the parameter | ||||||
|  |   ``async=use_ac:true,_fmt:pc`` to the :py:func:`request`. | ||||||
| 
 | 
 | ||||||
| .. _Query Parameter Definitions: |  | ||||||
|    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions |  | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| # pylint: disable=invalid-name, missing-function-docstring | # pylint: disable=invalid-name, missing-function-docstring | ||||||
| @ -137,8 +153,9 @@ spelling_suggestion_xpath = '//div[@class="med"]/p/a' | |||||||
| def get_lang_info(params, lang_list, custom_aliases, supported_any_language): | def get_lang_info(params, lang_list, custom_aliases, supported_any_language): | ||||||
|     """Composing various language properties for the google engines. |     """Composing various language properties for the google engines. | ||||||
| 
 | 
 | ||||||
|     This function is called by the various google engines (google itself, |     This function is called by the various google engines (:ref:`google web | ||||||
|     google-images, -news, -scholar, -videos). |     engine`, :ref:`google images engine`, :ref:`google news engine` and | ||||||
|  |     :ref:`google videos engine`). | ||||||
| 
 | 
 | ||||||
|     :param dict param: request parameters of the engine |     :param dict param: request parameters of the engine | ||||||
| 
 | 
 | ||||||
| @ -146,7 +163,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): | |||||||
|         :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>` |         :py:obj:`ENGINES_LANGUAGES[engine-name] <searx.data.ENGINES_LANGUAGES>` | ||||||
| 
 | 
 | ||||||
|     :param dict lang_list: custom aliases for non standard language codes |     :param dict lang_list: custom aliases for non standard language codes | ||||||
|         (used when calling :py:func:`searx.utils.match_language) |         (used when calling :py:func:`searx.utils.match_language`) | ||||||
| 
 | 
 | ||||||
|     :param bool supported_any_language: When a language is not specified, the |     :param bool supported_any_language: When a language is not specified, the | ||||||
|         language interpretation is left up to Google to decide how the search |         language interpretation is left up to Google to decide how the search | ||||||
| @ -159,7 +176,7 @@ def get_lang_info(params, lang_list, custom_aliases, supported_any_language): | |||||||
|         Py-Dictionary with the key/value pairs: |         Py-Dictionary with the key/value pairs: | ||||||
| 
 | 
 | ||||||
|         language: |         language: | ||||||
|             Return value from :py:func:`searx.utils.match_language |             Return value from :py:func:`searx.utils.match_language` | ||||||
| 
 | 
 | ||||||
|         country: |         country: | ||||||
|             The country code (e.g. US, AT, CA, FR, DE ..) |             The country code (e.g. US, AT, CA, FR, DE ..) | ||||||
| @ -270,8 +287,7 @@ def request(query, params): | |||||||
|     additional_parameters = {} |     additional_parameters = {} | ||||||
|     if use_mobile_ui: |     if use_mobile_ui: | ||||||
|         additional_parameters = { |         additional_parameters = { | ||||||
|             'asearch': "arc", |             'async': 'use_ac:true,_fmt:pc', | ||||||
|             'async': 'arc_id:srp_510,ffilt:all,ve_name:MoreResultsContainer,next_id:srp_5,use_ac:true,_id:arc-srp_510,_pms:qs,_fmt:pc'  # pylint: disable=line-too-long |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|     # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium |     # https://www.google.de/search?q=corona&hl=de&lr=lang_de&start=0&tbs=qdr%3Ad&safe=medium | ||||||
| @ -312,9 +328,10 @@ def response(resp): | |||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     # results --> answer |     # results --> answer | ||||||
|     answer = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]//text()') |     answer_list = eval_xpath(dom, '//div[contains(@class, "LGOjhe")]') | ||||||
|     if answer: |     if answer_list: | ||||||
|         results.append({'answer': ' '.join(answer)}) |         answer_list = [_.xpath("normalize-space()") for _ in answer_list] | ||||||
|  |         results.append({'answer': ' '.join(answer_list)}) | ||||||
|     else: |     else: | ||||||
|         logger.debug("did not find 'answer'") |         logger.debug("did not find 'answer'") | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,19 +1,14 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| # lint: pylint | # lint: pylint | ||||||
| """Google (Images) | """This is the implementation of the google images engine. | ||||||
| 
 | 
 | ||||||
| For detailed description of the *REST-full* API see: `Query Parameter | .. admonition:: Content-Security-Policy (CSP) | ||||||
| Definitions`_. |  | ||||||
| 
 |  | ||||||
| .. _admonition:: Content-Security-Policy (CSP) |  | ||||||
| 
 | 
 | ||||||
|    This engine needs to allow images from the `data URLs`_ (prefixed with the |    This engine needs to allow images from the `data URLs`_ (prefixed with the | ||||||
|    ``data:` scheme).:: |    ``data:`` scheme):: | ||||||
| 
 | 
 | ||||||
|        Header set Content-Security-Policy "img-src 'self' data: ;" |        Header set Content-Security-Policy "img-src 'self' data: ;" | ||||||
| 
 | 
 | ||||||
| .. _Query Parameter Definitions: |  | ||||||
|    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions |  | ||||||
| .. _data URLs: | .. _data URLs: | ||||||
|    https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs |    https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs | ||||||
| """ | """ | ||||||
|  | |||||||
| @ -1,16 +1,11 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| # lint: pylint | # lint: pylint | ||||||
| """Google (News) | """This is the implementation of the google news engine.  The google news API | ||||||
| 
 | ignores some parameters from the common :ref:`google API`: | ||||||
| For detailed description of the *REST-full* API see: `Query Parameter |  | ||||||
| Definitions`_.  Not all parameters can be appied: |  | ||||||
| 
 | 
 | ||||||
| - num_ : the number of search results is ignored | - num_ : the number of search results is ignored | ||||||
| - save_ : is ignored / Google-News results are always *SafeSearch* | - save_ : is ignored / Google-News results are always *SafeSearch* | ||||||
| 
 | 
 | ||||||
| .. _Query Parameter Definitions: |  | ||||||
|    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions |  | ||||||
| 
 |  | ||||||
| .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp | .. _num: https://developers.google.com/custom-search/docs/xml_results#numsp | ||||||
| .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp | .. _save: https://developers.google.com/custom-search/docs/xml_results#safesp | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,19 +1,14 @@ | |||||||
| # SPDX-License-Identifier: AGPL-3.0-or-later | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
| # lint: pylint | # lint: pylint | ||||||
| """Google (Video) | """This is the implementation of the google videos engine. | ||||||
| 
 | 
 | ||||||
| For detailed description of the *REST-full* API see: `Query Parameter | .. admonition:: Content-Security-Policy (CSP) | ||||||
| Definitions`_.  Not all parameters can be appied. |  | ||||||
| 
 |  | ||||||
| .. _admonition:: Content-Security-Policy (CSP) |  | ||||||
| 
 | 
 | ||||||
|    This engine needs to allow images from the `data URLs`_ (prefixed with the |    This engine needs to allow images from the `data URLs`_ (prefixed with the | ||||||
|    ``data:` scheme).:: |    ``data:`` scheme):: | ||||||
| 
 | 
 | ||||||
|      Header set Content-Security-Policy "img-src 'self' data: ;" |      Header set Content-Security-Policy "img-src 'self' data: ;" | ||||||
| 
 | 
 | ||||||
| .. _Query Parameter Definitions: |  | ||||||
|    https://developers.google.com/custom-search/docs/xml_results#WebSearch_Query_Parameter_Definitions |  | ||||||
| .. _data URLs: | .. _data URLs: | ||||||
|    https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs |    https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URIs | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -583,6 +583,7 @@ engines: | |||||||
|   - name: google |   - name: google | ||||||
|     engine: google |     engine: google | ||||||
|     shortcut: go |     shortcut: go | ||||||
|  |     # see https://searxng.github.io/searxng/src/searx.engines.google.html#module-searx.engines.google | ||||||
|     use_mobile_ui: true |     use_mobile_ui: true | ||||||
|     # additional_tests: |     # additional_tests: | ||||||
|     #   android: *test_android |     #   android: *test_android | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user