[mod] improve the Startpage engine to reduce the frequency of CAPTCHAs
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
This commit is contained in:
		
							parent
							
								
									9100a48541
								
							
						
					
					
						commit
						79c499d145
					
				| @ -83,6 +83,7 @@ Startpage's category (for Web-search, News, Videos, ..) is set by | |||||||
| from typing import TYPE_CHECKING | from typing import TYPE_CHECKING | ||||||
| from collections import OrderedDict | from collections import OrderedDict | ||||||
| import re | import re | ||||||
|  | from urllib.parse import urlencode | ||||||
| from unicodedata import normalize, combining | from unicodedata import normalize, combining | ||||||
| from time import time | from time import time | ||||||
| from datetime import datetime, timedelta | from datetime import datetime, timedelta | ||||||
| @ -161,7 +162,7 @@ search_form_xpath = '//form[@id="search"]' | |||||||
| # timestamp of the last fetch of 'sc' code | # timestamp of the last fetch of 'sc' code | ||||||
| sc_code_ts = 0 | sc_code_ts = 0 | ||||||
| sc_code = '' | sc_code = '' | ||||||
| sc_code_cache_sec = 30 | sc_code_cache_sec = 3600 | ||||||
| """Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`.""" | """Time in seconds the sc-code is cached in memory :py:obj:`get_sc_code`.""" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -275,42 +276,46 @@ def _request_cat_web(query, params): | |||||||
|         args['language'] = engine_language |         args['language'] = engine_language | ||||||
|         args['lui'] = engine_language |         args['lui'] = engine_language | ||||||
| 
 | 
 | ||||||
|     args['abp'] = '1' |     # args['abp'] = '1' | ||||||
|     if params['pageno'] > 1: |     if params['pageno'] > 1: | ||||||
|         args['page'] = params['pageno'] |         args['page'] = params['pageno'] | ||||||
| 
 | 
 | ||||||
|     # build cookie |     # build cookie | ||||||
|     lang_homepage = 'en' |     lang_homepage = 'en' | ||||||
|     cookie = OrderedDict() |     cookie = OrderedDict() | ||||||
|  |     cookie['connect_to_server'] = 'us' | ||||||
|     cookie['date_time'] = 'world' |     cookie['date_time'] = 'world' | ||||||
|     cookie['disable_family_filter'] = safesearch_dict[params['safesearch']] |     cookie['disable_family_filter'] = safesearch_dict[params['safesearch']] | ||||||
|     cookie['disable_open_in_new_window'] = '0' |     cookie['disable_open_in_new_window'] = '0' | ||||||
|     cookie['enable_post_method'] = '1'  # hint: POST |     cookie['enable_post_method'] = '0'  # hint: GET | ||||||
|     cookie['enable_proxy_safety_suggest'] = '1' |     cookie['enable_proxy_safety_suggest'] = '1' | ||||||
|     cookie['enable_stay_control'] = '1' |     cookie['enable_stay_control'] = '1' | ||||||
|     cookie['instant_answers'] = '1' |     cookie['instant_answers'] = '1' | ||||||
|     cookie['lang_homepage'] = 's/device/%s/' % lang_homepage |     cookie['lang_homepage'] = 's/device/%s' % lang_homepage | ||||||
|     cookie['num_of_results'] = '10' |  | ||||||
|     cookie['suggestions'] = '1' |  | ||||||
|     cookie['wt_unit'] = 'celsius' |  | ||||||
| 
 |  | ||||||
|     if engine_language: |     if engine_language: | ||||||
|         cookie['language'] = engine_language |         cookie['language'] = engine_language | ||||||
|         cookie['language_ui'] = engine_language |         cookie['language_ui'] = engine_language | ||||||
| 
 |     cookie['num_of_results'] = '10' | ||||||
|     if engine_region: |     if engine_region: | ||||||
|         cookie['search_results_region'] = engine_region |         cookie['search_results_region'] = engine_region | ||||||
|  |     cookie['suggestions'] = '1' | ||||||
|  |     cookie['wt_unit'] = 'celsius' | ||||||
| 
 | 
 | ||||||
|     params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()]) |     params['cookies']['preferences'] = 'N1N'.join(["%sEEE%s" % x for x in cookie.items()]) | ||||||
|     logger.debug('cookie preferences: %s', params['cookies']['preferences']) |     logger.debug('cookie preferences: %s', params['cookies']['preferences']) | ||||||
| 
 | 
 | ||||||
|  |     # GET request | ||||||
|  |     params['method'] = 'GET' | ||||||
|  |     # https://www.startpage.com/do/search?sc=CmEL6wNu8t5j20&query=foo&cat=web&qloc=eyJsYXQiOiBudWxsLCAibG5nIjogbnVsbCwgInR5cGUiOiAibm9uZSJ9 | ||||||
|  |     params['url'] = search_url + '?' + urlencode(args) | ||||||
|  | 
 | ||||||
|     # POST request |     # POST request | ||||||
|     logger.debug("data: %s", args) |     # logger.debug("data: %s", args) | ||||||
|     params['data'] = args |     # params['data'] = args | ||||||
|     params['method'] = 'POST' |     # params['method'] = 'POST' | ||||||
|     params['url'] = search_url |     # params['url'] = search_url | ||||||
|     params['headers']['Origin'] = base_url |     # params['headers']['Origin'] = base_url | ||||||
|     params['headers']['Referer'] = base_url + '/' |     # params['headers']['Referer'] = base_url + '/' | ||||||
|     # is the Accept header needed? |     # is the Accept header needed? | ||||||
|     # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' |     # params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8' | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user