[fix] prevent the google engine from redirecting
The NID/PREF cookies are also removed.
This commit is contained in:
		
							parent
							
								
									029291eca1
								
							
						
					
					
						commit
						5cea4f9445
					
				| @ -13,7 +13,6 @@ from cgi import escape | |||||||
| from urllib import urlencode | from urllib import urlencode | ||||||
| from urlparse import urlparse, parse_qsl | from urlparse import urlparse, parse_qsl | ||||||
| from lxml import html, etree | from lxml import html, etree | ||||||
| from searx.poolrequests import get |  | ||||||
| from searx.engines.xpath import extract_text, extract_url | from searx.engines.xpath import extract_text, extract_url | ||||||
| from searx.search import logger | from searx.search import logger | ||||||
| 
 | 
 | ||||||
| @ -91,7 +90,7 @@ url_map = 'https://www.openstreetmap.org/'\ | |||||||
| search_path = '/search' | search_path = '/search' | ||||||
| search_url = ('https://{hostname}' + | search_url = ('https://{hostname}' + | ||||||
|               search_path + |               search_path + | ||||||
|               '?{query}&start={offset}&gbv=1') |               '?{query}&start={offset}&gbv=1&gws_rd=cr') | ||||||
| 
 | 
 | ||||||
| # other URLs | # other URLs | ||||||
| map_hostname_start = 'maps.google.' | map_hostname_start = 'maps.google.' | ||||||
| @ -129,27 +128,6 @@ image_img_src_xpath = './img/@src' | |||||||
| property_address = "Address" | property_address = "Address" | ||||||
| property_phone = "Phone number" | property_phone = "Phone number" | ||||||
| 
 | 
 | ||||||
| # cookies |  | ||||||
| pref_cookie = '' |  | ||||||
| nid_cookie = {} |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| # see https://support.google.com/websearch/answer/873?hl=en |  | ||||||
| def get_google_pref_cookie(): |  | ||||||
|     global pref_cookie |  | ||||||
|     if pref_cookie == '': |  | ||||||
|         resp = get('https://www.google.com/ncr', allow_redirects=False) |  | ||||||
|         pref_cookie = resp.cookies["PREF"] |  | ||||||
|     return pref_cookie |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| def get_google_nid_cookie(google_hostname): |  | ||||||
|     global nid_cookie |  | ||||||
|     if google_hostname not in nid_cookie: |  | ||||||
|         resp = get('https://' + google_hostname) |  | ||||||
|         nid_cookie[google_hostname] = resp.cookies.get("NID", None) |  | ||||||
|     return nid_cookie[google_hostname] |  | ||||||
| 
 |  | ||||||
| 
 | 
 | ||||||
| # remove google-specific tracking-url | # remove google-specific tracking-url | ||||||
| def parse_url(url_string, google_hostname): | def parse_url(url_string, google_hostname): | ||||||
| @ -201,12 +179,6 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
|     params['headers']['Accept-Language'] = language |     params['headers']['Accept-Language'] = language | ||||||
|     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' |     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' | ||||||
|     if google_hostname == default_hostname: |  | ||||||
|         try: |  | ||||||
|             params['cookies']['PREF'] = get_google_pref_cookie() |  | ||||||
|         except: |  | ||||||
|             logger.warning('cannot fetch PREF cookie') |  | ||||||
|     params['cookies']['NID'] = get_google_nid_cookie(google_hostname) |  | ||||||
| 
 | 
 | ||||||
|     params['google_hostname'] = google_hostname |     params['google_hostname'] = google_hostname | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -9,7 +9,7 @@ from searx.testing import SearxTestCase | |||||||
| class TestGoogleEngine(SearxTestCase): | class TestGoogleEngine(SearxTestCase): | ||||||
| 
 | 
 | ||||||
|     def mock_response(self, text): |     def mock_response(self, text): | ||||||
|         response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1') |         response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr') | ||||||
|         response.search_params = mock.Mock() |         response.search_params = mock.Mock() | ||||||
|         response.search_params.get = mock.Mock(return_value='www.google.com') |         response.search_params.get = mock.Mock(return_value='www.google.com') | ||||||
|         return response |         return response | ||||||
| @ -23,16 +23,12 @@ class TestGoogleEngine(SearxTestCase): | |||||||
|         self.assertIn('url', params) |         self.assertIn('url', params) | ||||||
|         self.assertIn(query, params['url']) |         self.assertIn(query, params['url']) | ||||||
|         self.assertIn('google.fr', params['url']) |         self.assertIn('google.fr', params['url']) | ||||||
|         self.assertNotIn('PREF', params['cookies']) |  | ||||||
|         self.assertIn('NID', params['cookies']) |  | ||||||
|         self.assertIn('fr', params['headers']['Accept-Language']) |         self.assertIn('fr', params['headers']['Accept-Language']) | ||||||
| 
 | 
 | ||||||
|         dicto['language'] = 'all' |         dicto['language'] = 'all' | ||||||
|         params = google.request(query, dicto) |         params = google.request(query, dicto) | ||||||
|         self.assertIn('google.com', params['url']) |         self.assertIn('google.com', params['url']) | ||||||
|         self.assertIn('en', params['headers']['Accept-Language']) |         self.assertIn('en', params['headers']['Accept-Language']) | ||||||
|         # self.assertIn('PREF', params['cookies']) |  | ||||||
|         self.assertIn('NID', params['cookies']) |  | ||||||
| 
 | 
 | ||||||
|     def test_response(self): |     def test_response(self): | ||||||
|         self.assertRaises(AttributeError, google.response, None) |         self.assertRaises(AttributeError, google.response, None) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user