[mod] stackoverflow & yandex: detect CAPTCHA response
This commit is contained in:
		
							parent
							
								
									7905d41487
								
							
						
					
					
						commit
						fa909c7c02
					
				| @@ -10,9 +10,10 @@ | |||||||
|  @parse       url, title, content |  @parse       url, title, content | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from urllib.parse import urlencode, urljoin | from urllib.parse import urlencode, urljoin, urlparse | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.utils import extract_text | from searx.utils import extract_text | ||||||
|  | from searx.exceptions import SearxEngineCaptchaException | ||||||
| 
 | 
 | ||||||
| # engine dependent config | # engine dependent config | ||||||
| categories = ['it'] | categories = ['it'] | ||||||
| @@ -37,6 +38,10 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
| # get response from search-request | # get response from search-request | ||||||
| def response(resp): | def response(resp): | ||||||
|  |     resp_url = urlparse(resp.url) | ||||||
|  |     if resp_url.path.startswith('/nocaptcha'): | ||||||
|  |         raise SearxEngineCaptchaException() | ||||||
|  | 
 | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
|  | |||||||
| @@ -9,9 +9,10 @@ | |||||||
|  @parse       url, title, content |  @parse       url, title, content | ||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from urllib.parse import urlencode | from urllib.parse import urlencode, urlparse | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx import logger | from searx import logger | ||||||
|  | from searx.exceptions import SearxEngineCaptchaException | ||||||
| 
 | 
 | ||||||
| logger = logger.getChild('yandex engine') | logger = logger.getChild('yandex engine') | ||||||
| 
 | 
 | ||||||
| @@ -47,6 +48,10 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
| # get response from search-request | # get response from search-request | ||||||
| def response(resp): | def response(resp): | ||||||
|  |     resp_url = urlparse(resp.url) | ||||||
|  |     if resp_url.path.startswith('/showcaptcha'): | ||||||
|  |         raise SearxEngineCaptchaException() | ||||||
|  | 
 | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
|     results = [] |     results = [] | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user