Merge pull request #1563 from Nachtalb/ne/fix-google-image-search
Fix Google image search engine
This commit is contained in:
		
						commit 629b36d442
					
				| @ -11,7 +11,6 @@ | |||||||
| """ | """ | ||||||
| 
 | 
 | ||||||
| from datetime import date, timedelta | from datetime import date, timedelta | ||||||
| from json import loads |  | ||||||
| from lxml import html | from lxml import html | ||||||
| from searx.url_utils import urlencode, urlparse, parse_qs | from searx.url_utils import urlencode, urlparse, parse_qs | ||||||
| 
 | 
 | ||||||
| @ -39,7 +38,6 @@ time_range_dict = {'day': 'd', | |||||||
| # do search-request | # do search-request | ||||||
| def request(query, params): | def request(query, params): | ||||||
|     search_options = { |     search_options = { | ||||||
|         'ijn': params['pageno'] - 1, |  | ||||||
|         'start': (params['pageno'] - 1) * number_of_results |         'start': (params['pageno'] - 1) * number_of_results | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| @ -53,7 +51,7 @@ def request(query, params): | |||||||
|         search_options['tbs'] = time_range_custom_attr.format(start=start, end=end) |         search_options['tbs'] = time_range_custom_attr.format(start=start, end=end) | ||||||
| 
 | 
 | ||||||
|     if safesearch and params['safesearch']: |     if safesearch and params['safesearch']: | ||||||
|         search_options['safe'] = 'on' |         search_options['safe'] = 'active' | ||||||
| 
 | 
 | ||||||
|     params['url'] = search_url.format(query=urlencode({'q': query}), |     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||||
|                                       search_options=urlencode(search_options)) |                                       search_options=urlencode(search_options)) | ||||||
| @ -63,24 +61,30 @@ def request(query, params): | |||||||
| 
 | 
 | ||||||
| # get response from search-request | # get response from search-request | ||||||
| def response(resp): | def response(resp): | ||||||
|     results = [] |  | ||||||
| 
 |  | ||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     # parse results |     results = [] | ||||||
|     for img in dom.xpath('//a'): |     for element in dom.xpath('//div[@id="search"] //td'): | ||||||
|         r = { |         link = element.xpath('./a')[0] | ||||||
|             'title': u' '.join(img.xpath('.//div[class="rg_ilmbg"]//text()')), | 
 | ||||||
|  |         google_url = urlparse(link.xpath('.//@href')[0]) | ||||||
|  |         query = parse_qs(google_url.query) | ||||||
|  |         source_url = next(iter(query.get('q', [])), None) | ||||||
|  | 
 | ||||||
|  |         title_parts = element.xpath('./cite//following-sibling::*/text()') | ||||||
|  |         title_parts.extend(element.xpath('./cite//following-sibling::text()')[:-1]) | ||||||
|  | 
 | ||||||
|  |         result = { | ||||||
|  |             'title': ''.join(title_parts), | ||||||
|             'content': '', |             'content': '', | ||||||
|             'template': 'images.html', |             'template': 'images.html', | ||||||
|  |             'url': source_url, | ||||||
|  |             'img_src': source_url, | ||||||
|  |             'thumbnail_src': next(iter(link.xpath('.//img //@src')), None) | ||||||
|         } |         } | ||||||
|         url = urlparse(img.xpath('.//@href')[0]) |  | ||||||
|         query = parse_qs(url.query) |  | ||||||
|         r['url'] = query['imgrefurl'][0] |  | ||||||
|         r['img_src'] = query['imgurl'][0] |  | ||||||
|         r['thumbnail_src'] = r['img_src'] |  | ||||||
|         # append result |  | ||||||
|         results.append(r) |  | ||||||
| 
 | 
 | ||||||
|     # return results |         if not source_url or not result['thumbnail_src']: | ||||||
|  |             continue | ||||||
|  | 
 | ||||||
|  |         results.append(result) | ||||||
|     return results |     return results | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user