64 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			64 lines
		
	
	
		
			1.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|  | ## Gigablast (Web) | ||
|  | # | ||
|  | # @website     http://gigablast.com | ||
|  | # @provide-api yes (http://gigablast.com/api.html) | ||
|  | # | ||
|  | # @using-api   yes | ||
|  | # @results     XML | ||
|  | # @stable      yes | ||
|  | # @parse       url, title, content | ||
|  | 
 | ||
|  | from urllib import urlencode | ||
|  | from cgi import escape | ||
|  | from lxml import etree | ||
|  | 
 | ||
|  | # engine dependent config | ||
|  | categories = ['general'] | ||
|  | paging = True | ||
|  | number_of_results = 5 | ||
|  | 
 | ||
|  | # search-url | ||
|  | base_url = 'http://gigablast.com/' | ||
|  | search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' | ||
|  | 
 | ||
|  | # specific xpath variables | ||
|  | results_xpath = '//response//result' | ||
|  | url_xpath = './/url' | ||
|  | title_xpath = './/title' | ||
|  | content_xpath = './/sum' | ||
|  | 
 | ||
|  | 
 | ||
|  | # do search-request | ||
|  | def request(query, params): | ||
|  |     offset = (params['pageno'] - 1) * number_of_results | ||
|  | 
 | ||
|  |     search_path = search_string.format( | ||
|  |         query=urlencode({'q': query}), | ||
|  |         offset=offset, | ||
|  |         number_of_results=number_of_results) | ||
|  | 
 | ||
|  |     params['url'] = base_url + search_path | ||
|  | 
 | ||
|  |     return params | ||
|  | 
 | ||
|  | 
 | ||
|  | # get response from search-request | ||
|  | def response(resp): | ||
|  |     results = [] | ||
|  | 
 | ||
|  |     dom = etree.fromstring(resp.content) | ||
|  | 
 | ||
|  |     # parse results | ||
|  |     for result in dom.xpath(results_xpath): | ||
|  |         url = result.xpath(url_xpath)[0].text | ||
|  |         title = result.xpath(title_xpath)[0].text | ||
|  |         content = escape(result.xpath(content_xpath)[0].text) | ||
|  | 
 | ||
|  |         # append result | ||
|  |         results.append({'url': url, | ||
|  |                         'title': title, | ||
|  |                         'content': content}) | ||
|  | 
 | ||
|  |     # return results | ||
|  |     return results |