| 
									
										
										
										
											2015-05-02 15:45:17 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Gigablast (Web) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     http://gigablast.com | 
					
						
							|  |  |  |  @provide-api yes (http://gigablast.com/api.html) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   yes | 
					
						
							|  |  |  |  @results     XML | 
					
						
							|  |  |  |  @stable      yes | 
					
						
							|  |  |  |  @parse       url, title, content | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from urllib import urlencode | 
					
						
							|  |  |  | from cgi import escape | 
					
						
							|  |  |  | from lxml import etree | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							|  |  |  | categories = ['general'] | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | number_of_results = 5 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-02 11:43:12 +02:00
										 |  |  | # search-url, invalid HTTPS certificate | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | base_url = 'http://gigablast.com/' | 
					
						
							|  |  |  | search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # specific xpath variables | 
					
						
							|  |  |  | results_xpath = '//response//result' | 
					
						
							|  |  |  | url_xpath = './/url' | 
					
						
							|  |  |  | title_xpath = './/title' | 
					
						
							|  |  |  | content_xpath = './/sum' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     offset = (params['pageno'] - 1) * number_of_results | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     search_path = search_string.format( | 
					
						
							|  |  |  |         query=urlencode({'q': query}), | 
					
						
							|  |  |  |         offset=offset, | 
					
						
							|  |  |  |         number_of_results=number_of_results) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params['url'] = base_url + search_path | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dom = etree.fromstring(resp.content) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							|  |  |  |     for result in dom.xpath(results_xpath): | 
					
						
							|  |  |  |         url = result.xpath(url_xpath)[0].text | 
					
						
							|  |  |  |         title = result.xpath(title_xpath)[0].text | 
					
						
							|  |  |  |         content = escape(result.xpath(content_xpath)[0].text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # append result | 
					
						
							|  |  |  |         results.append({'url': url, | 
					
						
							|  |  |  |                         'title': title, | 
					
						
							|  |  |  |                         'content': content}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							|  |  |  |     return results |