[enh] add gigablast engine
This commit is contained in:
		
							parent
							
								
									7c075aa731
								
							
						
					
					
						commit
						04f7118d0a
					
				
							
								
								
									
										63
									
								
								searx/engines/gigablast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								searx/engines/gigablast.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,63 @@ | |||||||
|  | ## Gigablast (Web) | ||||||
|  | # | ||||||
|  | # @website     http://gigablast.com | ||||||
|  | # @provide-api yes (http://gigablast.com/api.html) | ||||||
|  | # | ||||||
|  | # @using-api   yes | ||||||
|  | # @results     XML | ||||||
|  | # @stable      yes | ||||||
|  | # @parse       url, title, content | ||||||
|  | 
 | ||||||
|  | from urllib import urlencode | ||||||
|  | from cgi import escape | ||||||
|  | from lxml import etree | ||||||
|  | 
 | ||||||
# --- engine settings -------------------------------------------------------
# Category this engine is listed under.
categories = ['general']
# request() reads params['pageno'] to compute the result offset.
paging = True
# Hits requested per page (sent as ``n``); also the step of the ``s`` offset.
number_of_results = 5

# --- request url -----------------------------------------------------------
# request() concatenates base_url with the formatted search_string.
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'

# --- xpath selectors for the XML reply -------------------------------------
# One node per hit; the remaining selectors are evaluated relative to it.
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
def request(query, params):
    """Fill ``params['url']`` with the Gigablast search URL and return params.

    query  -- search terms; url-encoded as the ``q`` parameter
    params -- request dict; ``params['pageno']`` (1-based) selects the page

    The result offset sent as ``s`` is ``(pageno - 1) * number_of_results``.
    """
    # pages are 1-based, the remote offset is 0-based
    page_index = params['pageno'] - 1

    query_part = search_string.format(
        number_of_results=number_of_results,
        offset=page_index * number_of_results,
        query=urlencode({'q': query}))

    params['url'] = base_url + query_part
    return params
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
def response(resp):
    """Convert a Gigablast XML reply into a list of result dicts.

    ``resp.content`` is parsed with ``etree.fromstring``; every node matched
    by ``results_xpath`` yields one dict with the keys ``url``, ``title`` and
    ``content`` (the summary, passed through ``escape``).

    A hit without a usable ``<url>`` is skipped, and a missing or empty
    ``<title>``/``<sum>`` becomes an empty string, so a single incomplete hit
    no longer raises and discards the whole result page.

    Raises AttributeError when ``resp`` has no ``content`` attribute.
    """
    results = []

    dom = etree.fromstring(resp.content)

    # parse results
    for result in dom.xpath(results_xpath):
        url = _first_text(result, url_xpath)

        # a hit without a link cannot be presented; drop it
        if not url:
            continue

        # append result
        results.append({'url': url,
                        'title': _first_text(result, title_xpath),
                        'content': escape(_first_text(result, content_xpath))})

    # return results
    return results


def _first_text(node, xpath):
    """Return the text of the first node matching xpath, or '' if none/empty."""
    matches = node.xpath(xpath)
    if not matches:
        return ''
    # .text is None for an element without text content
    return matches[0].text or ''
| @ -103,6 +103,10 @@ engines: | |||||||
|     shortcut : gf |     shortcut : gf | ||||||
|     disabled : True |     disabled : True | ||||||
| 
 | 
 | ||||||
|  |   - name : gigablast | ||||||
|  |     engine : gigablast | ||||||
|  |     shortcut : gb | ||||||
|  | 
 | ||||||
|   - name : github |   - name : github | ||||||
|     engine : github |     engine : github | ||||||
|     shortcut : gh |     shortcut : gh | ||||||
|  | |||||||
							
								
								
									
										57
									
								
								searx/tests/engines/test_gigablast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								searx/tests/engines/test_gigablast.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,57 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import gigablast | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class TestGigablastEngine(SearxTestCase):
    """Unit tests for the gigablast engine's request() and response()."""

    def test_request(self):
        """request() must build a gigablast.com URL containing the query."""
        query = 'test_query'
        dicto = defaultdict(dict)
        # page numbers are 1-based: request() computes (pageno - 1) * n
        dicto['pageno'] = 1
        params = gigablast.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('gigablast.com', params['url'])
        # first page starts at offset 0
        self.assertIn('s=0', params['url'])

    def test_response(self):
        """response() must reject non-responses and parse XML hits."""
        # objects without a ``content`` attribute are not valid responses
        self.assertRaises(AttributeError, gigablast.response, None)
        self.assertRaises(AttributeError, gigablast.response, [])
        self.assertRaises(AttributeError, gigablast.response, '')
        self.assertRaises(AttributeError, gigablast.response, '[]')

        # a reply without hits produces an empty result list
        response = mock.Mock(content='<response></response>')
        self.assertEqual(gigablast.response(response), [])

        xml = """<?xml version="1.0" encoding="UTF-8" ?>
        <response>
            <hits>5941888</hits>
            <moreResultsFollow>1</moreResultsFollow>
            <result>
                <title><![CDATA[This should be the title]]></title>
                <sum><![CDATA[This should be the content.]]></sum>
                <url><![CDATA[http://this.should.be.the.link/]]></url>
                <size>90.5</size>
                <docId>145414002633</docId>
                <siteId>2660021087</siteId>
                <domainId>2660021087</domainId>
                <spidered>1320519373</spidered>
                <indexed>1320519373</indexed>
                <pubdate>4294967295</pubdate>
                <isModDate>0</isModDate>
                <language><![CDATA[English]]></language>
                <charset><![CDATA[UTF-8]]></charset>
            </result>
        </response>
        """
        response = mock.Mock(content=xml)
        results = gigablast.response(response)
        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
| @ -9,6 +9,7 @@ from searx.tests.engines.test_digg import *  # noqa | |||||||
| from searx.tests.engines.test_dummy import *  # noqa | from searx.tests.engines.test_dummy import *  # noqa | ||||||
| from searx.tests.engines.test_flickr import *  # noqa | from searx.tests.engines.test_flickr import *  # noqa | ||||||
| from searx.tests.engines.test_flickr_noapi import *  # noqa | from searx.tests.engines.test_flickr_noapi import *  # noqa | ||||||
|  | from searx.tests.engines.test_gigablast import *  # noqa | ||||||
| from searx.tests.engines.test_github import *  # noqa | from searx.tests.engines.test_github import *  # noqa | ||||||
| from searx.tests.engines.test_www1x import *  # noqa | from searx.tests.engines.test_www1x import *  # noqa | ||||||
| from searx.tests.engines.test_google_images import *  # noqa | from searx.tests.engines.test_google_images import *  # noqa | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user