Merge pull request #219 from pointhi/new_engines
New engines: gigablast and blekko_images
Commit 5f801d7ea0

searx/engines/blekko_images.py (new file, 56 lines)
@@ -0,0 +1,56 @@
## Blekko (Images)
#
# @website     https://blekko.com
# @provide-api yes (unofficial)
#
# @using-api   yes
# @results     JSON
# @stable      yes
# @parse       url, title, img_src

from json import loads
from urllib import urlencode

# engine dependent config
categories = ['images']
paging = True

# search-url
base_url = 'https://blekko.com'
search_url = '/api/images?{query}&c={c}'


# do search-request
def request(query, params):
    c = (params['pageno'] - 1) * 48

    params['url'] = base_url +\
        search_url.format(query=urlencode({'q': query}),
                          c=c)

    if params['pageno'] != 1:
        params['url'] += '&page={pageno}'.format(pageno=(params['pageno']-1))

    return params


# get response from search-request
def response(resp):
    results = []

    search_results = loads(resp.text)

    # return empty array if there are no results
    if not search_results:
        return []

    for result in search_results:
        # append result
        results.append({'url': result['page_url'],
                        'title': result['title'],
                        'content': '',
                        'img_src': result['url'],
                        'template': 'images.html'})

    # return results
    return results
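The request builder above pages by an absolute offset: c advances by 48 results per page, and a separate page parameter is appended only from the second page onward. A minimal sketch of the URL it produces, assuming searx.engines.blekko_images is importable (illustrative only, not part of the diff):

    # Illustrative sketch, not part of the diff: build the URL for page 2
    # of a Blekko image query.
    from searx.engines import blekko_images

    params = blekko_images.request('openstreetmap', {'pageno': 2})
    print(params['url'])
    # https://blekko.com/api/images?q=openstreetmap&c=48&page=1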
							
								
								
									
searx/engines/gigablast.py (new file, 63 lines)
@@ -0,0 +1,63 @@
## Gigablast (Web)
#
# @website     http://gigablast.com
# @provide-api yes (http://gigablast.com/api.html)
#
# @using-api   yes
# @results     XML
# @stable      yes
# @parse       url, title, content

from urllib import urlencode
from cgi import escape
from lxml import etree

# engine dependent config
categories = ['general']
paging = True
number_of_results = 5

# search-url
base_url = 'http://gigablast.com/'
search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0'

# specific xpath variables
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'


# do search-request
def request(query, params):
    offset = (params['pageno'] - 1) * number_of_results

    search_path = search_string.format(
        query=urlencode({'q': query}),
        offset=offset,
        number_of_results=number_of_results)

    params['url'] = base_url + search_path

    return params


# get response from search-request
def response(resp):
    results = []

    dom = etree.fromstring(resp.content)

    # parse results
    for result in dom.xpath(results_xpath):
        url = result.xpath(url_xpath)[0].text
        title = result.xpath(title_xpath)[0].text
        content = escape(result.xpath(content_xpath)[0].text)

        # append result
        results.append({'url': url,
                        'title': title,
                        'content': content})

    # return results
    return results
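Gigablast paging works the same way, except the step is number_of_results (5) and the offset is passed as the s parameter of the XML API. A minimal sketch of the URL assembled for page 3, assuming searx.engines.gigablast is importable (illustrative only, not part of the diff):

    # Illustrative sketch, not part of the diff: offset = (3 - 1) * 5 = 10.
    from searx.engines import gigablast

    params = gigablast.request('free software', {'pageno': 3})
    print(params['url'])
    # http://gigablast.com/search?q=free+software&n=5&s=10&xml=1&qh=0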
Engine settings:

@@ -33,6 +33,11 @@ engines:
     locale : en-US
     shortcut : bin

+  - name : blekko images
+    engine : blekko_images
+    locale : en-US
+    shortcut : bli
+
   - name : btdigg
     engine : btdigg
     shortcut : bt
@@ -103,6 +108,10 @@ engines:
     shortcut : gf
     disabled : True

+  - name : gigablast
+    engine : gigablast
+    shortcut : gb
+
   - name : github
     engine : github
     shortcut : gh
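The engine key of each new entry names a module under searx/engines, which is how these configuration blocks pick up the two files added above. Purely to illustrate that name-to-module mapping (this is plain importlib, not how searx itself loads engines, and it assumes the searx package and its settings can be imported):

    # Illustrative sketch only: resolve an "engine :" value to its module,
    # the way the new settings entries refer to the engine files above.
    import importlib

    for name in ('blekko_images', 'gigablast'):
        module = importlib.import_module('searx.engines.' + name)
        print('%s -> categories=%r, paging=%r' % (name, module.categories, module.paging))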
							
								
								
									
searx/tests/engines/test_blekko_images.py (new file, 65 lines)
@@ -0,0 +1,65 @@
from collections import defaultdict
import mock
from searx.engines import blekko_images
from searx.testing import SearxTestCase


class TestBlekkoImagesEngine(SearxTestCase):

    def test_request(self):
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 0
        params = blekko_images.request(query, dicto)
        self.assertTrue('url' in params)
        self.assertTrue(query in params['url'])
        self.assertTrue('blekko.com' in params['url'])

    def test_response(self):
        self.assertRaises(AttributeError, blekko_images.response, None)
        self.assertRaises(AttributeError, blekko_images.response, [])
        self.assertRaises(AttributeError, blekko_images.response, '')
        self.assertRaises(AttributeError, blekko_images.response, '[]')

        response = mock.Mock(text='[]')
        self.assertEqual(blekko_images.response(response), [])

        json = """
        [
            {
                "c": 1,
                "page_url": "http://result_url.html",
                "title": "Photo title",
                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608050619474382748&pid=15.1",
                "url": "http://result_image.jpg"
            },
            {
                "c": 2,
                "page_url": "http://companyorange.simpsite.nl/OSM",
                "title": "OSM",
                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608048068264919461&pid=15.1",
                "url": "http://simpsite.nl/userdata2/58985/Home/OSM.bmp"
            },
            {
                "c": 3,
                "page_url": "http://invincible.webklik.nl/page/osm",
                "title": "OSM",
                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608024514657649476&pid=15.1",
                "url": "http://www.webklik.nl/user_files/2009_09/65324/osm.gif"
            },
            {
                "c": 4,
                "page_url": "http://www.offshorenorway.no/event/companyDetail/id/12492",
                "title": "Go to OSM Offshore AS homepage",
                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608054265899847285&pid=15.1",
                "url": "http://www.offshorenorway.no/firmalogo/OSM-logo.png"
            }
        ]
        """
        response = mock.Mock(text=json)
        results = blekko_images.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0]['title'], 'Photo title')
        self.assertEqual(results[0]['url'], 'http://result_url.html')
        self.assertEqual(results[0]['img_src'], 'http://result_image.jpg')
							
								
								
									
searx/tests/engines/test_gigablast.py (new file, 57 lines)
@@ -0,0 +1,57 @@
from collections import defaultdict
import mock
from searx.engines import gigablast
from searx.testing import SearxTestCase


class TestGigablastEngine(SearxTestCase):

    def test_request(self):
        query = 'test_query'
        dicto = defaultdict(dict)
        dicto['pageno'] = 0
        params = gigablast.request(query, dicto)
        self.assertTrue('url' in params)
        self.assertTrue(query in params['url'])
        self.assertTrue('gigablast.com' in params['url'])

    def test_response(self):
        self.assertRaises(AttributeError, gigablast.response, None)
        self.assertRaises(AttributeError, gigablast.response, [])
        self.assertRaises(AttributeError, gigablast.response, '')
        self.assertRaises(AttributeError, gigablast.response, '[]')

        response = mock.Mock(content='<response></response>')
        self.assertEqual(gigablast.response(response), [])

        response = mock.Mock(content='<response></response>')
        self.assertEqual(gigablast.response(response), [])

        xml = """<?xml version="1.0" encoding="UTF-8" ?>
        <response>
            <hits>5941888</hits>
            <moreResultsFollow>1</moreResultsFollow>
            <result>
                <title><![CDATA[This should be the title]]></title>
                <sum><![CDATA[This should be the content.]]></sum>
                <url><![CDATA[http://this.should.be.the.link/]]></url>
                <size>90.5</size>
                <docId>145414002633</docId>
                <siteId>2660021087</siteId>
                <domainId>2660021087</domainId>
                <spidered>1320519373</spidered>
                <indexed>1320519373</indexed>
                <pubdate>4294967295</pubdate>
                <isModDate>0</isModDate>
                <language><![CDATA[English]]></language>
                <charset><![CDATA[UTF-8]]></charset>
            </result>
        </response>
        """
        response = mock.Mock(content=xml)
        results = gigablast.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
Engine test imports:

@@ -1,6 +1,7 @@
 from searx.tests.engines.test_bing import *  # noqa
 from searx.tests.engines.test_bing_images import *  # noqa
 from searx.tests.engines.test_bing_news import *  # noqa
+from searx.tests.engines.test_blekko_images import *  # noqa
 from searx.tests.engines.test_btdigg import *  # noqa
 from searx.tests.engines.test_dailymotion import *  # noqa
 from searx.tests.engines.test_deezer import *  # noqa
@@ -9,6 +10,7 @@ from searx.tests.engines.test_digg import *  # noqa
 from searx.tests.engines.test_dummy import *  # noqa
 from searx.tests.engines.test_flickr import *  # noqa
 from searx.tests.engines.test_flickr_noapi import *  # noqa
+from searx.tests.engines.test_gigablast import *  # noqa
 from searx.tests.engines.test_github import *  # noqa
 from searx.tests.engines.test_www1x import *  # noqa
 from searx.tests.engines.test_google_images import *  # noqa
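The star imports above pull the two new test modules into the aggregated engine test suite, so they run with the rest of the tests. One possible way to run only the new modules with the standard unittest runner, assuming the searx package and its test dependencies (mock) are importable (illustrative only; searx's own test entry point may differ):

    # Illustrative sketch only: load and run the two new test modules with
    # the standard library's unittest machinery.
    import unittest

    from searx.tests.engines import test_blekko_images, test_gigablast

    loader = unittest.TestLoader()
    suite = unittest.TestSuite()
    suite.addTests(loader.loadTestsFromModule(test_blekko_images))
    suite.addTests(loader.loadTestsFromModule(test_gigablast))
    unittest.TextTestRunner(verbosity=2).run(suite)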