Merge pull request #219 from pointhi/new_engines
New engines: gigablast and blekko_images
This commit is contained in:
		
						commit
						5f801d7ea0
					
				
							
								
								
									
										56
									
								
								searx/engines/blekko_images.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								searx/engines/blekko_images.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,56 @@ | ||||
| ## Blekko (Images) | ||||
| # | ||||
| # @website     https://blekko.com | ||||
| # @provide-api yes (unofficial) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     JSON | ||||
| # @stable      yes | ||||
| # @parse       url, title, img_src | ||||
| 
 | ||||
| from json import loads | ||||
| from urllib import urlencode | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['images'] | ||||
| paging = True | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'https://blekko.com' | ||||
| search_url = '/api/images?{query}&c={c}' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Fill ``params['url']`` with the Blekko image-search URL for ``query``.

    ``params['pageno']`` is the 1-based page number.  The ``c`` query
    argument is set to ``(pageno - 1) * 48`` (presumably 48 images per
    page -- confirm against the API), and every page after the first
    also gets a zero-based ``page`` argument.

    Returns the same ``params`` mapping that was passed in.
    """
    page_index = params['pageno'] - 1
    encoded_query = urlencode({'q': query})

    url = base_url + search_url.format(query=encoded_query,
                                       c=page_index * 48)

    # the first page is requested without an explicit page argument
    if page_index != 0:
        url += '&page={pageno}'.format(pageno=page_index)

    params['url'] = url
    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Convert a Blekko image-API reply into a list of searx image results.

    ``resp.text`` must hold the JSON body of the reply; an object without
    a ``text`` attribute raises ``AttributeError``.

    Each returned dict has the keys ``url`` (page containing the image),
    ``title``, ``content`` (always empty), ``img_src`` (the image itself)
    and ``template``.  The API is unofficial, so the payload is parsed
    defensively: anything that is not a JSON list yields ``[]``, entries
    lacking a page or image URL are skipped, and a missing title becomes
    an empty string instead of aborting the whole result page.
    """
    search_results = loads(resp.text)

    # covers "no results" as well as error objects / null payloads
    if not isinstance(search_results, list):
        return []

    results = []
    for result in search_results:
        if not isinstance(result, dict):
            continue

        page_url = result.get('page_url')
        img_src = result.get('url')

        # an image result is unusable without both of its URLs
        if not page_url or not img_src:
            continue

        # append result
        results.append({'url': page_url,
                        'title': result.get('title') or '',
                        'content': '',
                        'img_src': img_src,
                        'template': 'images.html'})

    # return results
    return results
							
								
								
									
										63
									
								
								searx/engines/gigablast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										63
									
								
								searx/engines/gigablast.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,63 @@ | ||||
| ## Gigablast (Web) | ||||
| # | ||||
| # @website     http://gigablast.com | ||||
| # @provide-api yes (http://gigablast.com/api.html) | ||||
| # | ||||
| # @using-api   yes | ||||
| # @results     XML | ||||
| # @stable      yes | ||||
| # @parse       url, title, content | ||||
| 
 | ||||
| from urllib import urlencode | ||||
| from cgi import escape | ||||
| from lxml import etree | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['general'] | ||||
| paging = True | ||||
| number_of_results = 5 | ||||
| 
 | ||||
| # search-url | ||||
| base_url = 'http://gigablast.com/' | ||||
| search_string = 'search?{query}&n={number_of_results}&s={offset}&xml=1&qh=0' | ||||
| 
 | ||||
| # specific xpath variables | ||||
| results_xpath = '//response//result' | ||||
| url_xpath = './/url' | ||||
| title_xpath = './/title' | ||||
| content_xpath = './/sum' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
def request(query, params):
    """Fill ``params['url']`` with the Gigablast XML search URL for ``query``.

    ``params['pageno']`` is the 1-based page number; it is converted into
    the ``s`` offset by multiplying the preceding page count with the
    module-level ``number_of_results``.

    Returns the same ``params`` mapping that was passed in.
    """
    params['url'] = base_url + search_string.format(
        query=urlencode({'q': query}),
        number_of_results=number_of_results,
        offset=number_of_results * (params['pageno'] - 1))

    return params
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
def response(resp):
    """Convert a Gigablast XML reply into a list of searx results.

    ``resp.content`` must hold the XML document; an object without a
    ``content`` attribute raises ``AttributeError``.

    Each returned dict has the keys ``url``, ``title`` and ``content``
    (the HTML-escaped summary).  Results without a URL are skipped, and
    an empty or missing ``<title>``/``<sum>`` becomes an empty string
    rather than raising and discarding the whole result page.
    """
    results = []

    dom = etree.fromstring(resp.content)

    # parse results
    for result in dom.xpath(results_xpath):
        url = _first_text(result, url_xpath)

        # a hit that cannot be linked to is of no use
        if not url:
            continue

        # append result
        results.append({'url': url,
                        'title': _first_text(result, title_xpath),
                        'content': escape(_first_text(result,
                                                      content_xpath))})

    # return results
    return results


def _first_text(node, xpath):
    """Return the text of the first element matching ``xpath`` below ``node``.

    Yields '' when nothing matches or when the matched element has no
    text (its ``text`` attribute is then None).
    """
    matches = node.xpath(xpath)
    if not matches:
        return ''
    return matches[0].text or ''
| @ -33,6 +33,11 @@ engines: | ||||
|     locale : en-US | ||||
|     shortcut : bin | ||||
| 
 | ||||
|   - name : blekko images | ||||
|     engine : blekko_images | ||||
|     locale : en-US | ||||
|     shortcut : bli | ||||
| 
 | ||||
|   - name : btdigg | ||||
|     engine : btdigg | ||||
|     shortcut : bt | ||||
| @ -103,6 +108,10 @@ engines: | ||||
|     shortcut : gf | ||||
|     disabled : True | ||||
| 
 | ||||
|   - name : gigablast | ||||
|     engine : gigablast | ||||
|     shortcut : gb | ||||
| 
 | ||||
|   - name : github | ||||
|     engine : github | ||||
|     shortcut : gh | ||||
|  | ||||
							
								
								
									
										65
									
								
								searx/tests/engines/test_blekko_images.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								searx/tests/engines/test_blekko_images.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import blekko_images | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestBlekkoImagesEngine(SearxTestCase):
    """Unit tests for the ``blekko_images`` engine."""

    def test_request(self):
        """The request URL targets blekko.com, carries the query and pages."""
        query = 'test_query'
        dicto = defaultdict(dict)

        # page numbers are 1-based: the engine derives offsets from pageno - 1
        dicto['pageno'] = 1
        params = blekko_images.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('blekko.com', params['url'])
        self.assertIn('c=0', params['url'])
        self.assertNotIn('page=', params['url'])

        # later pages add the zero-based page argument and a 48-step offset
        dicto['pageno'] = 2
        params = blekko_images.request(query, dicto)
        self.assertIn('c=48', params['url'])
        self.assertIn('page=1', params['url'])

    def test_response(self):
        """Objects without ``text`` fail; JSON lists become image results."""
        self.assertRaises(AttributeError, blekko_images.response, None)
        self.assertRaises(AttributeError, blekko_images.response, [])
        self.assertRaises(AttributeError, blekko_images.response, '')
        self.assertRaises(AttributeError, blekko_images.response, '[]')

        response = mock.Mock(text='[]')
        self.assertEqual(blekko_images.response(response), [])

        json_text = """
        [
            {
                "c": 1,
                "page_url": "http://result_url.html",
                "title": "Photo title",
                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608050619474382748&pid=15.1",
                "url": "http://result_image.jpg"
            },
            {
                "c": 2,
                "page_url": "http://companyorange.simpsite.nl/OSM",
                "title": "OSM",
                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608048068264919461&pid=15.1",
                "url": "http://simpsite.nl/userdata2/58985/Home/OSM.bmp"
            },
            {
                "c": 3,
                "page_url": "http://invincible.webklik.nl/page/osm",
                "title": "OSM",
                "tn_url": "http://ts1.mm.bing.net/th?id=HN.608024514657649476&pid=15.1",
                "url": "http://www.webklik.nl/user_files/2009_09/65324/osm.gif"
            },
            {
                "c": 4,
                "page_url": "http://www.offshorenorway.no/event/companyDetail/id/12492",
                "title": "Go to OSM Offshore AS homepage",
                "tn_url": "http://ts2.mm.bing.net/th?id=HN.608054265899847285&pid=15.1",
                "url": "http://www.offshorenorway.no/firmalogo/OSM-logo.png"
            }
        ]
        """
        response = mock.Mock(text=json_text)
        results = blekko_images.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0]['title'], 'Photo title')
        self.assertEqual(results[0]['url'], 'http://result_url.html')
        self.assertEqual(results[0]['img_src'], 'http://result_image.jpg')
							
								
								
									
										57
									
								
								searx/tests/engines/test_gigablast.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								searx/tests/engines/test_gigablast.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,57 @@ | ||||
| from collections import defaultdict | ||||
| import mock | ||||
| from searx.engines import gigablast | ||||
| from searx.testing import SearxTestCase | ||||
| 
 | ||||
| 
 | ||||
class TestGigablastEngine(SearxTestCase):
    """Unit tests for the ``gigablast`` engine."""

    def test_request(self):
        """The request URL targets gigablast.com, carries the query and pages."""
        query = 'test_query'
        dicto = defaultdict(dict)

        # page numbers are 1-based: the engine derives the offset from pageno - 1
        dicto['pageno'] = 1
        params = gigablast.request(query, dicto)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('gigablast.com', params['url'])
        self.assertIn('&s=0&', params['url'])

        # the second page starts one full page of results further in
        dicto['pageno'] = 2
        params = gigablast.request(query, dicto)
        self.assertIn('&s={0}&'.format(gigablast.number_of_results),
                      params['url'])

    def test_response(self):
        """Objects without ``content`` fail; XML hits become results."""
        self.assertRaises(AttributeError, gigablast.response, None)
        self.assertRaises(AttributeError, gigablast.response, [])
        self.assertRaises(AttributeError, gigablast.response, '')
        self.assertRaises(AttributeError, gigablast.response, '[]')

        response = mock.Mock(content='<response></response>')
        self.assertEqual(gigablast.response(response), [])

        xml = """<?xml version="1.0" encoding="UTF-8" ?>
        <response>
            <hits>5941888</hits>
            <moreResultsFollow>1</moreResultsFollow>
            <result>
                <title><![CDATA[This should be the title]]></title>
                <sum><![CDATA[This should be the content.]]></sum>
                <url><![CDATA[http://this.should.be.the.link/]]></url>
                <size>90.5</size>
                <docId>145414002633</docId>
                <siteId>2660021087</siteId>
                <domainId>2660021087</domainId>
                <spidered>1320519373</spidered>
                <indexed>1320519373</indexed>
                <pubdate>4294967295</pubdate>
                <isModDate>0</isModDate>
                <language><![CDATA[English]]></language>
                <charset><![CDATA[UTF-8]]></charset>
            </result>
        </response>
        """
        response = mock.Mock(content=xml)
        results = gigablast.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0]['title'], 'This should be the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
| @ -1,6 +1,7 @@ | ||||
| from searx.tests.engines.test_bing import *  # noqa | ||||
| from searx.tests.engines.test_bing_images import *  # noqa | ||||
| from searx.tests.engines.test_bing_news import *  # noqa | ||||
| from searx.tests.engines.test_blekko_images import *  # noqa | ||||
| from searx.tests.engines.test_btdigg import *  # noqa | ||||
| from searx.tests.engines.test_dailymotion import *  # noqa | ||||
| from searx.tests.engines.test_deezer import *  # noqa | ||||
| @ -9,6 +10,7 @@ from searx.tests.engines.test_digg import *  # noqa | ||||
| from searx.tests.engines.test_dummy import *  # noqa | ||||
| from searx.tests.engines.test_flickr import *  # noqa | ||||
| from searx.tests.engines.test_flickr_noapi import *  # noqa | ||||
| from searx.tests.engines.test_gigablast import *  # noqa | ||||
| from searx.tests.engines.test_github import *  # noqa | ||||
| from searx.tests.engines.test_www1x import *  # noqa | ||||
| from searx.tests.engines.test_google_images import *  # noqa | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user