163 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			163 lines
		
	
	
		
			6.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
# -*- coding: utf-8 -*-
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase
|  | 
 | |||
|  | 
 | |||
class TestGoogleEngine(SearxTestCase):
    """Unit tests for the ``searx.engines.google`` engine module.

    Covers the three entry points used here: ``google.request`` (building the
    outgoing request parameters), ``google.response`` (turning a result page
    into result dicts) and ``google.parse_images`` (extracting image results
    from an already parsed DOM).
    """

    def test_request(self):
        """Check the URL, cookies and Accept-Language header built by request()."""
        query = 'test_query'
        request_params = defaultdict(dict)
        request_params['pageno'] = 1
        request_params['language'] = 'fr_FR'

        params = google.request(query, request_params)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('google.com', params['url'])
        self.assertIn('PREF', params['cookies'])
        self.assertIn('fr', params['headers']['Accept-Language'])

        # With the catch-all language the header is expected to mention 'en'.
        request_params['language'] = 'all'
        params = google.request(query, request_params)
        self.assertIn('en', params['headers']['Accept-Language'])

    def test_response(self):
        """Check response() on invalid input, an empty page and real markup."""
        # Anything that is not a response-like object must raise AttributeError.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, '')
        self.assertRaises(AttributeError, google.response, '[]')

        # A page without any result markup yields an empty result list.
        response = mock.Mock(text='<html></html>')
        self.assertEqual(google.response(response), [])

        # Fixture: five <li class="g"> entries plus one suggestion paragraph.
        # Only the first entry and the suggestion are expected in the output.
        # NOTE(review): the other four entries (google.com/images,
        # google.com/search, a non-URL href and a relative /url?q= link) are
        # presumably filtered out by the engine -- confirm against
        # google.response.
        html = """
        <li class="g">
            <h3 class="r">
                <a href="http://this.should.be.the.link/">
                    <b>This</b> is <b>the</b> title
                </a>
            </h3>
            <div class="s">
                <div class="kv" style="margin-bottom:2px">
                    <cite>
                        <b>test</b>.psychologies.com/
                    </cite>
                    <div class="_nBb">
                        <div style="display:inline" onclick="google.sham(this);" aria-expanded="false"
                            aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA">
                            <span class="_O0">
                            </span>
                        </div>
                        <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1">
                            <ul>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent
                                        .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/">
                                        En cache
                                    </a>
                                </li>
                                <li class="_Ykb">
                                    <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/">
                                        Pages similaires
                                    </a>
                                </li>
                            </ul>
                        </div>
                    </div>
                </div>
                <span class="st">
                    This should be the content.
                </span>
                <br>
                <div class="osl">
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/">
                        Test Personnalité
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/">
                        Tests - Moi
                    </a> - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple">
                        Test Couple
                    </a>
                    - 
                    <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour">
                        Test Amour
                    </a>
                </div>
            </div>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/images?q=toto">
                    <b>This</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="http://www.google.com/search?q=toto">
                    <b>This</b> is
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="€">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <li class="g">
            <h3 class="r">
                <a href="/url?q=url">
                    <b>This</b> is <b>the</b>
                </a>
            </h3>
        </li>
        <p class="_Bmc" style="margin:3px 8px">
            <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved=">
                suggestion <b>title</b>
            </a>
        </p>
        """
        response = mock.Mock(text=html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0]['title'], 'This is the title')
        self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/')
        self.assertEqual(results[0]['content'], 'This should be the content.')
        self.assertEqual(results[1]['suggestion'], 'suggestion title')

        # Markup using a different result class ("b_algo" instead of "g")
        # must not produce any result.
        html = """
        <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO">
        </li>
        """
        response = mock.Mock(text=html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)

    def test_parse_images(self):
        """Check that parse_images() extracts one image result from a DOM."""
        html = """
        <li>
            <div>
                <a href="http://www.google.com/url?q=http://this.is.the.url/">
                    <img style="margin:3px 0;margin-right:6px;padding:0" height="90"
                        src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0">
                </a>
            </div>
        </li>
        """
        # lxml.html is a submodule of the lxml package: a plain `import lxml`
        # does not load it, so the module must import lxml.html explicitly
        # instead of relying on another module having imported it first.
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        # The expected URL is the value of the `q` parameter of the link href.
        self.assertEqual(results[0]['url'], 'http://this.is.the.url/')
        self.assertEqual(results[0]['title'], '')
        self.assertEqual(results[0]['content'], '')
        self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg')