| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  | # -*- coding: utf-8 -*- | 
					
						
from collections import defaultdict

import lxml
import lxml.html
import mock

from searx.engines import google
from searx.testing import SearxTestCase
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  | class TestGoogleEngine(SearxTestCase): | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |     def mock_response(self, text): | 
					
						
							|  |  |  |  |         response = mock.Mock(text=text, url='https://www.google.com/search?q=test&start=0&gbv=1') | 
					
						
							|  |  |  |  |         response.search_params = mock.Mock() | 
					
						
							|  |  |  |  |         response.search_params.get = mock.Mock(return_value='www.google.com') | 
					
						
							|  |  |  |  |         return response | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |     def test_request(self): | 
					
						
							|  |  |  |  |         query = 'test_query' | 
					
						
							|  |  |  |  |         dicto = defaultdict(dict) | 
					
						
							|  |  |  |  |         dicto['pageno'] = 1 | 
					
						
							|  |  |  |  |         dicto['language'] = 'fr_FR' | 
					
						
							|  |  |  |  |         params = google.request(query, dicto) | 
					
						
							|  |  |  |  |         self.assertIn('url', params) | 
					
						
							|  |  |  |  |         self.assertIn(query, params['url']) | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         self.assertIn('google.fr', params['url']) | 
					
						
							| 
									
										
										
										
											2015-05-02 13:21:01 +02:00
										 |  |  |  |         self.assertNotIn('PREF', params['cookies']) | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         self.assertIn('NID', params['cookies']) | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         self.assertIn('fr', params['headers']['Accept-Language']) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         dicto['language'] = 'all' | 
					
						
							|  |  |  |  |         params = google.request(query, dicto) | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         self.assertIn('google.com', params['url']) | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         self.assertIn('en', params['headers']['Accept-Language']) | 
					
						
							| 
									
										
										
										
											2015-05-02 13:21:01 +02:00
										 |  |  |  |         self.assertIn('PREF', params['cookies']) | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         self.assertIn('NID', params['cookies']) | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  | 
 | 
					
						
							|  |  |  |  |     def test_response(self): | 
					
						
							|  |  |  |  |         self.assertRaises(AttributeError, google.response, None) | 
					
						
							|  |  |  |  |         self.assertRaises(AttributeError, google.response, []) | 
					
						
							|  |  |  |  |         self.assertRaises(AttributeError, google.response, '') | 
					
						
							|  |  |  |  |         self.assertRaises(AttributeError, google.response, '[]') | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         response = self.mock_response('<html></html>') | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         self.assertEqual(google.response(response), []) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         html = """
 | 
					
						
							|  |  |  |  |         <li class="g"> | 
					
						
							|  |  |  |  |             <h3 class="r"> | 
					
						
							|  |  |  |  |                 <a href="http://this.should.be.the.link/"> | 
					
						
							|  |  |  |  |                     <b>This</b> is <b>the</b> title | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </h3> | 
					
						
							|  |  |  |  |             <div class="s"> | 
					
						
							|  |  |  |  |                 <div class="kv" style="margin-bottom:2px"> | 
					
						
							|  |  |  |  |                     <cite> | 
					
						
							|  |  |  |  |                         <b>test</b>.psychologies.com/ | 
					
						
							|  |  |  |  |                     </cite> | 
					
						
							|  |  |  |  |                     <div class="_nBb">‎ | 
					
						
							|  |  |  |  |                         <div style="display:inline" onclick="google.sham(this);" aria-expanded="false" | 
					
						
							|  |  |  |  |                             aria-haspopup="true" tabindex="0" data-ved="0CBUQ7B0wAA"> | 
					
						
							|  |  |  |  |                             <span class="_O0"> | 
					
						
							|  |  |  |  |                             </span> | 
					
						
							|  |  |  |  |                         </div> | 
					
						
							|  |  |  |  |                         <div style="display:none" class="am-dropdown-menu" role="menu" tabindex="-1"> | 
					
						
							|  |  |  |  |                             <ul> | 
					
						
							|  |  |  |  |                                 <li class="_Ykb"> | 
					
						
							|  |  |  |  |                                     <a class="_Zkb" href="http://www.google.fr/url?url=http://webcache.googleusercontent | 
					
						
							|  |  |  |  |                                         .com/search%3Fcache:R1Z_4pGXjuIJ:http://test.psychologies.com/"> | 
					
						
							|  |  |  |  |                                         En cache | 
					
						
							|  |  |  |  |                                     </a> | 
					
						
							|  |  |  |  |                                 </li> | 
					
						
							|  |  |  |  |                                 <li class="_Ykb"> | 
					
						
							|  |  |  |  |                                     <a class="_Zkb" href="/search?safe=off&q=related:test.psy.com/"> | 
					
						
							|  |  |  |  |                                         Pages similaires | 
					
						
							|  |  |  |  |                                     </a> | 
					
						
							|  |  |  |  |                                 </li> | 
					
						
							|  |  |  |  |                             </ul> | 
					
						
							|  |  |  |  |                         </div> | 
					
						
							|  |  |  |  |                     </div> | 
					
						
							|  |  |  |  |                 </div> | 
					
						
							|  |  |  |  |                 <span class="st"> | 
					
						
							|  |  |  |  |                     This should be the content. | 
					
						
							|  |  |  |  |                 </span> | 
					
						
							|  |  |  |  |                 <br> | 
					
						
							|  |  |  |  |                 <div class="osl">‎ | 
					
						
							|  |  |  |  |                     <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/"> | 
					
						
							|  |  |  |  |                         Test Personnalité | 
					
						
							|  |  |  |  |                     </a> - ‎ | 
					
						
							|  |  |  |  |                     <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/"> | 
					
						
							|  |  |  |  |                         Tests - Moi | 
					
						
							|  |  |  |  |                     </a> - ‎ | 
					
						
							|  |  |  |  |                     <a href="http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple"> | 
					
						
							|  |  |  |  |                         Test Couple | 
					
						
							|  |  |  |  |                     </a> | 
					
						
							|  |  |  |  |                     - ‎ | 
					
						
							|  |  |  |  |                     <a href="http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour"> | 
					
						
							|  |  |  |  |                         Test Amour | 
					
						
							|  |  |  |  |                     </a> | 
					
						
							|  |  |  |  |                 </div> | 
					
						
							|  |  |  |  |             </div> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         <li class="g"> | 
					
						
							|  |  |  |  |             <h3 class="r"> | 
					
						
							|  |  |  |  |                 <a href="http://www.google.com/images?q=toto"> | 
					
						
							|  |  |  |  |                     <b>This</b> | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </h3> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         <li class="g"> | 
					
						
							|  |  |  |  |             <h3 class="r"> | 
					
						
							|  |  |  |  |                 <a href="http://www.google.com/search?q=toto"> | 
					
						
							|  |  |  |  |                     <b>This</b> is | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </h3> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         <li class="g"> | 
					
						
							|  |  |  |  |             <h3 class="r"> | 
					
						
							|  |  |  |  |                 <a href="€"> | 
					
						
							|  |  |  |  |                     <b>This</b> is <b>the</b> | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </h3> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         <li class="g"> | 
					
						
							|  |  |  |  |             <h3 class="r"> | 
					
						
							|  |  |  |  |                 <a href="/url?q=url"> | 
					
						
							|  |  |  |  |                     <b>This</b> is <b>the</b> | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </h3> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         <p class="_Bmc" style="margin:3px 8px"> | 
					
						
							|  |  |  |  |             <a href="/search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved="> | 
					
						
							|  |  |  |  |                 suggestion <b>title</b> | 
					
						
							|  |  |  |  |             </a> | 
					
						
							|  |  |  |  |         </p> | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         response = self.mock_response(html) | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         results = google.response(response) | 
					
						
							|  |  |  |  |         self.assertEqual(type(results), list) | 
					
						
							|  |  |  |  |         self.assertEqual(len(results), 2) | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['title'], 'This is the title') | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['url'], 'http://this.should.be.the.link/') | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['content'], 'This should be the content.') | 
					
						
							|  |  |  |  |         self.assertEqual(results[1]['suggestion'], 'suggestion title') | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         html = """
 | 
					
						
							|  |  |  |  |         <li class="b_algo" u="0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO"> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         response = self.mock_response(html) | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         results = google.response(response) | 
					
						
							|  |  |  |  |         self.assertEqual(type(results), list) | 
					
						
							|  |  |  |  |         self.assertEqual(len(results), 0) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         response = mock.Mock(text='<html></html>', url='https://sorry.google.com') | 
					
						
							|  |  |  |  |         response.search_params = mock.Mock() | 
					
						
							|  |  |  |  |         response.search_params.get = mock.Mock(return_value='www.google.com') | 
					
						
							|  |  |  |  |         self.assertRaises(RuntimeWarning, google.response, response) | 
					
						
							|  |  |  |  | 
 | 
					
						
							|  |  |  |  |         response = mock.Mock(text='<html></html>', url='https://www.google.com/sorry/IndexRedirect') | 
					
						
							|  |  |  |  |         response.search_params = mock.Mock() | 
					
						
							|  |  |  |  |         response.search_params.get = mock.Mock(return_value='www.google.com') | 
					
						
							|  |  |  |  |         self.assertRaises(RuntimeWarning, google.response, response) | 
					
						
							|  |  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |     def test_parse_images(self): | 
					
						
							|  |  |  |  |         html = """
 | 
					
						
							|  |  |  |  |         <li> | 
					
						
							|  |  |  |  |             <div> | 
					
						
							|  |  |  |  |                 <a href="http://www.google.com/url?q=http://this.is.the.url/"> | 
					
						
							|  |  |  |  |                     <img style="margin:3px 0;margin-right:6px;padding:0" height="90" | 
					
						
							|  |  |  |  |                         src="https://this.is.the.image/image.jpg" width="60" align="middle" alt="" border="0"> | 
					
						
							|  |  |  |  |                 </a> | 
					
						
							|  |  |  |  |             </div> | 
					
						
							|  |  |  |  |         </li> | 
					
						
							|  |  |  |  |         """
 | 
					
						
							|  |  |  |  |         dom = lxml.html.fromstring(html) | 
					
						
							| 
									
										
										
										
											2015-05-30 17:41:40 +02:00
										 |  |  |  |         results = google.parse_images(dom, 'www.google.com') | 
					
						
							| 
									
										
										
										
											2015-02-11 17:16:52 +01:00
										 |  |  |  |         self.assertEqual(type(results), list) | 
					
						
							|  |  |  |  |         self.assertEqual(len(results), 1) | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['url'], 'http://this.is.the.url/') | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['title'], '') | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['content'], '') | 
					
						
							|  |  |  |  |         self.assertEqual(results[0]['img_src'], 'https://this.is.the.image/image.jpg') |