# -*- coding: utf-8 -*-
from collections import defaultdict
import mock
import lxml
from searx . engines import google
from searx . testing import SearxTestCase
class TestGoogleEngine(SearxTestCase):
    """Unit tests for the ``searx.engines.google`` engine module.

    Covers request construction, parsing of HTML result pages, image
    parsing and extraction of the supported-languages table.
    """

    # URL reported by the fake response unless a test supplies its own.
    _DEFAULT_RESPONSE_URL = ' https://www.google.com/search?q=test&start=0&gbv=1&gws_rd=cr '

    def mock_response(self, text, url=_DEFAULT_RESPONSE_URL):
        """Build a fake HTTP response object to feed into the engine.

        :param text: body exposed through the ``text`` attribute.
        :param url: value exposed through the ``url`` attribute; defaults
            to a regular Google search URL.
        :returns: a ``mock.Mock`` whose ``search_params.get(...)`` always
            returns the Google hostname string.
        """
        response = mock.Mock(text=text, url=url)
        response.search_params = mock.Mock()
        response.search_params.get = mock.Mock(return_value=' www.google.com ')
        return response

    def test_request(self):
        """Check the URL and Accept-Language header built for several languages."""
        google.supported_languages = [' en ', ' fr ', ' zh-CN ']
        query = ' test_query '
        dicto = defaultdict(dict)
        dicto[' pageno '] = 1
        dicto[' language '] = ' fr-FR '
        dicto[' time_range '] = ' '

        # French locale
        params = google.request(query, dicto)
        self.assertIn(' url ', params)
        self.assertIn(query, params[' url '])
        self.assertIn(' google.fr ', params[' url '])
        self.assertIn(' fr ', params[' headers '][' Accept-Language '])

        # US English locale
        dicto[' language '] = ' en-US '
        params = google.request(query, dicto)
        self.assertIn(' google.co ', params[' url '])
        self.assertIn(' en ', params[' headers '][' Accept-Language '])

        # Bare language code without a region
        dicto[' language '] = ' zh '
        params = google.request(query, dicto)
        self.assertIn(' google.com ', params[' url '])
        self.assertIn(' zh-CN ', params[' headers '][' Accept-Language '])

    def test_response(self):
        """Check parsing of result pages and the error paths of ``google.response``."""
        # Objects that are not response-like must raise AttributeError.
        self.assertRaises(AttributeError, google.response, None)
        self.assertRaises(AttributeError, google.response, [])
        self.assertRaises(AttributeError, google.response, ' ')
        self.assertRaises(AttributeError, google.response, ' [] ')

        # A page without any result markup yields an empty list.
        response = self.mock_response(' <html></html> ')
        self.assertEqual(google.response(response), [])

        html = """
<div class= " g " >
<h3 class= " r " >
<a href= " http://this.should.be.the.link/ " >
<b>This</b> is <b>the</b> title
</a>
</h3>
<div class= " s " >
<div class= " kv " style= " margin-bottom:2px " >
<cite>
<b>test</b>.psychologies.com/
</cite>
<div class= " _nBb " >
<div style= " display:inline " onclick= " google.sham(this); " aria-expanded= " false "
aria-haspopup= " true " tabindex= " 0 " data-ved= " 0CBUQ7B0wAA " >
<span class= " _O0 " >
</span>
</div>
<div style= " display:none " class= " am-dropdown-menu " role= " menu " tabindex= " -1 " >
<ul>
<li class= " _Ykb " >
<a class= " _Zkb " href= " http://www.google.fr/url?url=http://webcache.googleusercontent
.com/search %3F cache:R1Z_4pGXjuIJ:http://test.psychologies.com/ " >
En cache
</a>
</li>
<li class= " _Ykb " >
<a class= " _Zkb " href= " /search?safe=off&q=related:test.psy.com/ " >
Pages similaires
</a>
</li>
</ul>
</div>
</div>
</div>
<span class= " st " >
This should be the content.
</span>
<br>
<div class= " osl " >
<a href= " http://www.google.fr/url?url=http://test.psychologies.com/tests/ " >
Test Personnalité
</a> -
<a href= " http://www.google.fr/url?url=http://test.psychologies.com/test/ " >
Tests - Moi
</a> -
<a href= " http://www.google.fr/url?url=http://test.psychologies.com/test/tests-couple " >
Test Couple
</a>
-
<a href= " http://www.google.fr/url?url=http://test.psychologies.com/tests/tests-amour " >
Test Amour
</a>
</div>
</div>
</div>
<div class= " g " >
<h3 class= " r " >
<a href= " http://www.google.com/images?q=toto " >
<b>This</b>
</a>
</h3>
</div>
<div class= " g " >
<h3 class= " r " >
<a href= " http://www.google.com/search?q=toto " >
<b>This</b> is
</a>
</h3>
</div>
<div class= " g " >
<h3 class= " r " >
<a href= " € " >
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<div class= " g " >
<h3 class= " r " >
<a href= " /url?q=url " >
<b>This</b> is <b>the</b>
</a>
</h3>
</div>
<p class= " _Bmc " style= " margin:3px 8px " >
<a href= " /search?num=20&safe=off&q=t&revid=1754833769&sa=X&ei=-&ved= " >
suggestion <b>title</b>
</a>
</p>
"""
        response = self.mock_response(html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        # Exactly two entries are expected: one regular result and one suggestion.
        self.assertEqual(len(results), 2)
        self.assertEqual(results[0][' title '], ' This is the title ')
        self.assertEqual(results[0][' url '], ' http://this.should.be.the.link/ ')
        self.assertEqual(results[0][' content '], ' This should be the content. ')
        self.assertEqual(results[1][' suggestion '], ' suggestion title ')

        # Markup that does not use the result classes above produces nothing.
        html = """
<li class= " b_algo " u= " 0|5109|4755453613245655|UAGjXgIrPH5yh-o5oNHRx_3Zta87f_QO " >
</li>
"""
        response = self.mock_response(html)
        results = google.response(response)
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 0)

        # Responses coming from a "sorry" host or path must raise RuntimeWarning.
        response = self.mock_response(' <html></html> ', url=' https://sorry.google.com ')
        self.assertRaises(RuntimeWarning, google.response, response)

        response = self.mock_response(
            ' <html></html> ', url=' https://www.google.com/sorry/IndexRedirect ')
        self.assertRaises(RuntimeWarning, google.response, response)

    def test_parse_images(self):
        """Check that ``google.parse_images`` extracts one image result from a DOM."""
        # ``import lxml`` alone does not load the ``html`` submodule; import it
        # explicitly so this test does not rely on another module having done so.
        import lxml.html

        html = """
<li>
<div>
<a href= " http://www.google.com/url?q=http://this.is.the.url/ " >
<img style= " margin:3px 0;margin-right:6px;padding:0 " height= " 90 "
src= " https://this.is.the.image/image.jpg " width= " 60 " align= " middle " alt= " " border= " 0 " >
</a>
</div>
</li>
"""
        dom = lxml.html.fromstring(html)
        results = google.parse_images(dom, ' www.google.com ')
        self.assertEqual(type(results), list)
        self.assertEqual(len(results), 1)
        self.assertEqual(results[0][' url '], ' http://this.is.the.url/ ')
        self.assertEqual(results[0][' title '], ' ')
        self.assertEqual(results[0][' content '], ' ')
        self.assertEqual(results[0][' img_src '], ' https://this.is.the.image/image.jpg ')

    def test_fetch_supported_languages(self):
        """Check that ``google._fetch_supported_languages`` builds the language dict."""
        # A page without a language table yields an empty dict.
        html = """ <html></html> """
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertEqual(type(languages), dict)
        self.assertEqual(len(languages), 0)

        html = u"""
<html>
<body>
<table>
<tbody>
<tr>
<td>
<font>
<label>
<span id= " ten " >English</span>
</label>
</font>
</td>
<td>
<font>
<label>
<span id= " tzh-CN " >中文 (简体)</span>
</label>
<label>
<span id= " tzh-TW " >中文 (繁體)</span>
</label>
</font>
</td>
</tr>
</tbody>
</table>
</body>
</html>
"""
        response = mock.Mock(text=html)
        languages = google._fetch_supported_languages(response)
        self.assertEqual(type(languages), dict)
        self.assertEqual(len(languages), 3)

        self.assertIn(' en ', languages)
        self.assertIn(' zh-CN ', languages)
        self.assertIn(' zh-TW ', languages)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(type(languages[' en ']), dict)
        self.assertEqual(type(languages[' zh-CN ']), dict)
        self.assertEqual(type(languages[' zh-TW ']), dict)

        self.assertIn(' name ', languages[' en '])
        self.assertIn(' name ', languages[' zh-CN '])
        self.assertIn(' name ', languages[' zh-TW '])

        self.assertEqual(languages[' en '][' name '], ' English ')
        self.assertEqual(languages[' zh-CN '][' name '], u' 中文 (简体) ')
        self.assertEqual(languages[' zh-TW '][' name '], u' 中文 (繁體) ')