[fix] update the yahoo engine to match the website's markup changes
This commit is contained in:
		
							parent
							
								
									dc036ece85
								
							
						
					
					
						commit
						57996b12fc
					
				| @ -24,11 +24,11 @@ base_url = 'https://search.yahoo.com/' | |||||||
| search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' | search_url = 'search?{query}&b={offset}&fl=1&vl=lang_{lang}' | ||||||
| 
 | 
 | ||||||
| # specific xpath variables | # specific xpath variables | ||||||
| results_xpath = '//div[@class="res"]' | results_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' Sr ')]" | ||||||
| url_xpath = './/h3/a/@href' | url_xpath = './/h3/a/@href' | ||||||
| title_xpath = './/h3/a' | title_xpath = './/h3/a' | ||||||
| content_xpath = './/div[@class="abstr"]' | content_xpath = './/div[@class="compText aAbs"]' | ||||||
| suggestion_xpath = '//div[@id="satat"]//a' | suggestion_xpath = "//div[contains(concat(' ', normalize-space(@class), ' '), ' AlsoTry ')]//a" | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # remove yahoo-specific tracking-url | # remove yahoo-specific tracking-url | ||||||
| @ -91,11 +91,12 @@ def response(resp): | |||||||
|                         'content': content}) |                         'content': content}) | ||||||
| 
 | 
 | ||||||
|     # if no suggestion found, return results |     # if no suggestion found, return results | ||||||
|     if not dom.xpath(suggestion_xpath): |     suggestions = dom.xpath(suggestion_xpath) | ||||||
|  |     if not suggestions: | ||||||
|         return results |         return results | ||||||
| 
 | 
 | ||||||
|     # parse suggestion |     # parse suggestion | ||||||
|     for suggestion in dom.xpath(suggestion_xpath): |     for suggestion in suggestions: | ||||||
|         # append suggestion |         # append suggestion | ||||||
|         results.append({'suggestion': extract_text(suggestion)}) |         results.append({'suggestion': extract_text(suggestion)}) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -55,86 +55,83 @@ class TestYahooEngine(SearxTestCase): | |||||||
|         self.assertEqual(yahoo.response(response), []) |         self.assertEqual(yahoo.response(response), []) | ||||||
| 
 | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <div class="res"> | <ol class="reg mb-15 searchCenterMiddle"> | ||||||
|             <div> |     <li class="first"> | ||||||
|                 <h3> |         <div class="dd algo fst Sr"> | ||||||
|                 <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; |             <div class="compTitle"> | ||||||
|                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 |                 <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA; | ||||||
|                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> |                      _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 | ||||||
|                     <b>This</b> is the title |                      /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-" | ||||||
|                 </a> |                      target="_blank" data-bid="54e712e13671c"> | ||||||
|  |                      <b><b>This is the title</b></b></a> | ||||||
|                 </h3> |                 </h3> | ||||||
|             </div> |             </div> | ||||||
|             <span class="url" dir="ltr">www.<b>test</b>.com</span> |             <div class="compText aAbs"> | ||||||
|             <div class="abstr"> |                 <p class="lh-18"><b><b>This is the </b>content</b> | ||||||
|                 <b>This</b> is the content |                 </p> | ||||||
|             </div> |             </div> | ||||||
|         </div> |         </div> | ||||||
|         <div id="satat"  data-bns="Yahoo" data-bk="124.1"> |     </li> | ||||||
|             <h2>Also Try</h2> |     <li> | ||||||
|             <table> |         <div class="dd algo lst Sr"> | ||||||
|  |             <div class="compTitle"> | ||||||
|  |                 <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=AwrBT7zgEudUW.wAe2ZXNyoA; | ||||||
|  |                      _ylu=X3oDMTBybGY3bmpvBGNvbG8DYmYxBHBvcwMyBHZ0aWQDBHNlYwNzcg--/RV=2\/RE=1424458593/RO=10 | ||||||
|  |                      /RU=https%3a%2f%2fthis.is.the.second.url%2f/RK=0/RS=jIctjj_cBH1Efj88GCgHKp3__Qk-" | ||||||
|  |                      target="_blank" data-bid="54e712e136926"> | ||||||
|  |                      This is the second <b><b>title</b></b></a> | ||||||
|  |                 </h3> | ||||||
|  |             </div> | ||||||
|  |             <div class="compText aAbs"> | ||||||
|  |                 <p class="lh-18">This is the second content</p> | ||||||
|  |             </div> | ||||||
|  |         </div> | ||||||
|  |     </li> | ||||||
|  | </ol> | ||||||
|  | <div class="dd assist fst lst AlsoTry" data-bid="54e712e138d04"> | ||||||
|  |     <div class="compTitle mb-4 h-17"> | ||||||
|  |         <h3 class="title">Also Try</h3> </div> | ||||||
|  |     <table class="compTable m-0 ac-1st td-u fz-ms"> | ||||||
|         <tbody> |         <tbody> | ||||||
|             <tr> |             <tr> | ||||||
|                         <td> |                 <td class="w-50p pr-28"><a href="https://search.yahoo.com/"><B>This is the </B>suggestion<B></B></a> | ||||||
|                             <a id="srpnat0" class="" href="https://search.yahoo.com/search=rs-bottom" > |  | ||||||
|                                 <span> |  | ||||||
|                                     <b></b>This is <b>the suggestion</b> |  | ||||||
|                                 </span> |  | ||||||
|                             </a> |  | ||||||
|                 </td> |                 </td> | ||||||
|             </tr> |             </tr> | ||||||
|                 </tbody> |  | ||||||
|     </table> |     </table> | ||||||
| </div> | </div> | ||||||
|         """ |         """ | ||||||
|         response = mock.Mock(text=html) |         response = mock.Mock(text=html) | ||||||
|         results = yahoo.response(response) |         results = yahoo.response(response) | ||||||
|  |         print results | ||||||
|         self.assertEqual(type(results), list) |         self.assertEqual(type(results), list) | ||||||
|         self.assertEqual(len(results), 2) |         self.assertEqual(len(results), 3) | ||||||
|         self.assertEqual(results[0]['title'], 'This is the title') |         self.assertEqual(results[0]['title'], 'This is the title') | ||||||
|         self.assertEqual(results[0]['url'], 'https://this.is.the.url/') |         self.assertEqual(results[0]['url'], 'https://this.is.the.url/') | ||||||
|         self.assertEqual(results[0]['content'], 'This is the content') |         self.assertEqual(results[0]['content'], 'This is the content') | ||||||
|         self.assertEqual(results[1]['suggestion'], 'This is the suggestion') |         self.assertEqual(results[1]['title'], 'This is the second title') | ||||||
|  |         self.assertEqual(results[1]['url'], 'https://this.is.the.second.url/') | ||||||
|  |         self.assertEqual(results[1]['content'], 'This is the second content') | ||||||
|  |         self.assertEqual(results[2]['suggestion'], 'This is the suggestion') | ||||||
| 
 | 
 | ||||||
|         html = """ |         html = """ | ||||||
|         <div class="res"> | <ol class="reg mb-15 searchCenterMiddle"> | ||||||
|             <div> |     <li class="first"> | ||||||
|                 <h3> |         <div class="dd algo fst Sr"> | ||||||
|                 <a id="link-1" class="yschttl spt" href="http://r.search.yahoo.com/_ylt=A0LEVzClb9JUSKcAEGRXNyoA; |             <div class="compTitle"> | ||||||
|                     _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2JmMQR2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 |                 <h3 class="title"><a class=" td-u" href="http://r.search.yahoo.com/_ylt=A0LEb9JUSKcAEGRXNyoA; | ||||||
|                     /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-"target="_blank" data-bk="5063.1"> |                      _ylu=X3oDMTEzZm1qazYwBHNlYwNzcgRwb3MDMQRjb2xvA2Jm2dGlkA1NNRTcwM18x/RV=2/RE=1423106085/RO=10 | ||||||
|                     <b>This</b> is the title |                      /RU=https%3a%2f%2fthis.is.the.url%2f/RK=0/RS=dtcJsfP4mEeBOjnVfUQ-" | ||||||
|                 </a> |                      target="_blank" data-bid="54e712e13671c"> | ||||||
|  |                   <b><b>This is the title</b></b></a> | ||||||
|                 </h3> |                 </h3> | ||||||
|             </div> |             </div> | ||||||
|             <span class="url" dir="ltr">www.<b>test</b>.com</span> |             <div class="compText aAbs"> | ||||||
|             <div class="abstr"> |                 <p class="lh-18"><b><b>This is the </b>content</b> | ||||||
|                 <b>This</b> is the content |                 </p> | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|         <div class="res"> |  | ||||||
|             <div> |  | ||||||
|                 <h3> |  | ||||||
|                 <a id="link-1" class="yschttl spt"> |  | ||||||
|                     <b>This</b> is the title |  | ||||||
|                 </a> |  | ||||||
|                 </h3> |  | ||||||
|             </div> |  | ||||||
|             <span class="url" dir="ltr">www.<b>test</b>.com</span> |  | ||||||
|             <div class="abstr"> |  | ||||||
|                 <b>This</b> is the content |  | ||||||
|             </div> |  | ||||||
|         </div> |  | ||||||
|         <div class="res"> |  | ||||||
|             <div> |  | ||||||
|                 <h3> |  | ||||||
|                 </h3> |  | ||||||
|             </div> |  | ||||||
|             <span class="url" dir="ltr">www.<b>test</b>.com</span> |  | ||||||
|             <div class="abstr"> |  | ||||||
|                 <b>This</b> is the content |  | ||||||
|             </div> |             </div> | ||||||
|         </div> |         </div> | ||||||
|  |     </li> | ||||||
|  | </ol> | ||||||
|         """ |         """ | ||||||
|         response = mock.Mock(text=html) |         response = mock.Mock(text=html) | ||||||
|         results = yahoo.response(response) |         results = yahoo.response(response) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user