Adds two engines: YouTube with or without the API.
The API engine needs an API key. The no-API engine doesn't provide the published dates.
This commit is contained in:
		
							parent
							
								
									aac8d3a7bf
								
							
						
					
					
						commit
						f965c97822
					
				
							
								
								
									
										83
									
								
								searx/engines/youtube_api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								searx/engines/youtube_api.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,83 @@ | |||||||
|  | # Youtube (Videos) | ||||||
|  | # | ||||||
|  | # @website     https://www.youtube.com/ | ||||||
|  | # @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) | ||||||
|  | # | ||||||
|  | # @using-api   yes | ||||||
|  | # @results     JSON | ||||||
|  | # @stable      yes | ||||||
|  | # @parse       url, title, content, publishedDate, thumbnail, embedded | ||||||
|  | 
 | ||||||
|  | from json import loads | ||||||
|  | from urllib import urlencode | ||||||
|  | from dateutil import parser | ||||||
|  | 
 | ||||||
|  | # engine dependent config | ||||||
|  | categories = ['videos', 'music'] | ||||||
|  | paging = False | ||||||
|  | language_support = True | ||||||
|  | api_key = None | ||||||
|  | 
 | ||||||
|  | # search-url | ||||||
|  | base_url = 'https://www.googleapis.com/youtube/v3/search' | ||||||
|  | search_url = base_url + '?part=snippet&{query}&maxResults=20&key={api_key}' | ||||||
|  | 
 | ||||||
|  | embedded_url = '<iframe width="540" height="304" ' +\ | ||||||
|  |     'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ | ||||||
|  |     'frameborder="0" allowfullscreen></iframe>' | ||||||
|  | 
 | ||||||
|  | base_youtube_url = 'https://www.youtube.com/watch?v=' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
|  | def request(query, params): | ||||||
|  |     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||||
|  |                                       api_key=api_key) | ||||||
|  | 
 | ||||||
|  |     # add language tag if specified | ||||||
|  |     if params['language'] != 'all': | ||||||
|  |         params['url'] += '&relevanceLanguage=' + params['language'].split('_')[0] | ||||||
|  | 
 | ||||||
|  |     return params | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
|  | def response(resp): | ||||||
|  |     results = [] | ||||||
|  | 
 | ||||||
|  |     search_results = loads(resp.text) | ||||||
|  | 
 | ||||||
|  |     # return empty array if there are no results | ||||||
|  |     if 'items' not in search_results: | ||||||
|  |         return [] | ||||||
|  | 
 | ||||||
|  |     # parse results | ||||||
|  |     for result in search_results['items']: | ||||||
|  |         videoid = result['id']['videoId'] | ||||||
|  | 
 | ||||||
|  |         title = result['snippet']['title'] | ||||||
|  |         content = '' | ||||||
|  |         thumbnail = '' | ||||||
|  | 
 | ||||||
|  |         pubdate = result['snippet']['publishedAt'] | ||||||
|  |         publishedDate = parser.parse(pubdate) | ||||||
|  | 
 | ||||||
|  |         thumbnail = result['snippet']['thumbnails']['high']['url'] | ||||||
|  | 
 | ||||||
|  |         content = result['snippet']['description'] | ||||||
|  | 
 | ||||||
|  |         url = base_youtube_url + videoid | ||||||
|  | 
 | ||||||
|  |         embedded = embedded_url.format(videoid=videoid) | ||||||
|  | 
 | ||||||
|  |         # append result | ||||||
|  |         results.append({'url': url, | ||||||
|  |                         'title': title, | ||||||
|  |                         'content': content, | ||||||
|  |                         'template': 'videos.html', | ||||||
|  |                         'publishedDate': publishedDate, | ||||||
|  |                         'embedded': embedded, | ||||||
|  |                         'thumbnail': thumbnail}) | ||||||
|  | 
 | ||||||
|  |     # return results | ||||||
|  |     return results | ||||||
							
								
								
									
										72
									
								
								searx/engines/youtube_noapi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										72
									
								
								searx/engines/youtube_noapi.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,72 @@ | |||||||
|  | # Youtube (Videos) | ||||||
|  | # | ||||||
|  | # @website     https://www.youtube.com/ | ||||||
|  | # @provide-api yes (https://developers.google.com/apis-explorer/#p/youtube/v3/youtube.search.list) | ||||||
|  | # | ||||||
|  | # @using-api   no | ||||||
|  | # @results     HTML | ||||||
|  | # @stable      no | ||||||
|  | # @parse       url, title, content, thumbnail, embedded | ||||||
|  | 
 | ||||||
|  | from urllib import quote_plus | ||||||
|  | from lxml import html | ||||||
|  | from searx.engines.xpath import extract_text | ||||||
|  | 
 | ||||||
|  | # engine dependent config | ||||||
|  | categories = ['videos', 'music'] | ||||||
|  | paging = True | ||||||
|  | language_support = False | ||||||
|  | 
 | ||||||
|  | # search-url | ||||||
|  | base_url = 'https://www.youtube.com/results' | ||||||
|  | search_url = base_url + '?search_query={query}&page={page}' | ||||||
|  | 
 | ||||||
|  | embedded_url = '<iframe width="540" height="304" ' +\ | ||||||
|  |     'data-src="//www.youtube-nocookie.com/embed/{videoid}" ' +\ | ||||||
|  |     'frameborder="0" allowfullscreen></iframe>' | ||||||
|  | 
 | ||||||
|  | base_youtube_url = 'https://www.youtube.com/watch?v=' | ||||||
|  | 
 | ||||||
|  | # specific xpath variables | ||||||
|  | results_xpath = "//ol/li/div[contains(@class, 'yt-lockup yt-lockup-tile yt-lockup-video vve-check')]" | ||||||
|  | url_xpath = './/h3/a/@href' | ||||||
|  | title_xpath = './/div[@class="yt-lockup-content"]/h3/a' | ||||||
|  | content_xpath = './/div[@class="yt-lockup-content"]/div[@class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2"]' | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # do search-request | ||||||
|  | def request(query, params): | ||||||
|  |     params['url'] = search_url.format(query=quote_plus(query), | ||||||
|  |                                       page=params['pageno']) | ||||||
|  | 
 | ||||||
|  |     return params | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # get response from search-request | ||||||
|  | def response(resp): | ||||||
|  |     results = [] | ||||||
|  | 
 | ||||||
|  |     dom = html.fromstring(resp.text) | ||||||
|  | 
 | ||||||
|  |     # parse results | ||||||
|  |     for result in dom.xpath(results_xpath): | ||||||
|  |         videoid = result.xpath('@data-context-item-id')[0] | ||||||
|  | 
 | ||||||
|  |         url = base_youtube_url + videoid | ||||||
|  |         thumbnail = 'https://i.ytimg.com/vi/' + videoid + '/hqdefault.jpg' | ||||||
|  | 
 | ||||||
|  |         title = extract_text(result.xpath(title_xpath)[0]) | ||||||
|  |         content = extract_text(result.xpath(content_xpath)[0]) | ||||||
|  | 
 | ||||||
|  |         embedded = embedded_url.format(videoid=videoid) | ||||||
|  | 
 | ||||||
|  |         # append result | ||||||
|  |         results.append({'url': url, | ||||||
|  |                         'title': title, | ||||||
|  |                         'content': content, | ||||||
|  |                         'template': 'videos.html', | ||||||
|  |                         'embedded': embedded, | ||||||
|  |                         'thumbnail': thumbnail}) | ||||||
|  | 
 | ||||||
|  |     # return results | ||||||
|  |     return results | ||||||
| @ -242,8 +242,13 @@ engines: | |||||||
|     shortcut : yhn |     shortcut : yhn | ||||||
| 
 | 
 | ||||||
|   - name : youtube |   - name : youtube | ||||||
|     engine : youtube |  | ||||||
|     shortcut : yt |     shortcut : yt | ||||||
|  |     # You can use the engine using the official stable API, but you need an API key | ||||||
|  |     # See : https://console.developers.google.com/project | ||||||
|  |     #    engine : youtube_api | ||||||
|  |     #    api_key: 'apikey' # required! | ||||||
|  |     # Or you can use the html non-stable engine, activated by default | ||||||
|  |     engine : youtube_noapi | ||||||
| 
 | 
 | ||||||
|   - name : dailymotion |   - name : dailymotion | ||||||
|     engine : dailymotion |     engine : dailymotion | ||||||
|  | |||||||
							
								
								
									
										111
									
								
								searx/tests/engines/test_youtube_api.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										111
									
								
								searx/tests/engines/test_youtube_api.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,111 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import youtube_api | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Unit tests for the request() and response() functions of the youtube_api engine. | ||||||
|  | class TestYoutubeAPIEngine(SearxTestCase): | ||||||
|  | 
 | ||||||
|  |     # request() must build a googleapis.com URL containing the query and, | ||||||
|  |     # when a specific language is set, its language code. | ||||||
|  |     def test_request(self): | ||||||
|  |         query = 'test_query' | ||||||
|  |         dicto = defaultdict(dict) | ||||||
|  |         dicto['pageno'] = 0 | ||||||
|  |         dicto['language'] = 'fr_FR' | ||||||
|  |         params = youtube_api.request(query, dicto) | ||||||
|  |         self.assertTrue('url' in params) | ||||||
|  |         self.assertTrue(query in params['url']) | ||||||
|  |         self.assertIn('googleapis.com', params['url']) | ||||||
|  |         self.assertIn('youtube', params['url']) | ||||||
|  |         self.assertIn('fr', params['url']) | ||||||
|  | 
 | ||||||
|  |         # with language 'all' no language code may appear in the URL | ||||||
|  |         dicto['language'] = 'all' | ||||||
|  |         params = youtube_api.request(query, dicto) | ||||||
|  |         self.assertFalse('fr' in params['url']) | ||||||
|  | 
 | ||||||
|  |     # response() must reject non-response arguments, return [] when the | ||||||
|  |     # reply has no 'items' key and convert a video item into a result dict. | ||||||
|  |     def test_response(self): | ||||||
|  |         # arguments without a .text attribute raise AttributeError | ||||||
|  |         self.assertRaises(AttributeError, youtube_api.response, None) | ||||||
|  |         self.assertRaises(AttributeError, youtube_api.response, []) | ||||||
|  |         self.assertRaises(AttributeError, youtube_api.response, '') | ||||||
|  |         self.assertRaises(AttributeError, youtube_api.response, '[]') | ||||||
|  | 
 | ||||||
|  |         response = mock.Mock(text='{}') | ||||||
|  |         self.assertEqual(youtube_api.response(response), []) | ||||||
|  | 
 | ||||||
|  |         response = mock.Mock(text='{"data": []}') | ||||||
|  |         self.assertEqual(youtube_api.response(response), []) | ||||||
|  | 
 | ||||||
|  |         # reply containing exactly one video item | ||||||
|  |         json = """ | ||||||
|  |         { | ||||||
|  |          "kind": "youtube#searchListResponse", | ||||||
|  |          "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", | ||||||
|  |          "nextPageToken": "CAUQAA", | ||||||
|  |          "pageInfo": { | ||||||
|  |           "totalResults": 1000000, | ||||||
|  |           "resultsPerPage": 20 | ||||||
|  |          }, | ||||||
|  |          "items": [ | ||||||
|  |           { | ||||||
|  |            "kind": "youtube#searchResult", | ||||||
|  |            "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/IbLO64BMhbHIgWLwLw7MDYe7Hs4", | ||||||
|  |            "id": { | ||||||
|  |             "kind": "youtube#video", | ||||||
|  |             "videoId": "DIVZCPfAOeM" | ||||||
|  |            }, | ||||||
|  |            "snippet": { | ||||||
|  |             "publishedAt": "2015-05-29T22:41:04.000Z", | ||||||
|  |             "channelId": "UCNodmx1ERIjKqvcJLtdzH5Q", | ||||||
|  |             "title": "Title", | ||||||
|  |             "description": "Description", | ||||||
|  |             "thumbnails": { | ||||||
|  |              "default": { | ||||||
|  |               "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/default.jpg" | ||||||
|  |              }, | ||||||
|  |              "medium": { | ||||||
|  |               "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" | ||||||
|  |              }, | ||||||
|  |              "high": { | ||||||
|  |               "url": "https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg" | ||||||
|  |              } | ||||||
|  |             }, | ||||||
|  |             "channelTitle": "MinecraftUniverse", | ||||||
|  |             "liveBroadcastContent": "none" | ||||||
|  |            } | ||||||
|  |           } | ||||||
|  |           ] | ||||||
|  |         } | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=json) | ||||||
|  |         results = youtube_api.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertEqual(results[0]['title'], 'Title') | ||||||
|  |         self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') | ||||||
|  |         self.assertEqual(results[0]['content'], 'Description') | ||||||
|  |         self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') | ||||||
|  |         self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) | ||||||
|  | 
 | ||||||
|  |         # well-formed reply that has no 'items' key | ||||||
|  |         json = """ | ||||||
|  |         { | ||||||
|  |          "kind": "youtube#searchListResponse", | ||||||
|  |          "etag": "xmg9xJZuZD438sF4hb-VcBBREXc/YJQDcTBCDcaBvl-sRZJoXdvy1ME", | ||||||
|  |          "nextPageToken": "CAUQAA", | ||||||
|  |          "pageInfo": { | ||||||
|  |           "totalResults": 1000000, | ||||||
|  |           "resultsPerPage": 20 | ||||||
|  |          } | ||||||
|  |         } | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=json) | ||||||
|  |         results = youtube_api.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
|  | 
 | ||||||
|  |         # unrelated JSON document | ||||||
|  |         json = """ | ||||||
|  |         {"toto":{"entry":[] | ||||||
|  |         } | ||||||
|  |         } | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=json) | ||||||
|  |         results = youtube_api.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
							
								
								
									
										103
									
								
								searx/tests/engines/test_youtube_noapi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										103
									
								
								searx/tests/engines/test_youtube_noapi.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,103 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import youtube_noapi | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | # Unit tests for the request() and response() functions of the youtube_noapi engine. | ||||||
|  | class TestYoutubeNoAPIEngine(SearxTestCase): | ||||||
|  | 
 | ||||||
|  |     # request() must build a youtube.com URL that contains the query. | ||||||
|  |     def test_request(self): | ||||||
|  |         query = 'test_query' | ||||||
|  |         dicto = defaultdict(dict) | ||||||
|  |         dicto['pageno'] = 0 | ||||||
|  |         params = youtube_noapi.request(query, dicto) | ||||||
|  |         self.assertIn('url', params) | ||||||
|  |         self.assertIn(query, params['url']) | ||||||
|  |         self.assertIn('youtube.com', params['url']) | ||||||
|  | 
 | ||||||
|  |     # response() must reject non-response arguments, return [] for pages | ||||||
|  |     # without video entries and convert a video entry into a result dict. | ||||||
|  |     def test_response(self): | ||||||
|  |         # arguments without a .text attribute raise AttributeError | ||||||
|  |         self.assertRaises(AttributeError, youtube_noapi.response, None) | ||||||
|  |         self.assertRaises(AttributeError, youtube_noapi.response, []) | ||||||
|  |         self.assertRaises(AttributeError, youtube_noapi.response, '') | ||||||
|  |         self.assertRaises(AttributeError, youtube_noapi.response, '[]') | ||||||
|  | 
 | ||||||
|  |         response = mock.Mock(text='<html></html>') | ||||||
|  |         self.assertEqual(youtube_noapi.response(response), []) | ||||||
|  | 
 | ||||||
|  |         # results page containing exactly one video entry | ||||||
|  |         html = """ | ||||||
|  |         <ol id="item-section-063864" class="item-section"> | ||||||
|  |             <li> | ||||||
|  |                 <div class="yt-lockup yt-lockup-tile yt-lockup-video vve-check clearfix yt-uix-tile" | ||||||
|  |                 data-context-item-id="DIVZCPfAOeM" | ||||||
|  |                 data-visibility-tracking="CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JECx_-GK5uqMpcIB"> | ||||||
|  |                 <div class="yt-lockup-dismissable"><div class="yt-lockup-thumbnail contains-addto"> | ||||||
|  |                 <a aria-hidden="true" href="/watch?v=DIVZCPfAOeM" class=" yt-uix-sessionlink pf-link" | ||||||
|  |                 data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA"> | ||||||
|  |                 <div class="yt-thumb video-thumb"><img src="//i.ytimg.com/vi/DIVZCPfAOeM/mqdefault.jpg" | ||||||
|  |                 width="196" height="110"/></div><span class="video-time" aria-hidden="true">11:35</span></a> | ||||||
|  |                 <span class="thumb-menu dark-overflow-action-menu video-actions"> | ||||||
|  |                 </span> | ||||||
|  |                 </div> | ||||||
|  |                 <div class="yt-lockup-content"> | ||||||
|  |                 <h3 class="yt-lockup-title"> | ||||||
|  |                 <a href="/watch?v=DIVZCPfAOeM" | ||||||
|  |                 class="yt-uix-tile-link yt-ui-ellipsis yt-ui-ellipsis-2 yt-uix-sessionlink spf-link" | ||||||
|  |                 data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JFIEdGVzdA" | ||||||
|  |                 title="Top Speed Test Kawasaki Ninja H2 (Thailand) By. MEHAY SUPERBIKE" | ||||||
|  |                 aria-describedby="description-id-259079" rel="spf-prefetch" dir="ltr"> | ||||||
|  |                 Title | ||||||
|  |                 </a> | ||||||
|  |                 <span class="accessible-description" id="description-id-259079"> - Durée : 11:35.</span> | ||||||
|  |                 </h3> | ||||||
|  |                 <div class="yt-lockup-byline">de | ||||||
|  |                 <a href="/user/mheejapan" class=" yt-uix-sessionlink spf-link g-hovercard" | ||||||
|  |                 data-sessionlink="itct=CBgQ3DAYACITCPGXnYau6sUCFZEIHAod-VQASCj0JA" data-ytid="UCzEesu54Hjs0uRKmpy66qeA" | ||||||
|  |                 data-name="">MEHAY SUPERBIKE</a></div><div class="yt-lockup-meta"> | ||||||
|  |                 <ul class="yt-lockup-meta-info"> | ||||||
|  |                     <li>il y a 20 heures</li> | ||||||
|  |                     <li>8 424 vues</li> | ||||||
|  |                 </ul> | ||||||
|  |                 </div> | ||||||
|  |                 <div class="yt-lockup-description yt-ui-ellipsis yt-ui-ellipsis-2" dir="ltr"> | ||||||
|  |                 Description | ||||||
|  |                 </div> | ||||||
|  |                 <div class="yt-lockup-badges"> | ||||||
|  |                 <ul class="yt-badge-list "> | ||||||
|  |                     <li class="yt-badge-item" > | ||||||
|  |                         <span class="yt-badge">Nouveauté</span> | ||||||
|  |                     </li> | ||||||
|  |                     <li class="yt-badge-item" ><span class="yt-badge " >HD</span></li> | ||||||
|  |                 </ul> | ||||||
|  |                 </div> | ||||||
|  |                 <div class="yt-lockup-action-menu yt-uix-menu-container"> | ||||||
|  |                 <div class="yt-uix-menu yt-uix-videoactionmenu hide-until-delayloaded" | ||||||
|  |                 data-video-id="DIVZCPfAOeM" data-menu-content-id="yt-uix-videoactionmenu-menu"> | ||||||
|  |                 </div> | ||||||
|  |                 </div> | ||||||
|  |                 </div> | ||||||
|  |                 </div> | ||||||
|  |                 </div> | ||||||
|  |             </li> | ||||||
|  |         </ol> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         results = youtube_noapi.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 1) | ||||||
|  |         self.assertEqual(results[0]['title'], 'Title') | ||||||
|  |         self.assertEqual(results[0]['url'], 'https://www.youtube.com/watch?v=DIVZCPfAOeM') | ||||||
|  |         self.assertEqual(results[0]['content'], 'Description') | ||||||
|  |         self.assertEqual(results[0]['thumbnail'], 'https://i.ytimg.com/vi/DIVZCPfAOeM/hqdefault.jpg') | ||||||
|  |         self.assertTrue('DIVZCPfAOeM' in results[0]['embedded']) | ||||||
|  | 
 | ||||||
|  |         # result list whose only <li> holds no video entry | ||||||
|  |         html = """ | ||||||
|  |         <ol id="item-section-063864" class="item-section"> | ||||||
|  |             <li> | ||||||
|  |             </li> | ||||||
|  |         </ol> | ||||||
|  |         """ | ||||||
|  |         response = mock.Mock(text=html) | ||||||
|  |         results = youtube_noapi.response(response) | ||||||
|  |         self.assertEqual(type(results), list) | ||||||
|  |         self.assertEqual(len(results), 0) | ||||||
| @ -39,4 +39,6 @@ from searx.tests.engines.test_www500px import *  # noqa | |||||||
| from searx.tests.engines.test_yacy import *  # noqa | from searx.tests.engines.test_yacy import *  # noqa | ||||||
| from searx.tests.engines.test_yahoo import *  # noqa | from searx.tests.engines.test_yahoo import *  # noqa | ||||||
| from searx.tests.engines.test_youtube import *  # noqa | from searx.tests.engines.test_youtube import *  # noqa | ||||||
|  | from searx.tests.engines.test_youtube_api import *  # noqa | ||||||
|  | from searx.tests.engines.test_youtube_noapi import *  # noqa | ||||||
| from searx.tests.engines.test_yahoo_news import *  # noqa | from searx.tests.engines.test_yahoo_news import *  # noqa | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user