| 
									
										
										
										
											2015-05-02 15:45:17 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Soundcloud (Music) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://soundcloud.com | 
					
						
							|  |  |  |  @provide-api yes (https://developers.soundcloud.com/) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   yes | 
					
						
							|  |  |  |  @results     JSON | 
					
						
							|  |  |  |  @stable      yes | 
					
						
							|  |  |  |  @parse       url, title, content, publishedDate, embedded | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-30 01:20:14 +01:00
										 |  |  | import re | 
					
						
							|  |  |  | from StringIO import StringIO | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2015-12-30 01:20:14 +01:00
										 |  |  | from lxml import etree | 
					
						
							| 
									
										
										
										
											2015-01-05 02:04:23 +01:00
										 |  |  | from urllib import urlencode, quote_plus | 
					
						
							|  |  |  | from dateutil import parser | 
					
						
							| 
									
										
										
										
											2015-12-30 01:20:14 +01:00
										 |  |  | from searx import logger | 
					
						
							|  |  |  | from searx.poolrequests import get as http_get | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | categories = ['music'] | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | paging = True | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | # search-url | 
					
						
							|  |  |  | url = 'https://api.soundcloud.com/' | 
					
						
							| 
									
										
										
										
											2014-12-16 17:26:16 +01:00
										 |  |  | search_url = url + 'search?{query}'\ | 
					
						
							|  |  |  |                          '&facet=model'\ | 
					
						
							|  |  |  |                          '&limit=20'\ | 
					
						
							|  |  |  |                          '&offset={offset}'\ | 
					
						
							|  |  |  |                          '&linked_partitioning=1'\ | 
					
						
							|  |  |  |                          '&client_id={client_id}'   # noqa | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-01-05 02:04:23 +01:00
										 |  |  | embedded_url = '<iframe width="100%" height="166" ' +\ | 
					
						
							|  |  |  |     'scrolling="no" frameborder="no" ' +\ | 
					
						
							|  |  |  |     'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-30 01:20:14 +01:00
										 |  |  | def get_client_id(): | 
					
						
							|  |  |  |     response = http_get("https://soundcloud.com") | 
					
						
							|  |  |  |     rx_namespace = {"re": "http://exslt.org/regular-expressions"} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if response.ok: | 
					
						
							|  |  |  |         tree = etree.parse(StringIO(response.content), etree.HTMLParser()) | 
					
						
							|  |  |  |         script_tags = tree.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=rx_namespace) | 
					
						
							|  |  |  |         app_js_urls = [script_tag.get('src') for script_tag in script_tags if script_tag is not None] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # extracts valid app_js urls from soundcloud.com content | 
					
						
							|  |  |  |         for app_js_url in app_js_urls: | 
					
						
							|  |  |  |             # gets app_js and searches for the clientid | 
					
						
							|  |  |  |             response = http_get(app_js_url) | 
					
						
							|  |  |  |             if response.ok: | 
					
						
							|  |  |  |                 cids = re.search(r'client_id:"([^"]*)"', response.content, re.M | re.I) | 
					
						
							|  |  |  |                 if cids is not None and len(cids.groups()): | 
					
						
							|  |  |  |                     return cids.groups()[0] | 
					
						
							|  |  |  |     logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!") | 
					
						
							|  |  |  |     return "" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-15 19:49:23 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-30 01:20:14 +01:00
										 |  |  | # api-key | 
					
						
							|  |  |  | guest_client_id = get_client_id() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | # do search-request | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2014-01-30 01:50:15 +01:00
										 |  |  |     offset = (params['pageno'] - 1) * 20 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-30 01:50:15 +01:00
										 |  |  |     params['url'] = search_url.format(query=urlencode({'q': query}), | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  |                                       offset=offset, | 
					
						
							|  |  |  |                                       client_id=guest_client_id) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | # get response from search-request | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  |     search_res = loads(resp.text) | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  |     for result in search_res.get('collection', []): | 
					
						
							| 
									
										
										
										
											2013-10-20 00:52:32 +02:00
										 |  |  |         if result['kind'] in ('track', 'playlist'): | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  |             title = result['title'] | 
					
						
							|  |  |  |             content = result['description'] | 
					
						
							| 
									
										
										
										
											2015-01-05 02:04:23 +01:00
										 |  |  |             publishedDate = parser.parse(result['last_modified']) | 
					
						
							|  |  |  |             uri = quote_plus(result['uri']) | 
					
						
							|  |  |  |             embedded = embedded_url.format(uri=uri) | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |             # append result | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  |             results.append({'url': result['permalink_url'], | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							| 
									
										
										
										
											2015-01-05 02:04:23 +01:00
										 |  |  |                             'publishedDate': publishedDate, | 
					
						
							|  |  |  |                             'embedded': embedded, | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  |                             'content': content}) | 
					
						
							| 
									
										
										
										
											2014-09-02 18:12:30 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							| 
									
										
										
										
											2013-10-17 21:21:23 +02:00
										 |  |  |     return results |