| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | ## Yacy (Web, Images, Videos, Music, Files) | 
					
						
							|  |  |  | #  | 
					
						
							|  |  |  | # @website     http://yacy.net | 
					
						
							|  |  |  | # @provide-api yes (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch) | 
					
						
							|  |  |  | #  | 
					
						
							|  |  |  | # @using-api   yes | 
					
						
							|  |  |  | # @results     JSON | 
					
						
							|  |  |  | # @stable      yes | 
					
						
							|  |  |  | # @parse       (general)    url, title, content, publishedDate | 
					
						
							|  |  |  | # @parse       (images)     url, title, img_src | 
					
						
							|  |  |  | # | 
					
						
							|  |  |  | # @todo        parse video, audio and file results | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2014-01-05 00:46:42 +01:00
										 |  |  | from urllib import urlencode | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | from dateutil import parser | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							|  |  |  | categories = ['general', 'images'] #TODO , 'music', 'videos', 'files' | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | language_support = True | 
					
						
							|  |  |  | number_of_results = 5 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # search-url | 
					
						
							|  |  |  | base_url = 'http://localhost:8090' | 
					
						
							|  |  |  | search_url = '/yacysearch.json?{query}&startRecord={offset}&maximumRecords={limit}&contentdom={search_type}&resource=global' | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # yacy specific type-definitions | 
					
						
							|  |  |  | search_types = {'general': 'text', | 
					
						
							|  |  |  |                 'images': 'image', | 
					
						
							|  |  |  |                 'files': 'app',                | 
					
						
							|  |  |  |                 'music': 'audio', | 
					
						
							|  |  |  |                 'videos': 'video'} | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # do search-request | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     offset = (params['pageno'] - 1) * number_of_results | 
					
						
							|  |  |  |     search_type = search_types.get(params['category'], '0') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     params['url'] = base_url + search_url.format(query=urlencode({'query': query}), | 
					
						
							|  |  |  |                                                  offset=offset, | 
					
						
							|  |  |  |                                                  limit=number_of_results, | 
					
						
							|  |  |  |                                                  search_type=search_type) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # add language tag if specified | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         params['url'] += '&lr=lang_' + params['language'].split('_')[0] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  | # get response from search-request | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | def response(resp): | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     raw_search_results = loads(resp.text) | 
					
						
							| 
									
										
										
										
											2014-01-05 00:46:42 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     # return empty array if there are no results | 
					
						
							| 
									
										
										
										
											2014-02-11 13:13:51 +01:00
										 |  |  |     if not raw_search_results: | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     search_results = raw_search_results.get('channels', {})[0].get('items', []) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     if resp.search_params['category'] == 'general': | 
					
						
							|  |  |  |         # parse general results | 
					
						
							|  |  |  |         for result in search_results: | 
					
						
							|  |  |  |             publishedDate = parser.parse(result['pubDate']) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |             # append result | 
					
						
							|  |  |  |             results.append({'url': result['link'], | 
					
						
							|  |  |  |                         'title': result['title'], | 
					
						
							|  |  |  |                         'content': result['description'], | 
					
						
							|  |  |  |                         'publishedDate': publishedDate}) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     elif resp.search_params['category'] == 'images': | 
					
						
							|  |  |  |         # parse image results | 
					
						
							|  |  |  |         for result in search_results: | 
					
						
							|  |  |  |             # append result | 
					
						
							|  |  |  |             results.append({'url': result['url'], | 
					
						
							|  |  |  |                         'title': result['title'], | 
					
						
							|  |  |  |                         'content': '', | 
					
						
							|  |  |  |                         'img_src': result['image'], | 
					
						
							|  |  |  |                         'template': 'images.html'}) | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     #TODO parse video, audio and file results | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-03 13:47:12 +02:00
										 |  |  |     # return results | 
					
						
							| 
									
										
										
										
											2013-12-29 21:39:23 +01:00
										 |  |  |     return results |