| 
									
										
										
										
											2015-05-02 15:45:17 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Digg (News, Social media) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://digg.com/ | 
					
						
							|  |  |  |  @provide-api no | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   no | 
					
						
							|  |  |  |  @results     JSON (parsed with json.loads; entries are read from the 'mapped' key) | 
					
						
							|  |  |  |  @stable      no (HTML can change) | 
					
						
							|  |  |  |  @parse       url, title, content, publishedDate, thumbnail | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-30 18:20:43 +02:00
										 |  |  | import random | 
					
						
							|  |  |  | import string | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							|  |  |  | categories = ['news', 'social media'] | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # search-url | 
					
						
							|  |  |  | base_url = 'https://digg.com/' | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  | search_url = base_url + 'api/search/?{query}&from={position}&size=20&format=html' | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # specific xpath variables | 
					
						
							|  |  |  | results_xpath = '//article' | 
					
						
							|  |  |  | link_xpath = './/small[@class="time"]//a' | 
					
						
							|  |  |  | title_xpath = './/h2//a//text()' | 
					
						
							|  |  |  | content_xpath = './/p//text()' | 
					
						
							|  |  |  | pubdate_xpath = './/time' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-30 21:14:12 +02:00
										 |  |  | digg_cookie_chars = string.ascii_uppercase + string.ascii_lowercase +\ | 
					
						
							|  |  |  |     string.digits + "+_" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |     offset = (params['pageno'] - 1) * 20 | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |     params['url'] = search_url.format(position=offset, | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |                                       query=urlencode({'q': query})) | 
					
						
							| 
									
										
										
										
											2017-08-30 18:20:43 +02:00
										 |  |  |     params['cookies']['frontend.auid'] = ''.join(random.choice( | 
					
						
							| 
									
										
										
										
											2017-08-30 21:14:12 +02:00
										 |  |  |         digg_cookie_chars) for _ in range(22)) | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     search_result = loads(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |     for result in search_result['mapped']: | 
					
						
							| 
									
										
										
										
											2015-05-02 11:43:12 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |         published = datetime.strptime(result['created']['ISO'], "%Y-%m-%d %H:%M:%S") | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |         # append result | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |         results.append({'url': result['url'], | 
					
						
							|  |  |  |                         'title': result['title'], | 
					
						
							|  |  |  |                         'content': result['excerpt'], | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |                         'template': 'videos.html', | 
					
						
							| 
									
										
										
										
											2019-10-16 15:11:27 +02:00
										 |  |  |                         'publishedDate': published, | 
					
						
							|  |  |  |                         'thumbnail': result['images']['thumbImage']}) | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							|  |  |  |     return results |