| 
									
										
										
										
											2015-05-02 15:45:17 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Google (News) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://www.google.com | 
					
						
							|  |  |  |  @provide-api yes (https://developers.google.com/web-search/docs/), | 
					
						
							|  |  |  |               deprecated! | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   yes | 
					
						
							|  |  |  |  @results     JSON | 
					
						
							|  |  |  |  @stable      yes (but deprecated) | 
					
						
							|  |  |  |  @parse       url, title, content, publishedDate | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from urllib import urlencode | 
					
						
							|  |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2014-03-18 13:19:50 +01:00
										 |  |  | from dateutil import parser | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | # search-url | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | categories = ['news'] | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | paging = True | 
					
						
							|  |  |  | language_support = True | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | url = 'https://ajax.googleapis.com/' | 
					
						
							| 
									
										
										
										
											2015-01-31 16:38:03 +01:00
										 |  |  | search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={lang}' | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | # do search-request | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | def request(query, params): | 
					
						
							|  |  |  |     offset = (params['pageno'] - 1) * 8 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     language = 'en-US' | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         language = params['language'].replace('_', '-') | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     params['url'] = search_url.format(offset=offset, | 
					
						
							|  |  |  |                                       query=urlencode({'q': query}), | 
					
						
							| 
									
										
										
										
											2015-01-31 16:38:03 +01:00
										 |  |  |                                       lang=language) | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | # get response from search-request | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     search_res = loads(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  |     # return empty array if there are no results | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     if not search_res.get('responseData', {}).get('results'): | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  |     # parse results | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     for result in search_res['responseData']['results']: | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  |         # parse publishedDate | 
					
						
							| 
									
										
										
										
											2014-03-18 13:19:50 +01:00
										 |  |  |         publishedDate = parser.parse(result['publishedDate']) | 
					
						
							| 
									
										
										
										
											2015-01-31 16:38:03 +01:00
										 |  |  |         if 'url' not in result: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2014-03-14 09:55:04 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  |         # append result | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |         results.append({'url': result['unescapedUrl'], | 
					
						
							|  |  |  |                         'title': result['titleNoFormatting'], | 
					
						
							| 
									
										
										
										
											2014-03-15 20:20:41 +01:00
										 |  |  |                         'publishedDate': publishedDate, | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |                         'content': result['content']}) | 
					
						
							| 
									
										
										
										
											2014-09-01 15:10:05 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							| 
									
										
										
										
											2014-03-04 13:11:04 +01:00
										 |  |  |     return results |