| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | from urlparse import urljoin | 
					
						
							|  |  |  | from urllib import urlencode | 
					
						
							|  |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2013-11-09 18:39:20 +01:00
										 |  |  | from cgi import escape | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | categories = ['social media'] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | base_url = 'https://twitter.com/' | 
					
						
							|  |  |  | search_url = base_url+'search?' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     global search_url | 
					
						
							|  |  |  |     params['url'] = search_url + urlencode({'q': query}) | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     global base_url | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							|  |  |  |     for tweet in dom.xpath('//li[@data-item-type="tweet"]'): | 
					
						
							|  |  |  |         link = tweet.xpath('.//small[@class="time"]//a')[0] | 
					
						
							|  |  |  |         url = urljoin(base_url, link.attrib.get('href')) | 
					
						
							|  |  |  |         title = ''.join(tweet.xpath('.//span[@class="username js-action-profile-name"]//text()')) | 
					
						
							| 
									
										
										
										
											2013-11-09 18:39:20 +01:00
										 |  |  |         content = escape(''.join(tweet.xpath('.//p[@class="js-tweet-text tweet-text"]//text()'))) | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  |         results.append({'url': url, 'title': title, 'content': content}) | 
					
						
							|  |  |  |     return results |