| 
									
										
										
										
											2015-05-02 15:45:17 +02:00
										 |  |  | """
 | 
					
						
							|  |  |  |  Twitter (Social media) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @website     https://twitter.com/ | 
					
						
							|  |  |  |  @provide-api yes (https://dev.twitter.com/docs/using-search) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api   no | 
					
						
							|  |  |  |  @results     HTML (using search portal) | 
					
						
							|  |  |  |  @stable      no (HTML can change) | 
					
						
							|  |  |  |  @parse       url, title, content, img_src, publishedDate | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @todo        publishedDate | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2015-02-04 19:39:31 +01:00
										 |  |  | from searx.engines.xpath import extract_text | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | from searx.url_utils import urlencode, urljoin | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | # engine dependent config | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | categories = ['social media'] | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | language_support = True | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | # search-url | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | base_url = 'https://twitter.com/' | 
					
						
							| 
									
										
										
										
											2015-02-04 19:39:31 +01:00
										 |  |  | search_url = base_url + 'search?' | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | # specific xpath variables | 
					
						
							|  |  |  | results_xpath = '//li[@data-item-type="tweet"]' | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  | avatar_xpath = './/img[contains(@class, "avatar")]/@src' | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | link_xpath = './/small[@class="time"]//a' | 
					
						
							| 
									
										
										
										
											2015-05-02 18:20:12 +02:00
										 |  |  | title_xpath = './/span[contains(@class, "username")]' | 
					
						
							|  |  |  | content_xpath = './/p[contains(@class, "tweet-text")]' | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  | timestamp_xpath = './/span[contains(@class,"_timestamp")]' | 
					
						
							| 
									
										
										
										
											2014-01-20 02:31:20 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | # do search-request | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | def request(query, params): | 
					
						
							|  |  |  |     params['url'] = search_url + urlencode({'q': query}) | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # set language if specified | 
					
						
							|  |  |  |     if params['language'] != 'all': | 
					
						
							|  |  |  |         params['cookies']['lang'] = params['language'].split('-')[0] | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         params['cookies']['lang'] = 'en' | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | # get response from search-request | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  |     dom = html.fromstring(resp.text) | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							|  |  |  |     for tweet in dom.xpath(results_xpath): | 
					
						
							| 
									
										
										
										
											2015-08-25 13:12:51 +02:00
										 |  |  |         try: | 
					
						
							|  |  |  |             link = tweet.xpath(link_xpath)[0] | 
					
						
							|  |  |  |             content = extract_text(tweet.xpath(content_xpath)[0]) | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |             img_src = tweet.xpath(avatar_xpath)[0] | 
					
						
							|  |  |  |             img_src = img_src.replace('_bigger', '_normal') | 
					
						
							| 
									
										
										
										
											2015-08-25 13:12:51 +02:00
										 |  |  |         except Exception: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  |         url = urljoin(base_url, link.attrib.get('href')) | 
					
						
							| 
									
										
										
										
											2015-02-04 19:39:31 +01:00
										 |  |  |         title = extract_text(tweet.xpath(title_xpath)) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |         pubdate = tweet.xpath(timestamp_xpath) | 
					
						
							|  |  |  |         if len(pubdate) > 0: | 
					
						
							| 
									
										
										
										
											2014-12-29 21:31:04 +01:00
										 |  |  |             timestamp = float(pubdate[0].attrib.get('data-time')) | 
					
						
							|  |  |  |             publishedDate = datetime.fromtimestamp(timestamp, None) | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |             # append result | 
					
						
							|  |  |  |             results.append({'url': url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							|  |  |  |                             'content': content, | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |                             'img_src': img_src, | 
					
						
							| 
									
										
										
										
											2014-12-28 22:57:59 +01:00
										 |  |  |                             'publishedDate': publishedDate}) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # append result | 
					
						
							|  |  |  |             results.append({'url': url, | 
					
						
							|  |  |  |                             'title': title, | 
					
						
							| 
									
										
										
										
											2017-02-12 14:58:49 +01:00
										 |  |  |                             'content': content, | 
					
						
							|  |  |  |                             'img_src': img_src}) | 
					
						
							| 
									
										
										
										
											2014-09-02 20:14:52 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							| 
									
										
										
										
											2013-10-20 10:40:02 +02:00
										 |  |  |     return results |