| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  | from urllib import urlencode | 
					
						
							| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  | from lxml import html | 
					
						
							|  |  |  | from urlparse import urljoin | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-18 09:35:29 +02:00
										 |  |  | categories = ['images'] | 
					
						
							| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  | url = 'https://secure.flickr.com/' | 
					
						
							| 
									
										
										
										
											2013-12-30 22:24:42 +01:00
										 |  |  | search_url = url+'search/?{query}' | 
					
						
							| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  |     params['url'] = search_url.format(query=urlencode({'q': query})) | 
					
						
							| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     global base_url | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							| 
									
										
										
										
											2013-10-18 09:35:29 +02:00
										 |  |  |     for result in dom.xpath('//div[@id="thumbnails"]//a[@class="rapidnofollow photo-click" and @data-track="photo-click"]'): | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  |         href = urljoin(url, result.attrib.get('href')) | 
					
						
							| 
									
										
										
										
											2013-10-18 09:35:29 +02:00
										 |  |  |         img = result.xpath('.//img')[0] | 
					
						
							|  |  |  |         title = img.attrib.get('alt', '') | 
					
						
							| 
									
										
										
										
											2013-10-22 23:34:45 +02:00
										 |  |  |         img_src = img.attrib.get('data-defer-src') | 
					
						
							|  |  |  |         if not img_src: | 
					
						
							|  |  |  |             continue | 
					
						
							| 
									
										
										
										
											2013-10-23 23:55:37 +02:00
										 |  |  |         results.append({'url': href, 'title': title, 'img_src': img_src, 'template': 'images.html'}) | 
					
						
							| 
									
										
										
										
											2013-10-18 02:15:26 +02:00
										 |  |  |     return results |