| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2020-07-26 15:56:54 +02:00
										 |  |  |  Torrentz2.is (BitTorrent meta-search engine) | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-07-26 15:56:54 +02:00
										 |  |  |  @website      https://torrentz2.is/ | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |  @provide-api  no | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |  @using-api    no | 
					
						
							|  |  |  |  @results      HTML | 
					
						
							|  |  |  |  @stable       no (HTML can change, although unlikely, | 
					
						
							| 
									
										
										
										
											2020-07-26 15:56:54 +02:00
										 |  |  |                    see https://torrentz.is/torrentz.btsearch) | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |  @parse        url, title, publishedDate, seed, leech, filesize, magnetlink | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | from lxml import html | 
					
						
							|  |  |  | from datetime import datetime | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | from searx.engines.xpath import extract_text | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  | from searx.utils import get_torrent_size | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # engine dependent config | 
					
						
							|  |  |  | categories = ['files', 'videos', 'music'] | 
					
						
							|  |  |  | paging = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # search-url | 
					
						
							| 
									
										
										
										
											2020-07-26 15:56:54 +02:00
										 |  |  | # https://torrentz2.is/search?f=EXAMPLE&p=6 | 
					
						
							|  |  |  | base_url = 'https://torrentz2.is/' | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | search_url = base_url + 'search?{query}' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     page = params['pageno'] - 1 | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  |     query = urlencode({'f': query, 'p': page}) | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |     params['url'] = search_url.format(query=query) | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in dom.xpath('//div[@class="results"]/dl'): | 
					
						
							|  |  |  |         name_cell = result.xpath('./dt')[0] | 
					
						
							|  |  |  |         title = extract_text(name_cell) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # skip rows that do not contain a link to a torrent | 
					
						
							|  |  |  |         links = name_cell.xpath('./a') | 
					
						
							|  |  |  |         if len(links) != 1: | 
					
						
							|  |  |  |             continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # extract url and remove a slash in the beginning | 
					
						
							|  |  |  |         link = links[0].attrib.get('href').lstrip('/') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  |         seed = 0 | 
					
						
							|  |  |  |         leech = 0 | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             seed = int(result.xpath('./dd/span[4]/text()')[0].replace(',', '')) | 
					
						
							|  |  |  |             leech = int(result.xpath('./dd/span[5]/text()')[0].replace(',', '')) | 
					
						
							|  |  |  |         except: | 
					
						
							|  |  |  |             pass | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         params = { | 
					
						
							|  |  |  |             'url': base_url + link, | 
					
						
							|  |  |  |             'title': title, | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  |             'seed': seed, | 
					
						
							|  |  |  |             'leech': leech, | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |             'template': 'torrent.html' | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # let's try to calculate the torrent size | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  |             filesize_info = result.xpath('./dd/span[3]/text()')[0] | 
					
						
							|  |  |  |             filesize, filesize_multiplier = filesize_info.split() | 
					
						
							|  |  |  |             filesize = get_torrent_size(filesize, filesize_multiplier) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             params['filesize'] = filesize | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  |         except: | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |             pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # does our link contain a valid SHA1 sum? | 
					
						
							|  |  |  |         if re.compile('[0-9a-fA-F]{40}').match(link): | 
					
						
							|  |  |  |             # add a magnet link to the result | 
					
						
							|  |  |  |             params['magnetlink'] = 'magnet:?xt=urn:btih:' + link | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # extract and convert creation date | 
					
						
							|  |  |  |         try: | 
					
						
							| 
									
										
										
										
											2017-08-25 00:52:35 +02:00
										 |  |  |             date_ts = result.xpath('./dd/span[2]')[0].attrib.get('title') | 
					
						
							|  |  |  |             date = datetime.fromtimestamp(float(date_ts)) | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |             params['publishedDate'] = date | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  |         except: | 
					
						
							| 
									
										
										
										
											2016-03-26 00:28:58 +01:00
										 |  |  |             pass | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results.append(params) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |