| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | """
 | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |  Nyaa.si (Anime Bittorrent tracker) | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-04-27 16:55:42 +02:00
										 |  |  |  @website      https://nyaa.si/ | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  |  @provide-api  no | 
					
						
							|  |  |  |  @using-api    no | 
					
						
							|  |  |  |  @results      HTML | 
					
						
							|  |  |  |  @stable       no (HTML can change) | 
					
						
							|  |  |  |  @parse        url, title, content, seed, leech, torrentfile | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlencode | 
					
						
							| 
									
										
										
										
											2020-10-02 18:13:56 +02:00
										 |  |  | from searx.utils import extract_text, get_torrent_size, int_or_zero | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
# engine dependent config
# categories this engine is listed under
# (presumably read by the searx engine loader -- confirm against the loader)
categories = ['files', 'images', 'videos', 'music']
# request() forwards params['pageno'] into the search URL
paging = True

# search-url
base_url = 'https://nyaa.si/'
# {query} receives the url-encoded "term=..." pair and {offset} the page
# number; both placeholders are filled in by request()
search_url = base_url + '?page=search&{query}&offset={offset}'

# xpath queries
# every row inside the "torrent-list" table that has no <th> child,
# i.e. header rows are excluded
xpath_results = '//table[contains(@class, "torrent-list")]//tr[not(th)]'
# the expressions below are evaluated relative to a single result row
# first link of the 1st cell; response() reads its "title" attribute
xpath_category = './/td[1]/a[1]'
# last link of the 2nd cell; supplies the title text and the detail-page href
xpath_title = './/td[2]/a[last()]'
# links of the 3rd cell; response() sorts them into magnet / torrent-file links
xpath_torrent_links = './/td[3]/a'
# text of the 4th cell; response() splits it into "<number> <unit>"
xpath_filesize = './/td[4]/text()'
# text of the 6th, 7th and 8th cells; response() converts each to an int
xpath_seeds = './/td[6]/text()'
xpath_leeches = './/td[7]/text()'
xpath_downloads = './/td[8]/text()'
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-27 00:23:17 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							|  |  |  |     query = urlencode({'term': query}) | 
					
						
							|  |  |  |     params['url'] = search_url.format(query=query, offset=params['pageno']) | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     dom = html.fromstring(resp.text) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for result in dom.xpath(xpath_results): | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |         # defaults | 
					
						
							|  |  |  |         filesize = 0 | 
					
						
							|  |  |  |         magnet_link = "" | 
					
						
							|  |  |  |         torrent_link = "" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  |         # category in which our torrent belongs | 
					
						
							| 
									
										
										
										
											2017-09-04 20:05:04 +02:00
										 |  |  |         try: | 
					
						
							|  |  |  |             category = result.xpath(xpath_category)[0].attrib.get('title') | 
					
						
							|  |  |  |         except: | 
					
						
							|  |  |  |             pass | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # torrent title | 
					
						
							|  |  |  |         page_a = result.xpath(xpath_title)[0] | 
					
						
							| 
									
										
										
										
											2016-12-09 11:44:24 +01:00
										 |  |  |         title = extract_text(page_a) | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # link to the page | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |         href = base_url + page_a.attrib.get('href') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for link in result.xpath(xpath_torrent_links): | 
					
						
							|  |  |  |             url = link.attrib.get('href') | 
					
						
							|  |  |  |             if 'magnet' in url: | 
					
						
							|  |  |  |                 # link to the magnet | 
					
						
							|  |  |  |                 magnet_link = url | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 # link to the torrent file | 
					
						
							|  |  |  |                 torrent_link = url | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-09-04 20:05:04 +02:00
										 |  |  |         # seed count | 
					
						
							|  |  |  |         seed = int_or_zero(result.xpath(xpath_seeds)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # leech count | 
					
						
							|  |  |  |         leech = int_or_zero(result.xpath(xpath_leeches)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # torrent downloads count | 
					
						
							|  |  |  |         downloads = int_or_zero(result.xpath(xpath_downloads)) | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |         # let's try to calculate the torrent size | 
					
						
							|  |  |  |         try: | 
					
						
							|  |  |  |             filesize_info = result.xpath(xpath_filesize)[0] | 
					
						
							|  |  |  |             filesize, filesize_multiplier = filesize_info.split() | 
					
						
							|  |  |  |             filesize = get_torrent_size(filesize, filesize_multiplier) | 
					
						
							|  |  |  |         except: | 
					
						
							|  |  |  |             pass | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # content string contains all information not included into template | 
					
						
							|  |  |  |         content = 'Category: "{category}". Downloaded {downloads} times.' | 
					
						
							|  |  |  |         content = content.format(category=category, downloads=downloads) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         results.append({'url': href, | 
					
						
							|  |  |  |                         'title': title, | 
					
						
							|  |  |  |                         'content': content, | 
					
						
							|  |  |  |                         'seed': seed, | 
					
						
							|  |  |  |                         'leech': leech, | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |                         'filesize': filesize, | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  |                         'torrentfile': torrent_link, | 
					
						
							| 
									
										
										
										
											2017-08-31 21:32:30 +02:00
										 |  |  |                         'magnetlink': magnet_link, | 
					
						
							| 
									
										
										
										
											2016-03-24 19:24:37 +01:00
										 |  |  |                         'template': 'torrent.html'}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return results |