2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 Acgsou (Japanese Animation/Music/Comics Bittorrent tracker)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @website      https://www.acgsou.com/
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @provide-api  no
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @using-api    no
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @results      HTML
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @stable       no (HTML can change)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								 @parse        url, title, content, seed, leech, torrentfile
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								"""
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from urllib.parse import urlencode
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								from lxml import html
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-26 15:49:33 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								from searx.utils import extract_text, get_torrent_size, eval_xpath_list, eval_xpath_getindex
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# engine dependent config
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								categories = ['files', 'images', 'videos', 'music']
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								paging = True
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# search-url
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-09 17:33:18 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								base_url = 'https://www.acgsou.com/'
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								search_url = base_url + 'search.php?{query}&page={offset}'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								# xpath queries
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xpath_category = './/td[2]/a[1]'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xpath_title = './/td[3]/a[last()]'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xpath_torrent_links = './/td[3]/a'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								xpath_filesize = './/td[4]/text()'
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 07:29:17 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def request(query, params):
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    query = urlencode({'keyword': query})
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    params['url'] = search_url.format(query=query, offset=params['pageno'])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return params
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 07:29:17 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								def response(resp):
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 07:36:11 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    results = []
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    dom = html.fromstring(resp.text)
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-26 15:49:33 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								    for result in eval_xpath_list(dom, xpath_results):
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        # defaults
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        filesize = 0
							 | 
						
					
						
							
								
									
										
										
										
											2020-12-09 17:33:18 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        magnet_link = "magnet:?xt=urn:btih:{}&tr=https://tracker.acgsou.com:2710/announce"
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-26 15:49:33 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        category = extract_text(eval_xpath_getindex(result, xpath_category, 0, default=[]))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        page_a = eval_xpath_getindex(result, xpath_title, 0)
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        title = extract_text(page_a)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        href = base_url + page_a.attrib.get('href')
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 07:23:58 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        magnet_link = magnet_link.format(page_a.attrib.get('href')[5:-5])
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2020-11-26 15:49:33 +01:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, default=None)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        if filesize_info:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            try:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                filesize = filesize_info[:-2]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                filesize_multiplier = filesize_info[-2:]
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                filesize = get_torrent_size(filesize, filesize_multiplier)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								            except:
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                pass
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 14:36:15 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        # I didn't add download/seed/leech count since as I figured out they are generated randomly everytime
							 | 
						
					
						
							
								
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								        content = 'Category: "{category}".'
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        content = content.format(category=category)
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-27 07:29:17 +02:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2018-04-26 21:42:31 +02:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								        results.append({'url': href,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        'title': title,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        'content': content,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        'filesize': filesize,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        'magnetlink': magnet_link,
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								                        'template': 'torrent.html'})
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								    return results
							 |