Merge pull request #1283 from rinpatch/acgsou-engine
[Feature] Acgsou as a searchable engine
This commit is contained in:
		
						commit
						1a1f9852f1
					
				
							
								
								
									
										75
									
								
								searx/engines/acgsou.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								searx/engines/acgsou.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | |||||||
|  | """ | ||||||
|  |  Acgsou (Japanese Animation/Music/Comics Bittorrent tracker) | ||||||
|  | 
 | ||||||
|  |  @website      https://www.acgsou.com/ | ||||||
|  |  @provide-api  no | ||||||
|  |  @using-api    no | ||||||
|  |  @results      HTML | ||||||
|  |  @stable       no (HTML can change) | ||||||
|  |  @parse        url, title, content, filesize, magnetlink | ||||||
|  | """ | ||||||
|  | 
 | ||||||
|  | from lxml import html | ||||||
|  | from searx.engines.xpath import extract_text | ||||||
|  | from searx.url_utils import urlencode | ||||||
|  | from searx.utils import get_torrent_size, int_or_zero | ||||||
|  | 
 | ||||||
# --- engine settings ---------------------------------------------------------
# Categories this engine is listed under, and whether it supports paging.
categories = ['files', 'images', 'videos', 'music']
paging = True

# --- URLs --------------------------------------------------------------------
# `search_url` has two placeholders: {query} (an already url-encoded
# "keyword=..." pair) and {offset} (the page number).
base_url = 'https://www.acgsou.com/'
search_url = base_url + 'search.php?{query}&page={offset}'

# --- XPath selectors ---------------------------------------------------------
# `xpath_results` selects the data rows (rows without <th>) of the results
# table; the remaining selectors are evaluated relative to one such row.
xpath_results = '//table[contains(@class, "list_style table_fixed")]//tr[not(th)]'
xpath_category = './/td[2]/a[1]'
xpath_title = './/td[3]/a[last()]'
xpath_torrent_links = './/td[3]/a'
xpath_filesize = './/td[4]/text()'
|  | 
 | ||||||
|  | 
 | ||||||
def request(query, params):
    """Fill ``params['url']`` with the Acgsou search URL for ``query``.

    :param query: the search terms; sent as the ``keyword`` URL parameter.
    :param params: request parameters; ``params['pageno']`` is read for the
                   page number and ``params['url']`` is written.
    :returns: the same ``params`` object, with ``url`` set.
    """
    encoded_keyword = urlencode({'keyword': query})
    page_number = params['pageno']
    params['url'] = search_url.format(query=encoded_keyword, offset=page_number)
    return params
|  | 
 | ||||||
|  | 
 | ||||||
def response(resp):
    """Parse an Acgsou search results page into a list of torrent results.

    Every table row matched by ``xpath_results`` produces one dict with the
    keys ``url``, ``title``, ``content``, ``filesize``, ``magnetlink`` and
    ``template``.  Rows that have no title link, or whose title link has no
    ``href``, are skipped instead of aborting the whole page.

    :param resp: response object; only its ``text`` attribute (the HTML
                 source of the results page) is read.
    :returns: list of result dicts using the ``torrent.html`` template; empty
              when no rows match.
    """
    results = []
    dom = html.fromstring(resp.text)

    # Loop-invariant: the "{}" placeholder receives the per-row torrent id.
    magnet_template = "magnet:?xt=urn:btih:{}&tr=http://tracker.acgsou.com:2710/announce"

    for result in dom.xpath(xpath_results):
        # A row without a title link cannot be turned into a result.
        title_links = result.xpath(xpath_title)
        if not title_links:
            continue
        page_a = title_links[0]
        page_href = page_a.attrib.get('href')
        if not page_href:
            continue

        title = extract_text(page_a)
        href = base_url + page_href

        # Drop the first and last five characters of the href to obtain the
        # id used in the magnet link (presumably hrefs look like
        # "show-<id>.html" -- verify against the live site).
        magnet_link = magnet_template.format(page_href[5:-5])

        # The category is optional; always start from a per-row default so a
        # missing cell never leaks the previous row's value.
        category_links = result.xpath(xpath_category)
        category = extract_text(category_links[0]) if category_links else ''

        # File size is best-effort: keep 0 when the cell is missing or cannot
        # be converted.  The cell text is "<number><2-letter unit>", e.g. "1MB".
        filesize = 0
        filesize_texts = result.xpath(xpath_filesize)
        if filesize_texts:
            filesize_info = filesize_texts[0].strip()
            try:
                filesize = get_torrent_size(filesize_info[:-2], filesize_info[-2:])
            except (ValueError, TypeError):
                filesize = 0

        # Download/seed/leech counts are intentionally not reported: the site
        # appears to generate them randomly on every request.
        content = 'Category: "{category}".'.format(category=category)

        results.append({'url': href,
                        'title': title,
                        'content': content,
                        'filesize': filesize,
                        'magnetlink': magnet_link,
                        'template': 'torrent.html'})
    return results
| @ -434,6 +434,12 @@ engines: | |||||||
|     shortcut : nt |     shortcut : nt | ||||||
|     disabled : True |     disabled : True | ||||||
|    |    | ||||||
|  |   - name : acgsou | ||||||
|  |     engine : acgsou | ||||||
|  |     shortcut : acg | ||||||
|  |     disabled : True | ||||||
|  |     timeout: 5.0 | ||||||
|  | 
 | ||||||
|   - name : openairedatasets |   - name : openairedatasets | ||||||
|     engine : json_engine |     engine : json_engine | ||||||
|     paging : True |     paging : True | ||||||
|  | |||||||
							
								
								
									
										77
									
								
								tests/unit/engines/test_acgsou.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										77
									
								
								tests/unit/engines/test_acgsou.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,77 @@ | |||||||
|  | from collections import defaultdict | ||||||
|  | import mock | ||||||
|  | from searx.engines import acgsou | ||||||
|  | from searx.testing import SearxTestCase | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class TestAcgsouEngine(SearxTestCase):
    """Unit tests for the ``acgsou`` engine's ``request`` and ``response``."""

    def test_request(self):
        """``request`` must build a URL containing the query, host and page."""
        query = 'test_query'
        dic = defaultdict(dict)
        dic['pageno'] = 1
        params = acgsou.request(query, dic)
        self.assertIn('url', params)
        self.assertIn(query, params['url'])
        self.assertIn('acgsou.com', params['url'])
        self.assertIn('page=1', params['url'])

    def test_response(self):
        """``response`` must return [] for an empty page and parse one row."""
        # A page without a results table yields no results.
        resp = mock.Mock(text='<html></html>')
        self.assertEqual(acgsou.response(resp), [])

        # Minimal results table: one header row (ignored) and one data row.
        html = """
        <html>
<table id="listTable" class="list_style table_fixed">
  <thead class="tcat">
      <tr>
        <th axis="string" class="l1 tableHeaderOver">test</th>
        <th axis="string" class="l2 tableHeaderOver">test</th>
        <th axis="string" class="l3 tableHeaderOver">test</th>
        <th axis="size" class="l4 tableHeaderOver">test</th>
        <th axis="number" class="l5 tableHeaderOver">test</th>
        <th axis="number" class="l6 tableHeaderOver">test</th>
        <th axis="number" class="l7 tableHeaderOver">test</th>
        <th axis="string" class="l8 tableHeaderOver">test</th>
      </tr>
  </thead>
  <tbody class="tbody" id="data_list">
 <tr class="alt1 ">
        <td nowrap="nowrap">date</td>
        <td><a href="category.html">testcategory</a></td>
        <td style="text-align:left;">
            <a href="show-torrentid.html" target="_blank">torrentname</a>
        </td>
        <td>1MB</td>
        <td nowrap="nowrap">
            <span class="bts_1">
            29
            </span>
        </td>
        <td nowrap="nowrap">
            <span class="btl_1">
            211
        </span>
        </td>
        <td nowrap="nowrap">
        <span class="btc_">
            168
        </span>
        </td>
        <td><a href="random.html">user</a></td>
      </tr>
      </tbody>
</table>
</html>
        """

        resp = mock.Mock(text=html)
        results = acgsou.response(resp)

        self.assertIsInstance(results, list)
        self.assertEqual(len(results), 1)

        r = results[0]
        self.assertEqual(r['url'], 'https://www.acgsou.com/show-torrentid.html')
        self.assertEqual(r['content'], 'Category: "testcategory".')
        self.assertEqual(r['title'], 'torrentname')
        self.assertEqual(r['filesize'], 1048576)
        # The id in the magnet link is the href with "show-" and ".html" removed.
        self.assertEqual(
            r['magnetlink'],
            'magnet:?xt=urn:btih:torrentid&tr=http://tracker.acgsou.com:2710/announce')
		Loading…
	
		Reference in New Issue
	
	Block a user