[perf] torrents.html, files.html: don't parse and re-format filesize
This commit is contained in:
		
							parent
							
								
									16ce5612dd
								
							
						
					
					
						commit
						e9f8412a6e
					
				| @ -6,7 +6,7 @@ | ||||
| 
 | ||||
| from urllib.parse import quote, urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -40,9 +40,7 @@ def response(resp): | ||||
|         title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]')) | ||||
|         seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]')) | ||||
|         leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]')) | ||||
|         filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()')) | ||||
|         filesize, filesize_multiplier = filesize_info.split() | ||||
|         filesize = get_torrent_size(filesize, filesize_multiplier) | ||||
|         filesize = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()')) | ||||
| 
 | ||||
|         results.append( | ||||
|             { | ||||
|  | ||||
| @ -36,14 +36,11 @@ Implementations | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
| import re | ||||
| from datetime import datetime | ||||
| from urllib.parse import quote | ||||
| 
 | ||||
| from lxml import etree | ||||
| 
 | ||||
| from searx.utils import get_torrent_size | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
|     "website": 'https://bt4gprx.com', | ||||
| @ -103,8 +100,6 @@ def response(resp): | ||||
|         title = entry.find("title").text | ||||
|         link = entry.find("guid").text | ||||
|         fullDescription = entry.find("description").text.split('<br>') | ||||
|         filesize = fullDescription[1] | ||||
|         filesizeParsed = re.split(r"([A-Z]+)", filesize) | ||||
|         magnetlink = entry.find("link").text | ||||
|         pubDate = entry.find("pubDate").text | ||||
|         results.append( | ||||
| @ -114,7 +109,7 @@ def response(resp): | ||||
|                 'magnetlink': magnetlink, | ||||
|                 'seed': 'N/A', | ||||
|                 'leech': 'N/A', | ||||
|                 'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]), | ||||
|                 'filesize': fullDescription[1], | ||||
|                 'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'), | ||||
|                 'template': 'torrent.html', | ||||
|             } | ||||
|  | ||||
| @ -6,7 +6,7 @@ | ||||
| from urllib.parse import quote, urljoin | ||||
| 
 | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -58,13 +58,9 @@ def response(resp): | ||||
|         content = content.strip().replace('\n', ' | ') | ||||
|         content = ' '.join(content.split()) | ||||
| 
 | ||||
|         filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0] | ||||
|         filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1] | ||||
|         filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0] | ||||
|         files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0] | ||||
| 
 | ||||
|         # convert filesize to byte if possible | ||||
|         filesize = get_torrent_size(filesize, filesize_multiplier) | ||||
| 
 | ||||
|         # convert files to int if possible | ||||
|         try: | ||||
|             files = int(files) | ||||
|  | ||||
| @ -5,7 +5,7 @@ | ||||
| 
 | ||||
| from urllib.parse import urljoin | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size | ||||
| from searx.utils import extract_text | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -45,7 +45,7 @@ def response(resp): | ||||
|         title = extract_text(result.xpath('.//a[@title]')) | ||||
|         content = extract_text(result.xpath('.//div[@class="files"]')) | ||||
|         files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() | ||||
|         filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) | ||||
|         filesize = f"{files_data[FILESIZE]} {files_data[FILESIZE_MULTIPLIER]}" | ||||
|         magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] | ||||
| 
 | ||||
|         results.append( | ||||
|  | ||||
| @ -11,7 +11,6 @@ from searx.utils import ( | ||||
|     eval_xpath_getindex, | ||||
|     eval_xpath_list, | ||||
|     extract_text, | ||||
|     get_torrent_size, | ||||
|     int_or_zero, | ||||
| ) | ||||
| 
 | ||||
| @ -54,7 +53,7 @@ def response(resp): | ||||
|         result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]')) | ||||
|         result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]'))) | ||||
|         result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]'))) | ||||
|         result['filesize'] = get_torrent_size(*extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')).split()) | ||||
|         result['filesize'] = extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')) | ||||
| 
 | ||||
|         results.append(result) | ||||
| 
 | ||||
|  | ||||
| @ -9,7 +9,6 @@ from lxml import html | ||||
| from searx.utils import ( | ||||
|     eval_xpath_getindex, | ||||
|     extract_text, | ||||
|     get_torrent_size, | ||||
|     int_or_zero, | ||||
| ) | ||||
| 
 | ||||
| @ -99,11 +98,7 @@ def response(resp): | ||||
| 
 | ||||
|         # let's try to calculate the torrent size | ||||
| 
 | ||||
|         filesize = None | ||||
|         filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, '') | ||||
|         if filesize_info: | ||||
|             filesize_info = result.xpath(xpath_filesize)[0] | ||||
|             filesize = get_torrent_size(*filesize_info.split()) | ||||
|         filesize = eval_xpath_getindex(result, xpath_filesize, 0, '') | ||||
| 
 | ||||
|         # content string contains all information not included into template | ||||
|         content = 'Category: "{category}". Downloaded {downloads} times.' | ||||
|  | ||||
| @ -8,7 +8,7 @@ from datetime import datetime | ||||
| from operator import itemgetter | ||||
| 
 | ||||
| from urllib.parse import quote | ||||
| from searx.utils import get_torrent_size | ||||
| from searx.utils import humanize_bytes | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -80,17 +80,12 @@ def response(resp): | ||||
| 
 | ||||
|         # extract and convert creation date | ||||
|         try: | ||||
|             date = datetime.fromtimestamp(float(result["added"])) | ||||
|             params['publishedDate'] = date | ||||
|             params['publishedDate'] = datetime.fromtimestamp(float(result["added"])) | ||||
|         except:  # pylint: disable=bare-except | ||||
|             pass | ||||
| 
 | ||||
|         # let's try to calculate the torrent size | ||||
|         try: | ||||
|             filesize = get_torrent_size(result["size"], "B") | ||||
|             params['filesize'] = filesize | ||||
|         except:  # pylint: disable=bare-except | ||||
|             pass | ||||
|         params['filesize'] = humanize_bytes(int(result["size"])) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append(params) | ||||
|  | ||||
| @ -14,7 +14,6 @@ from searx.utils import ( | ||||
|     eval_xpath, | ||||
|     eval_xpath_getindex, | ||||
|     eval_xpath_list, | ||||
|     get_torrent_size, | ||||
| ) | ||||
| 
 | ||||
| about = { | ||||
| @ -63,7 +62,7 @@ def response(resp): | ||||
|             'leech': extract_text(stats[2]), | ||||
|             'title': extract_text(title), | ||||
|             'url': resp.search_params['base_url'] + url, | ||||
|             'filesize': get_torrent_size(*extract_text(stats[1]).split()), | ||||
|             'filesize': extract_text(stats[1]), | ||||
|             'magnetlink': magnet, | ||||
|             'torrentfile': torrentfile, | ||||
|             'metadata': extract_text(categ), | ||||
|  | ||||
| @ -8,7 +8,7 @@ from datetime import datetime | ||||
| from urllib.parse import urlencode | ||||
| 
 | ||||
| from lxml import html | ||||
| from searx.utils import extract_text, get_torrent_size, int_or_zero | ||||
| from searx.utils import extract_text, int_or_zero | ||||
| 
 | ||||
| # about | ||||
| about = { | ||||
| @ -49,7 +49,7 @@ def response(resp): | ||||
|         return [] | ||||
| 
 | ||||
|     # regular expression for parsing torrent size strings | ||||
|     size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) | ||||
|     size_re = re.compile(r'[\d.]+(T|G|M)?B', re.IGNORECASE) | ||||
| 
 | ||||
|     # processing the results, two rows at a time | ||||
|     for i in range(0, len(rows), 2): | ||||
| @ -73,9 +73,7 @@ def response(resp): | ||||
|             item = item.strip() | ||||
|             if item.startswith('Size:'): | ||||
|                 try: | ||||
|                     # ('1.228', 'GB') | ||||
|                     groups = size_re.match(item).groups() | ||||
|                     params['filesize'] = get_torrent_size(groups[0], groups[1]) | ||||
|                     params['filesize'] = size_re.search(item).group() | ||||
|                 except:  # pylint: disable=bare-except | ||||
|                     pass | ||||
|             elif item.startswith('Date:'): | ||||
|  | ||||
| @ -56,6 +56,7 @@ from urllib.parse import quote | ||||
| from lxml import etree  # type: ignore | ||||
| 
 | ||||
| from searx.exceptions import SearxEngineAPIException | ||||
| from searx.utils import humanize_bytes | ||||
| 
 | ||||
| if TYPE_CHECKING: | ||||
|     import httpx | ||||
| @ -137,11 +138,9 @@ def build_result(item: etree.Element) -> Dict[str, Any]: | ||||
|     if enclosure is not None: | ||||
|         enclosure_url = enclosure.get('url') | ||||
| 
 | ||||
|     size = get_attribute(item, 'size') | ||||
|     if not size and enclosure: | ||||
|         size = enclosure.get('length') | ||||
|     if size: | ||||
|         size = int(size) | ||||
|     filesize = get_attribute(item, 'size') | ||||
|     if not filesize and enclosure: | ||||
|         filesize = enclosure.get('length') | ||||
| 
 | ||||
|     guid = get_attribute(item, 'guid') | ||||
|     comments = get_attribute(item, 'comments') | ||||
| @ -154,7 +153,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]: | ||||
|     result: Dict[str, Any] = { | ||||
|         'template': 'torrent.html', | ||||
|         'title': get_attribute(item, 'title'), | ||||
|         'filesize': size, | ||||
|         'filesize': humanize_bytes(int(filesize)) if filesize else None, | ||||
|         'files': get_attribute(item, 'files'), | ||||
|         'seed': seeders, | ||||
|         'leech': _map_leechers(leechers, seeders, peers), | ||||
|  | ||||
| @ -35,14 +35,7 @@ | ||||
| 
 | ||||
| {%- if result.filename %}<tr><td>{{ _('Filename') }}</td><td>{{ result.filename|safe }}</td></tr>{% endif -%} | ||||
| 
 | ||||
| {%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td> | ||||
|         {%- if result.size < 1024 %}{{ result.size }} {{ _('Bytes') -}} | ||||
|         {%- elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') -}} | ||||
|         {%- elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') -}} | ||||
|         {%- elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') -}} | ||||
|         {%- else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%} | ||||
|     </td></tr> | ||||
| {%- endif -%} | ||||
| {%- if result.size %}<tr><td>{{ _('Filesize') }}</td><td>{{ result.size|safe }}</td></tr>{%- endif -%} | ||||
| 
 | ||||
| {%- if result.time %}<tr><td>{{ _('Date') }}</td><td>{{ result.time|safe }}</td></tr>{% endif -%} | ||||
| 
 | ||||
|  | ||||
| @ -8,14 +8,7 @@ | ||||
| 
 | ||||
| {% if result.seed is defined %}<p class="stat"> • {{ icon_big('arrow-swap') }} {{ _('Seeder') }} <span class="badge">{{ result.seed }}</span> • {{ _('Leecher') }} <span class="badge">{{ result.leech }}</span></p>{% endif %} | ||||
| 
 | ||||
| {%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge"> | ||||
|     {%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} | ||||
|     {%- elif result.filesize < 1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024) }} {{ _('kiB') }} | ||||
|     {%- elif result.filesize < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024) }} {{ _('MiB') }} | ||||
|     {%- elif result.filesize < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024) }} {{ _('GiB') }} | ||||
|     {%- else %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%} | ||||
|     </span></p> | ||||
| {%- endif -%} | ||||
| {%- if result.filesize %}<p class="stat">{{ icon_big('floppy-disk') }} {{ _('Filesize') }}<span class="badge">{{ result.filesize }}</span></p>{%- endif -%} | ||||
| 
 | ||||
| {%- if result.files %}<p class="stat">{{ icon_big('file') }} {{ _('Number of Files') }} <span class="badge">{{ result.files }}</span></p>{% endif -%} | ||||
| 
 | ||||
|  | ||||
| @ -332,29 +332,6 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict: | ||||
|     return {k: dictionary[k] for k in properties if k in dictionary} | ||||
| 
 | ||||
| 
 | ||||
| def get_torrent_size(filesize: str, filesize_multiplier: str) -> Optional[int]: | ||||
|     """ | ||||
| 
 | ||||
|     Args: | ||||
|         * filesize (str): size | ||||
|         * filesize_multiplier (str): TB, GB, .... TiB, GiB... | ||||
| 
 | ||||
|     Returns: | ||||
|         * int: number of bytes | ||||
| 
 | ||||
|     Example: | ||||
|         >>> get_torrent_size('5', 'GB') | ||||
|         5368709120 | ||||
|         >>> get_torrent_size('3.14', 'MiB') | ||||
|         3140000 | ||||
|     """ | ||||
|     try: | ||||
|         multiplier = _STORAGE_UNIT_VALUE.get(filesize_multiplier, 1) | ||||
|         return int(float(filesize) * multiplier) | ||||
|     except ValueError: | ||||
|         return None | ||||
| 
 | ||||
| 
 | ||||
| def humanize_bytes(size, precision=2): | ||||
|     """Determine the *human readable* value of bytes on 1024 base (1KB=1024B).""" | ||||
|     s = ['B ', 'KB', 'MB', 'GB', 'TB'] | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user