From e9f8412a6e4b399a3335da73b3d321104bb0c4fb Mon Sep 17 00:00:00 2001 From: Bnyro Date: Wed, 12 Jun 2024 22:35:13 +0200 Subject: [PATCH] [perf] torrents.html, files.html: don't parse and re-format filesize --- searx/engines/1337x.py | 6 ++--- searx/engines/bt4g.py | 7 +----- searx/engines/btdigg.py | 8 ++----- searx/engines/digbt.py | 4 ++-- searx/engines/kickass.py | 3 +-- searx/engines/nyaa.py | 7 +----- searx/engines/piratebay.py | 11 +++------ searx/engines/solidtorrents.py | 3 +-- searx/engines/tokyotoshokan.py | 8 +++---- searx/engines/torznab.py | 11 ++++----- .../simple/result_templates/files.html | 9 +------- .../simple/result_templates/torrent.html | 9 +------- searx/utils.py | 23 ------------------- 13 files changed, 23 insertions(+), 86 deletions(-) diff --git a/searx/engines/1337x.py b/searx/engines/1337x.py index 22129750332..f9a0d0412aa 100644 --- a/searx/engines/1337x.py +++ b/searx/engines/1337x.py @@ -6,7 +6,7 @@ from urllib.parse import quote, urljoin from lxml import html -from searx.utils import extract_text, get_torrent_size, eval_xpath, eval_xpath_list, eval_xpath_getindex +from searx.utils import extract_text, eval_xpath, eval_xpath_list, eval_xpath_getindex # about about = { @@ -40,9 +40,7 @@ def response(resp): title = extract_text(eval_xpath(result, './td[contains(@class, "name")]/a[2]')) seed = extract_text(eval_xpath(result, './/td[contains(@class, "seeds")]')) leech = extract_text(eval_xpath(result, './/td[contains(@class, "leeches")]')) - filesize_info = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()')) - filesize, filesize_multiplier = filesize_info.split() - filesize = get_torrent_size(filesize, filesize_multiplier) + filesize = extract_text(eval_xpath(result, './/td[contains(@class, "size")]/text()')) results.append( { diff --git a/searx/engines/bt4g.py b/searx/engines/bt4g.py index 98a8c308773..a468124fe16 100644 --- a/searx/engines/bt4g.py +++ b/searx/engines/bt4g.py @@ -36,14 +36,11 @@ """ -import re from datetime import datetime from urllib.parse import quote from lxml import etree -from searx.utils import get_torrent_size - # about about = { "website": 'https://bt4gprx.com', @@ -103,8 +100,6 @@ def response(resp): title = entry.find("title").text link = entry.find("guid").text fullDescription = entry.find("description").text.split('
') - filesize = fullDescription[1] - filesizeParsed = re.split(r"([A-Z]+)", filesize) magnetlink = entry.find("link").text pubDate = entry.find("pubDate").text results.append( @@ -114,7 +109,7 @@ def response(resp): 'magnetlink': magnetlink, 'seed': 'N/A', 'leech': 'N/A', - 'filesize': get_torrent_size(filesizeParsed[0], filesizeParsed[1]), + 'filesize': fullDescription[1], 'publishedDate': datetime.strptime(pubDate, '%a,%d %b %Y %H:%M:%S %z'), 'template': 'torrent.html', } diff --git a/searx/engines/btdigg.py b/searx/engines/btdigg.py index 588d62093e8..19378518212 100644 --- a/searx/engines/btdigg.py +++ b/searx/engines/btdigg.py @@ -6,7 +6,7 @@ from urllib.parse import quote, urljoin from lxml import html -from searx.utils import extract_text, get_torrent_size +from searx.utils import extract_text # about about = { @@ -58,13 +58,9 @@ def response(resp): content = content.strip().replace('\n', ' | ') content = ' '.join(content.split()) - filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[0] - filesize_multiplier = result.xpath('.//span[@class="torrent_size"]/text()')[0].split()[1] + filesize = result.xpath('.//span[@class="torrent_size"]/text()')[0] files = (result.xpath('.//span[@class="torrent_files"]/text()') or ['1'])[0] - # convert filesize to byte if possible - filesize = get_torrent_size(filesize, filesize_multiplier) - # convert files to int if possible try: files = int(files) diff --git a/searx/engines/digbt.py b/searx/engines/digbt.py index ae78f1a9e6e..fd3d1fb1691 100644 --- a/searx/engines/digbt.py +++ b/searx/engines/digbt.py @@ -5,7 +5,7 @@ from urllib.parse import urljoin from lxml import html -from searx.utils import extract_text, get_torrent_size +from searx.utils import extract_text # about about = { @@ -45,7 +45,7 @@ def response(resp): title = extract_text(result.xpath('.//a[@title]')) content = extract_text(result.xpath('.//div[@class="files"]')) files_data = extract_text(result.xpath('.//div[@class="tail"]')).split() - filesize = get_torrent_size(files_data[FILESIZE], files_data[FILESIZE_MULTIPLIER]) + filesize = f"{files_data[FILESIZE]} {files_data[FILESIZE_MULTIPLIER]}" magnetlink = result.xpath('.//div[@class="tail"]//a[@class="title"]/@href')[0] results.append( diff --git a/searx/engines/kickass.py b/searx/engines/kickass.py index 12bf9c04d43..311c2885baa 100644 --- a/searx/engines/kickass.py +++ b/searx/engines/kickass.py @@ -11,7 +11,6 @@ eval_xpath_getindex, eval_xpath_list, extract_text, - get_torrent_size, int_or_zero, ) @@ -54,7 +53,7 @@ def response(resp): result['content'] = extract_text(eval_xpath(tag, './/span[@class="font11px lightgrey block"]')) result['seed'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "green")]'))) result['leech'] = int_or_zero(extract_text(eval_xpath(tag, './/td[contains(@class, "red")]'))) - result['filesize'] = get_torrent_size(*extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')).split()) + result['filesize'] = extract_text(eval_xpath(tag, './/td[contains(@class, "nobr")]')) results.append(result) diff --git a/searx/engines/nyaa.py b/searx/engines/nyaa.py index c22339dbf55..f3862f503b6 100644 --- a/searx/engines/nyaa.py +++ b/searx/engines/nyaa.py @@ -9,7 +9,6 @@ from searx.utils import ( eval_xpath_getindex, extract_text, - get_torrent_size, int_or_zero, ) @@ -99,11 +98,7 @@ def response(resp): # let's try to calculate the torrent size - filesize = None - filesize_info = eval_xpath_getindex(result, xpath_filesize, 0, '') - if filesize_info: - filesize_info = result.xpath(xpath_filesize)[0] - filesize = get_torrent_size(*filesize_info.split()) + filesize = eval_xpath_getindex(result, xpath_filesize, 0, '') # content string contains all information not included into template content = 'Category: "{category}". Downloaded {downloads} times.' diff --git a/searx/engines/piratebay.py b/searx/engines/piratebay.py index ba58e64fcf0..e1f3f611a51 100644 --- a/searx/engines/piratebay.py +++ b/searx/engines/piratebay.py @@ -8,7 +8,7 @@ from operator import itemgetter from urllib.parse import quote -from searx.utils import get_torrent_size +from searx.utils import humanize_bytes # about about = { @@ -80,17 +80,12 @@ def response(resp): # extract and convert creation date try: - date = datetime.fromtimestamp(float(result["added"])) - params['publishedDate'] = date + params['publishedDate'] = datetime.fromtimestamp(float(result["added"])) except: # pylint: disable=bare-except pass # let's try to calculate the torrent size - try: - filesize = get_torrent_size(result["size"], "B") - params['filesize'] = filesize - except: # pylint: disable=bare-except - pass + params['filesize'] = humanize_bytes(int(result["size"])) # append result results.append(params) diff --git a/searx/engines/solidtorrents.py b/searx/engines/solidtorrents.py index 3cad8e5495d..c2f7e435e75 100644 --- a/searx/engines/solidtorrents.py +++ b/searx/engines/solidtorrents.py @@ -14,7 +14,6 @@ eval_xpath, eval_xpath_getindex, eval_xpath_list, - get_torrent_size, ) about = { @@ -63,7 +62,7 @@ def response(resp): 'leech': extract_text(stats[2]), 'title': extract_text(title), 'url': resp.search_params['base_url'] + url, - 'filesize': get_torrent_size(*extract_text(stats[1]).split()), + 'filesize': extract_text(stats[1]), 'magnetlink': magnet, 'torrentfile': torrentfile, 'metadata': extract_text(categ), diff --git a/searx/engines/tokyotoshokan.py b/searx/engines/tokyotoshokan.py index 33f03642885..ad0be1985ac 100644 --- a/searx/engines/tokyotoshokan.py +++ b/searx/engines/tokyotoshokan.py @@ -8,7 +8,7 @@ from urllib.parse import urlencode from lxml import html -from searx.utils import extract_text, get_torrent_size, int_or_zero +from searx.utils import extract_text, int_or_zero # about about = { @@ -49,7 +49,7 @@ def response(resp): return [] # regular expression for parsing torrent size strings - size_re = re.compile(r'Size:\s*([\d.]+)(TB|GB|MB|B)', re.IGNORECASE) + size_re = re.compile(r'[\d.]+(T|G|M)?B', re.IGNORECASE) # processing the results, two rows at a time for i in range(0, len(rows), 2): @@ -73,9 +73,7 @@ def response(resp): item = item.strip() if item.startswith('Size:'): try: - # ('1.228', 'GB') - groups = size_re.match(item).groups() - params['filesize'] = get_torrent_size(groups[0], groups[1]) + params['filesize'] = size_re.search(item).group() except: # pylint: disable=bare-except pass elif item.startswith('Date:'): diff --git a/searx/engines/torznab.py b/searx/engines/torznab.py index 70ba78ab476..cfe7e2b4f2e 100644 --- a/searx/engines/torznab.py +++ b/searx/engines/torznab.py @@ -56,6 +56,7 @@ from lxml import etree # type: ignore from searx.exceptions import SearxEngineAPIException +from searx.utils import humanize_bytes if TYPE_CHECKING: import httpx @@ -137,11 +138,9 @@ def build_result(item: etree.Element) -> Dict[str, Any]: if enclosure is not None: enclosure_url = enclosure.get('url') - size = get_attribute(item, 'size') - if not size and enclosure: - size = enclosure.get('length') - if size: - size = int(size) + filesize = get_attribute(item, 'size') + if not filesize and enclosure: + filesize = enclosure.get('length') guid = get_attribute(item, 'guid') comments = get_attribute(item, 'comments') @@ -154,7 +153,7 @@ def build_result(item: etree.Element) -> Dict[str, Any]: result: Dict[str, Any] = { 'template': 'torrent.html', 'title': get_attribute(item, 'title'), - 'filesize': size, + 'filesize': humanize_bytes(int(filesize)) if filesize else None, 'files': get_attribute(item, 'files'), 'seed': seeders, 'leech': _map_leechers(leechers, seeders, peers), diff --git a/searx/templates/simple/result_templates/files.html b/searx/templates/simple/result_templates/files.html index 0a1424da6f1..01dfc3535a9 100644 --- a/searx/templates/simple/result_templates/files.html +++ b/searx/templates/simple/result_templates/files.html @@ -35,14 +35,7 @@ {%- if result.filename %}{{ _('Filename') }}{{ result.filename|safe }}{% endif -%} -{%- if result.size %}{{ _('Filesize') }} - {%- if result.size < 1024 %}{{ result.size }} {{ _('Bytes') -}} - {%- elif result.size < 1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024) }} {{ _('kiB') -}} - {%- elif result.size < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024) }} {{ _('MiB') -}} - {%- elif result.size < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024) }} {{ _('GiB') -}} - {%- else %}{{ '{0:0.2f}'.format(result.size/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%} - -{%- endif -%} +{%- if result.size %}{{ _('Filesize') }}{{ result.size|safe }}{%- endif -%} {%- if result.time %}{{ _('Date') }}{{ result.time|safe }}{% endif -%} diff --git a/searx/templates/simple/result_templates/torrent.html b/searx/templates/simple/result_templates/torrent.html index 46cde2a6519..befad2c89be 100644 --- a/searx/templates/simple/result_templates/torrent.html +++ b/searx/templates/simple/result_templates/torrent.html @@ -8,14 +8,7 @@ {% if result.seed is defined %}

• {{ icon_big('arrow-swap') }} {{ _('Seeder') }} {{ result.seed }} • {{ _('Leecher') }} {{ result.leech }}

{% endif %} -{%- if result.filesize %}

{{ icon_big('floppy-disk') }} {{ _('Filesize') }} - {%- if result.filesize < 1024 %}{{ result.filesize }} {{ _('Bytes') }} - {%- elif result.filesize < 1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024) }} {{ _('kiB') }} - {%- elif result.filesize < 1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024) }} {{ _('MiB') }} - {%- elif result.filesize < 1024*1024*1024*1024 %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024) }} {{ _('GiB') }} - {%- else %}{{ '{0:0.2f}'.format(result.filesize/1024/1024/1024/1024) }} {{ _('TiB') }}{% endif -%} -

-{%- endif -%} +{%- if result.filesize %}

{{ icon_big('floppy-disk') }} {{ _('Filesize') }}{{ result.filesize }}

{%- endif -%} {%- if result.files %}

{{ icon_big('file') }} {{ _('Number of Files') }} {{ result.files }}

{% endif -%} diff --git a/searx/utils.py b/searx/utils.py index 58ff72bb9eb..0c11ccc655a 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -332,29 +332,6 @@ def dict_subset(dictionary: MutableMapping, properties: Set[str]) -> Dict: return {k: dictionary[k] for k in properties if k in dictionary} -def get_torrent_size(filesize: str, filesize_multiplier: str) -> Optional[int]: - """ - - Args: - * filesize (str): size - * filesize_multiplier (str): TB, GB, .... TiB, GiB... - - Returns: - * int: number of bytes - - Example: - >>> get_torrent_size('5', 'GB') - 5368709120 - >>> get_torrent_size('3.14', 'MiB') - 3140000 - """ - try: - multiplier = _STORAGE_UNIT_VALUE.get(filesize_multiplier, 1) - return int(float(filesize) * multiplier) - except ValueError: - return None - - def humanize_bytes(size, precision=2): """Determine the *human readable* value of bytes on 1024 base (1KB=1024B).""" s = ['B ', 'KB', 'MB', 'GB', 'TB']