From 9e7f363aebe01542210633dd4027ce777bf31e3c Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 31 Oct 2024 15:48:27 +0000 Subject: [PATCH 1/5] use sets --- rich/cells.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index 17793eb5e..06a57b2c2 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -1,15 +1,19 @@ from __future__ import annotations -import re from functools import lru_cache from typing import Callable from ._cell_widths import CELL_WIDTHS -# Regex to match sequence of the most common character ranges -_is_single_cell_widths = re.compile( - "^[\u0020-\u007e\u00a0-\u02ff\u0370-\u0482\u2500-\u25FF]*$" -).match +_SINGLE_CELLS = frozenset( + [ + *map(chr, range(0x20, 0x7E + 1)), + *map(chr, range(0xA0, 0x02FF + 1)), + *map(chr, range(0x0370, 0x0482 + 1)), + *map(chr, range(0x2500, 0x25FF + 1)), + ] +) +_is_single_cell_widths = _SINGLE_CELLS.issuperset @lru_cache(4096) From 46150cdbf61426c4683c59a0e4f45dca23d38202 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Thu, 31 Oct 2024 18:45:13 +0000 Subject: [PATCH 2/5] sum and map is faster --- rich/cells.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index 06a57b2c2..3fa58c3e9 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -13,6 +13,7 @@ *map(chr, range(0x2500, 0x25FF + 1)), ] ) + _is_single_cell_widths = _SINGLE_CELLS.issuperset @@ -29,9 +30,9 @@ def cached_cell_len(text: str) -> int: Returns: int: Get the number of cells required to display text. """ - _get_size = get_character_cell_size - total_size = sum(_get_size(character) for character in text) - return total_size + if _is_single_cell_widths(text): + return len(text) + return sum(map(get_character_cell_size, text)) def cell_len(text: str, _cell_len: Callable[[str], int] = cached_cell_len) -> int: @@ -45,9 +46,9 @@ def cell_len(text: str, _cell_len: Callable[[str], int] = cached_cell_len) -> in """ if len(text) < 512: return _cell_len(text) - _get_size = get_character_cell_size - total_size = sum(_get_size(character) for character in text) - return total_size + if _is_single_cell_widths(text): + return len(text) + return sum(map(get_character_cell_size, text)) @lru_cache(maxsize=4096) From 6cef0bcb0e584eac1eb6021cc2202ecad70b6b11 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Fri, 1 Nov 2024 12:03:20 +0000 Subject: [PATCH 3/5] leaner cell_len --- rich/cells.py | 52 +++++++++++++++++++++++++++++---------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index 3fa58c3e9..64bf468cb 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -5,16 +5,37 @@ from ._cell_widths import CELL_WIDTHS -_SINGLE_CELLS = frozenset( - [ - *map(chr, range(0x20, 0x7E + 1)), - *map(chr, range(0xA0, 0x02FF + 1)), - *map(chr, range(0x0370, 0x0482 + 1)), - *map(chr, range(0x2500, 0x25FF + 1)), +# Ranges of unicode ordinals that produce a 1-cell wide character +_SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [ + (0x20, 0x7E), # Latin (excluding non-printable) + (0xA0, 0xAC), + (0xAE, 0x002FF), + (0x00370, 0x00482), # Greek / Cyrillic + (0x02500, 0x025FC), # Box drawing, box elements, geometric shapes + (0x02800, 0x028FF), # Braille +] + + +def _make_single_cell_set() -> frozenset[str]: + """Combine ranges of ordinals in to a frozen set of strings. + + Returns: + A frozenset of single cell characters. + + """ + character_range_lists = [ + list(map(chr, range(_start, _end + 1))) + for _start, _end in _SINGLE_CELL_UNICODE_RANGES ] -) + return frozenset(sum(character_range_lists, start=[])) + + +# A set of characters that are a single cell wide +_SINGLE_CELLS = _make_single_cell_set() -_is_single_cell_widths = _SINGLE_CELLS.issuperset +# When called with a string this will return True if all +# characters are single-cell, otherwise False +_is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset @lru_cache(4096) @@ -61,20 +82,7 @@ def get_character_cell_size(character: str) -> int: Returns: int: Number of cells (0, 1 or 2) occupied by that character. """ - return _get_codepoint_cell_size(ord(character)) - - -@lru_cache(maxsize=4096) -def _get_codepoint_cell_size(codepoint: int) -> int: - """Get the cell size of a character. - - Args: - codepoint (int): Codepoint of a character. - - Returns: - int: Number of cells (0, 1 or 2) occupied by that character. - """ - + codepoint = ord(character) _table = CELL_WIDTHS lower_bound = 0 upper_bound = len(_table) - 1 From aaaef278be38ebadea3d6f47dedd89fd910078ca Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Fri, 1 Nov 2024 12:14:20 +0000 Subject: [PATCH 4/5] leaner syntax --- rich/cells.py | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/rich/cells.py b/rich/cells.py index 64bf468cb..52245df32 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -15,23 +15,14 @@ (0x02800, 0x028FF), # Braille ] - -def _make_single_cell_set() -> frozenset[str]: - """Combine ranges of ordinals in to a frozen set of strings. - - Returns: - A frozenset of single cell characters. - - """ - character_range_lists = [ - list(map(chr, range(_start, _end + 1))) +# A set of characters that are a single cell wide +_SINGLE_CELLS = frozenset( + [ + character for _start, _end in _SINGLE_CELL_UNICODE_RANGES + for character in map(chr, range(_start, _end + 1)) ] - return frozenset(sum(character_range_lists, start=[])) - - -# A set of characters that are a single cell wide -_SINGLE_CELLS = _make_single_cell_set() +) # When called with a string this will return True if all # characters are single-cell, otherwise False From 02f3d148e8f7143519272ed6404cc6894dc13ec6 Mon Sep 17 00:00:00 2001 From: Will McGugan Date: Fri, 1 Nov 2024 12:16:56 +0000 Subject: [PATCH 5/5] comment --- rich/cells.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rich/cells.py b/rich/cells.py index 52245df32..a85462271 100644 --- a/rich/cells.py +++ b/rich/cells.py @@ -6,6 +6,7 @@ from ._cell_widths import CELL_WIDTHS # Ranges of unicode ordinals that produce a 1-cell wide character +# This is non-exhaustive, but covers most common Western characters _SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [ (0x20, 0x7E), # Latin (excluding non-printable) (0xA0, 0xAC),