Update wrapping logic to fix issues with CJK characters disappearing w…

…hen the "fold" location sat *within* a double-width character. Ensure we retain browser logic of: if there is no space on the current line, move to a new line, and if there's not enough space on the entire new line, fold the text over multiple lines at appropriate locations.
Textualize · Nov 1, 2023 · 9f93126 · 9f93126
1 parent 85d89d0
commit 9f93126
Show file tree

Hide file tree

Showing 3 changed files with 56 additions and 74 deletions.
diff --git a/rich/_wrap.py b/rich/_wrap.py
@@ -1,15 +1,15 @@
 from __future__ import annotations
 
 import re
-from typing import Iterable, List, Tuple
+from typing import Iterable
 
 from ._loop import loop_last
-from .cells import cell_len, fit_to_width
+from .cells import cell_len, fold_to_width
 
 re_word = re.compile(r"\s*\S+\s*")
 
 
-def words(text: str) -> Iterable[Tuple[int, int, str]]:
+def words(text: str) -> Iterable[tuple[int, int, str]]:
     """Yields each word from the text as a tuple containing (start_index, end_index, word)."""
     position = 0
     word_match = re_word.match(text, position)
@@ -20,37 +20,6 @@ def words(text: str) -> Iterable[Tuple[int, int, str]]:
         word_match = re_word.match(text, end)
 
 
-def divide_line(text: str, width: int, fold: bool = True) -> List[int]:
-    divides: List[int] = []
-    append = divides.append
-    line_position = 0
-    _cell_len = cell_len
-    for start, _end, word in words(text):
-        word_length = _cell_len(word.rstrip())
-        if line_position + word_length > width:
-            if word_length > width:
-                if fold:
-                    chopped_words = fit_to_width(word, width=width)
-                    for last, line in loop_last(chopped_words):
-                        if start:
-                            append(start)
-
-                        if last:
-                            line_position = _cell_len(line)
-                        else:
-                            start += len(line)
-                else:
-                    if start:
-                        append(start)
-                    line_position = _cell_len(word)
-            elif line_position and start:
-                append(start)
-                line_position = _cell_len(word)
-        else:
-            line_position += _cell_len(word)
-    return divides
-
-
 def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
     """Given a string of text, and a width (measured in cells), return a list
     of cell offsets which the string should be split at in order for it to fit
@@ -62,35 +31,62 @@ def divide_line(text: str, width: int, fold: bool = True) -> list[int]:
         fold: If True, words longer than `width` will be folded onto a new line.
 
     Returns:
-        A list of cell offsets to break the line at.
+        A list of indices to break the line at.
     """
-
-    break_offsets: list[int] = []  # offsets to insert the breaks at
-    append = break_offsets.append
-    line_position = 0
+    break_positions: list[int] = []  # offsets to insert the breaks at
+    append = break_positions.append
+    cell_offset = 0
     _cell_len = cell_len
 
     for start, _end, word in words(text):
         word_length = _cell_len(word.rstrip())
-        remaining_space = width - line_position
+        remaining_space = width - cell_offset
         word_fits_remaining_space = remaining_space - word_length >= 0
-        if not word_fits_remaining_space:
+
+        if word_fits_remaining_space:
+            # Simplest case - the word fits within the remaining width for this line.
+            cell_offset += _cell_len(word)
+        else:
+            # Not enough space remaining for this word on the current line.
             if word_length > width:
                 # The word doesn't fit on any line, so we can't simply
                 # place it on the next line...
                 if fold:
-                    # ... fold the long word it across multiple lines
-
+                    # ... fold the long word across multiple lines.
                     # We need to fit as much as possible of the word into the remaining
                     # space on the current line.
+                    folded_word = fold_to_width(word, width=width)
+                    for last, line in loop_last(folded_word):
+                        if start:
+                            append(start)
+                        if last:
+                            cell_offset = _cell_len(line)
+                        else:
+                            start += len(line)
+                else:
+                    # Folding isn't allowed, so crop the word.
+                    if start:
+                        append(start)
+                    cell_offset = _cell_len(word)
+            elif cell_offset and start:
+                # The word doesn't fit within the remaining space on the current
+                # line, but it *can* fit on to the next (empty) line.
+                append(start)
+                cell_offset = _cell_len(word)
 
-                    # Take characters from the word until we run out of remaining space.
-                    pass
+    return break_positions
 
 
 if __name__ == "__main__":  # pragma: no cover
     from .console import Console
 
     console = Console(width=10)
     console.print("12345 abcdefghijklmnopqrstuvwyxzABCDEFGHIJKLMNOPQRSTUVWXYZ 12345")
-    print(fit_to_width("abcdefghijklmnopqrstuvwxyz", 10))
+    print(fold_to_width("abcdefghijklmnopqrstuvwxyz", 10))
+
+    console = Console(width=20)
+    console.rule()
+    console.print("TextualはPythonの高速アプリケーション開発フレームワークです")
+
+    console.rule()
+    console.print("アプリケーションは1670万色を使用でき")
diff --git a/rich/cells.py b/rich/cells.py
@@ -123,15 +123,15 @@ def set_cell_size(text: str, total: int) -> str:
 
 # TODO: This is inefficient
 # TODO: This might not work with CWJ type characters
-def fit_to_width(
-    text: str, width: int, *, first_line_width: int | None = None
+def fold_to_width(
+    text: str,
+    width: int,
 ) -> list[str]:
     """Split text into lines such that each line fits within the available (cell) width.
 
     Args:
-        text: The text to fit.
-        width: The width available.
-        first_line_width: The width available on the first line.
+        text: The text to fold such that it fits in the given width.
+        width: The width available (number of cells).
 
     Returns:
         A list of strings such that each string in the list has cell width
@@ -146,12 +146,8 @@ def fit_to_width(
     total_width = 0
 
     for character in text:
-        available_width = (
-            first_line_width if len(lines) == 1 and first_line_width else width
-        )
-
         cell_width = _get_character_cell_size(character)
-        char_doesnt_fit = total_width + cell_width > available_width
+        char_doesnt_fit = total_width + cell_width > width
 
         if char_doesnt_fit:
             start_new_line([character])
@@ -167,7 +163,7 @@ def fit_to_width(
 if __name__ == "__main__":  # pragma: no cover
 
     print(get_character_cell_size("😽"))
-    for line in fit_to_width("""这是对亚洲语言支持的测试。面对模棱两可的想法，拒绝猜测的诱惑。""", 8):
+    for line in fold_to_width("""这是对亚洲语言支持的测试。面对模棱两可的想法，拒绝猜测的诱惑。""", 8):
         print(line)
     for n in range(80, 1, -1):
         print(set_cell_size("""这是对亚洲语言支持的测试。面对模棱两可的想法，拒绝猜测的诱惑。""", n) + "|")

diff --git a/tests/test_cells.py b/tests/test_cells.py
@@ -1,5 +1,5 @@
 from rich import cells
-from rich.cells import fit_to_width
+from rich.cells import fold_to_width
 
 
 def test_cell_len_long_string():
@@ -43,29 +43,19 @@ def test_set_cell_size_infinite():
         )
 
 
-def test_fit_to_width():
+def test_fold_to_width():
     """Simple example of splitting cells into lines of width 3."""
     text = "abcdefghijk"
-    assert fit_to_width(text, 3) == ["abc", "def", "ghi", "jk"]
+    assert fold_to_width(text, 3) == ["abc", "def", "ghi", "jk"]
 
 
-def test_fit_to_width_double_width_boundary():
+def test_fold_to_width_double_width_boundary():
     """The available width lies within a double-width character."""
     text = "ありがとう"
-    assert fit_to_width(text, 3) == ["あ", "り", "が", "と", "う"]
+    assert fold_to_width(text, 3) == ["あ", "り", "が", "と", "う"]
 
 
-def test_fit_to_width_mixed_width():
+def test_fold_to_width_mixed_width():
     """Mixed single and double-width characters."""
     text = "あ1り2が3と4う56"
-    assert fit_to_width(text, 3) == ["あ1", "り2", "が3", "と4", "う5", "6"]
-
-
-def test_fit_to_width_first_line_width():
-    """Text being passed into this function is often appearing near the end
-    of a line in a document, and so the first line has a different amount of
-    width available."""
-    text = "あ1り2が3と4う56"
-    fitted_lines = fit_to_width(text, 3, first_line_width=2)
-    # Only 2 cells available on the 1st line, so the 1 gets folded to the 2nd line.
-    assert fitted_lines == ["あ", "1り", "2が", "3と", "4う", "56"]
+    assert fold_to_width(text, 3) == ["あ1", "り2", "が3", "と4", "う5", "6"]