Skip to content

Commit

Permalink
Merge pull request #684 from googlefonts/disable-flake8-b905
Browse files Browse the repository at this point in the history
Disable warning B905 until we require Python >= 3.10
madig authored Dec 8, 2022
2 parents e0b810b + f302d43 commit dd738cd
Showing 7 changed files with 120 additions and 16 deletions.
4 changes: 4 additions & 0 deletions Lib/ufo2ft/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from types import MappingProxyType

SPARSE_TTF_MASTER_TABLES = frozenset(
["glyf", "head", "hmtx", "loca", "maxp", "post", "vmtx"]
)
@@ -40,6 +42,8 @@

COMMON_SCRIPT = "Zyyy"

UNICODE_SCRIPT_ALIASES = MappingProxyType({"Hira": "Hrkt", "Kana": "Hrkt"})

INDIC_SCRIPTS = [
"Beng", # Bengali
"Cham", # Cham
5 changes: 2 additions & 3 deletions Lib/ufo2ft/featureWriters/baseFeatureWriter.py
Original file line number Diff line number Diff line change
@@ -2,11 +2,10 @@
from collections import OrderedDict, namedtuple
from types import SimpleNamespace

from fontTools import unicodedata

from ufo2ft.constants import OPENTYPE_CATEGORIES_KEY
from ufo2ft.errors import InvalidFeaturesData
from ufo2ft.featureWriters import ast
from ufo2ft.util import unicodeScriptExtensions

INSERT_FEATURE_MARKER = r"\s*# Automatic Code.*"

@@ -414,7 +413,7 @@ def guessFontScripts(self):
if glyph.name not in glyphSet or glyph.unicodes is None:
continue
for codepoint in glyph.unicodes:
scripts = unicodedata.script_extension(chr(codepoint))
scripts = unicodeScriptExtensions(codepoint)
if len(scripts) == 1:
single_scripts.update(scripts)

7 changes: 5 additions & 2 deletions Lib/ufo2ft/featureWriters/kernFeatureWriter.py
Original file line number Diff line number Diff line change
@@ -11,7 +11,7 @@

from ufo2ft.constants import COMMON_SCRIPT, INDIC_SCRIPTS, USE_SCRIPTS
from ufo2ft.featureWriters import BaseFeatureWriter, ast
from ufo2ft.util import DFLT_SCRIPTS, classifyGlyphs, quantize
from ufo2ft.util import DFLT_SCRIPTS, classifyGlyphs, quantize, unicodeScriptExtensions

LOGGER = logging.getLogger(__name__)

@@ -130,6 +130,9 @@ class KernFeatureWriter(BaseFeatureWriter):
pairs that would mix RTL and LTR glyphs, which will not occur in
applications. Unicode BiDi classes L, AN and EN are considered L, R
and AL are considered R.
* Note: the glyph script determination has the quirk of declaring "Hira" and
"Kana" scripts as "Hrkt" so that they are considered one script and can be
kerned against each other.
* Get the kerning groups from the UFO and filter out glyphs not in the
glyphset and empty groups. Remember which group a glyph is a member of,
for kern1 and kern2, so we can later reconstruct per-script groups.
@@ -357,7 +360,7 @@ def knownScriptsPerCodepoint(self, uv: int) -> set[str]:
# anyway.
return {COMMON_SCRIPT}
else:
script_extension = unicodedata.script_extension(chr(uv))
script_extension = unicodeScriptExtensions(uv)
return script_extension & (self.context.knownScripts | DFLT_SCRIPTS)

def _makeKerningLookups(self):
10 changes: 7 additions & 3 deletions Lib/ufo2ft/featureWriters/markFeatureWriter.py
Original file line number Diff line number Diff line change
@@ -4,11 +4,15 @@
from functools import partial

from fontTools.misc.fixedTools import otRound
from fontTools.unicodedata import script_extension

from ufo2ft.constants import INDIC_SCRIPTS, USE_SCRIPTS
from ufo2ft.featureWriters import BaseFeatureWriter, ast
from ufo2ft.util import classifyGlyphs, quantize, unicodeInScripts
from ufo2ft.util import (
classifyGlyphs,
quantize,
unicodeInScripts,
unicodeScriptExtensions,
)


class AbstractMarkPos:
@@ -867,7 +871,7 @@ def _getAbvmGlyphs(self):
unicodeIsAbvm = partial(unicodeInScripts, scripts=scriptsUsingAbvm)

def unicodeIsNotAbvm(uv):
return bool(script_extension(chr(uv)) - self.scriptsUsingAbvm)
return bool(unicodeScriptExtensions(uv) - self.scriptsUsingAbvm)

if any(unicodeIsAbvm(uv) for uv in cmap):
# If there are any characters from Indic/USE/Khmer scripts in
21 changes: 19 additions & 2 deletions Lib/ufo2ft/util.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from __future__ import annotations

import importlib
import logging
import re
from copy import deepcopy
from inspect import currentframe, getfullargspec
from typing import Set
from typing import Mapping, Set

from fontTools import subset, ttLib, unicodedata
from fontTools.designspaceLib import DesignSpaceDocument
@@ -13,6 +15,8 @@
from fontTools.pens.reverseContourPen import ReverseContourPen
from fontTools.pens.transformPen import TransformPen

from ufo2ft.constants import UNICODE_SCRIPT_ALIASES

logger = logging.getLogger(__name__)


@@ -321,7 +325,7 @@ def unicodeInScripts(uv, scripts):
False if it does not intersect.
Return None for 'Common' script ('Zyyy').
"""
sx = unicodedata.script_extension(chr(uv))
sx = unicodeScriptExtensions(uv)
if "Zyyy" in sx:
return None
return not sx.isdisjoint(scripts)
@@ -595,3 +599,16 @@ def getMaxComponentDepth(glyph, glyphSet, maxComponentDepth=0):
maxComponentDepth = max(maxComponentDepth, componentDepth)

return maxComponentDepth


def unicodeScriptExtensions(
    codepoint: int, aliases: Mapping[str, str] = UNICODE_SCRIPT_ALIASES
) -> set[str]:
    """Return the set of Unicode script extensions for *codepoint*.

    Each script code is passed through *aliases* before being returned, so
    related scripts can be folded into a single bucket. The most prominent
    case is kerning Hiragana against Katakana: Unicode defines "Hrkt" as an
    alias covering both, so "Hira" and "Kana" are both reported as "Hrkt"
    and end up sharing lookups.
    """
    raw_extensions = unicodedata.script_extension(chr(codepoint))
    aliased = set()
    for script in raw_extensions:
        aliased.add(aliases.get(script, script))
    return aliased
87 changes: 82 additions & 5 deletions tests/featureWriters/kernFeatureWriter_test.py
Original file line number Diff line number Diff line change
@@ -4,10 +4,11 @@
import pytest
from fontTools import unicodedata

from ufo2ft.constants import UNICODE_SCRIPT_ALIASES
from ufo2ft.errors import InvalidFeaturesData
from ufo2ft.featureCompiler import parseLayoutFeatures
from ufo2ft.featureWriters import KernFeatureWriter, ast
from ufo2ft.util import DFLT_SCRIPTS
from ufo2ft.util import DFLT_SCRIPTS, unicodeScriptExtensions

from . import FeatureWriterTest

@@ -1651,13 +1652,27 @@ def test_kern_mixed_bidis(caplog, FontClass):
assert "<one-ar alef-ar 8> with ambiguous direction" in caplog.text


def unicodeScript(codepoint: int) -> str:
    """Return the Unicode script for *codepoint*, folding aliased scripts
    into a shared bucket.

    This mirrors the aliasing done by production code so lookups can span
    more than one script; most prominently, "Hira" and "Kana" both become
    "Hrkt" so Hiragana and Katakana can be kerned against each other.
    Note: Keep in sync with unicodeScriptExtensions!
    """
    raw_script = unicodedata.script(chr(codepoint))
    if raw_script in UNICODE_SCRIPT_ALIASES:
        return UNICODE_SCRIPT_ALIASES[raw_script]
    return raw_script


def test_kern_zyyy_zinh(FontClass):
"""Test that a sampling of glyphs with a common or inherited script, but a
disjoint set of explicit script extensions end up in the correct lookups."""
glyphs = {}
for i in range(0, 0x110000, 0x10):
script = unicodedata.script(chr(i))
script_extension = unicodedata.script_extension(chr(i))
script = unicodeScript(i)
script_extension = unicodeScriptExtensions(i)
if script not in script_extension:
assert script in DFLT_SCRIPTS
name = f"uni{i:04X}"
@@ -1713,6 +1728,14 @@ def test_kern_zyyy_zinh(FontClass):
pos uniA700 uniA700 27;
} kern_Hani;
lookup kern_Hrkt {
lookupflag IgnoreMarks;
pos uni3010 uni3010 8;
pos uni3030 uni3030 9;
pos uni30A0 uni30A0 10;
pos uniFF70 uniFF70 29;
} kern_Hrkt;
lookup kern_Default {
lookupflag IgnoreMarks;
pos uni0640 uni0640 0;
@@ -1724,8 +1747,6 @@ def test_kern_zyyy_zinh(FontClass):
pos uni10130 uni10130 33;
pos uni102E0 uni102E0 34;
pos uni102F0 uni102F0 35;
pos uni30A0 uni30A0 10;
pos uniFF70 uniFF70 29;
} kern_Default;
feature kern {
@@ -1742,6 +1763,11 @@ def test_kern_zyyy_zinh(FontClass):
language dflt;
lookup kern_Default;
lookup kern_Hani;
script kana;
language dflt;
lookup kern_Default;
lookup kern_Hrkt;
} kern;
feature dist {
@@ -1764,6 +1790,57 @@ def test_kern_zyyy_zinh(FontClass):
)


def test_kern_hira_kana_hrkt(FontClass):
    """Test that Hiragana and Katakana land in the same lookup and can be
    kerned against each other and common glyphs are kerned just once."""
    # Three glyphs: one Hiragana, one Katakana, one Common-script glyph.
    cmap = {"a-hira": 0x3042, "a-kana": 0x30A2, "period": ord(".")}
    # Every pairing, so we can see where each pair is emitted.
    pair_values = {
        ("a-hira", "a-hira"): 1,
        ("a-hira", "a-kana"): 2,
        ("a-kana", "a-hira"): 3,
        ("a-kana", "a-kana"): 4,
        ("period", "period"): 5,
        ("a-hira", "period"): 6,
        ("period", "a-hira"): 7,
        ("a-kana", "period"): 8,
        ("period", "a-kana"): 9,
    }
    font = makeUFO(FontClass, cmap, None, pair_values)
    generated = KernFeatureWriterTest.writeFeatures(font)

    # Hira/Kana pairs (including pairs against Common glyphs) share one
    # "Hrkt" lookup; the purely Common pair lands in the default lookup only.
    expected = dedent(
        """\
        lookup kern_Hrkt {
            lookupflag IgnoreMarks;
            pos a-hira a-hira 1;
            pos a-hira a-kana 2;
            pos a-hira period 6;
            pos a-kana a-hira 3;
            pos a-kana a-kana 4;
            pos a-kana period 8;
            pos period a-hira 7;
            pos period a-kana 9;
        } kern_Hrkt;

        lookup kern_Default {
            lookupflag IgnoreMarks;
            pos period period 5;
        } kern_Default;

        feature kern {
            script DFLT;
            language dflt;
            lookup kern_Default;

            script kana;
            language dflt;
            lookup kern_Default;
            lookup kern_Hrkt;
        } kern;
        """
    )
    assert dedent(str(generated)) == expected


if __name__ == "__main__":
import sys

2 changes: 1 addition & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -48,7 +48,7 @@ commands =

[flake8]
select = C, E, F, W, B, B9
ignore = E203, E266, E501, W503
ignore = E203, E266, E501, W503, B905
max-line-length = 88
exclude = .git, __pycache__, build, dist, .eggs, .tox, venv, venv*, .venv, .venv*

0 comments on commit dd738cd

Please sign in to comment.