Merge pull request #302 from dkpro/refactoring/301-Make-better-use-of-type-constants-in-code

#301 - Make better use of type constants in code
dkpro · Feb 4, 2024 · 90b6bdb · 90b6bdb
2 parents 19b096e + c9df9d4
commit 90b6bdb
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 56 deletions.
diff --git a/cassis/typesystem.py b/cassis/typesystem.py
@@ -111,7 +111,7 @@
     "uima.cas.DoubleArray",
     "uima.cas.Sofa",
     "uima.cas.AnnotationBase",
-    "uima.tcas.Annotation",
+    TYPE_NAME_ANNOTATION,
 }
 
 _PRIMITIVE_TYPES = {
@@ -828,7 +828,7 @@ def __init__(self, add_document_annotation_type: bool = True):
         self.create_feature(t, name="sofa", rangeType="uima.cas.Sofa")
 
         # Annotation
-        t = self.create_type(name="uima.tcas.Annotation", supertypeName="uima.cas.AnnotationBase")
+        t = self.create_type(name=TYPE_NAME_ANNOTATION, supertypeName="uima.cas.AnnotationBase")
         self.create_feature(t, name="begin", rangeType="uima.cas.Integer")
         self.create_feature(t, name="end", rangeType="uima.cas.Integer")
 
@@ -846,7 +846,7 @@ def contains_type(self, typename: str):
         """
         return typename in self._types
 
-    def create_type(self, name: str, supertypeName: str = "uima.tcas.Annotation", description: str = None) -> Type:
+    def create_type(self, name: str, supertypeName: str = TYPE_NAME_ANNOTATION, description: str = None) -> Type:
         """Creates a new type and return it.
 
         Args:

diff --git a/cassis/xmi.py b/cassis/xmi.py
@@ -315,7 +315,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
                 fs = feature_structures[member_id]
 
                 # Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
-                if typesystem.is_instance_of(fs.type.name, "uima.tcas.Annotation"):
+                if typesystem.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION):
                     fs.begin = sofa._offset_converter.external_to_python(fs.begin)
                     fs.end = sofa._offset_converter.external_to_python(fs.end)
 

diff --git a/tests/test_cas.py b/tests/test_cas.py
@@ -133,7 +133,7 @@ def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sent
     cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
     cas.add_all(annotations)
 
-    actual_annotations = list(cas.select("uima.tcas.Annotation"))
+    actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))
 
     assert set(actual_annotations) == set(annotations)
 
@@ -399,13 +399,13 @@ def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences):
     for token in tokens:
         cas.remove(token)
 
-    actual_annotations = list(cas.select("uima.tcas.Annotation"))
+    actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))
     assert set(actual_annotations) == set(sentences)
 
     for sentence in sentences:
         cas.remove(sentence)
 
-    actual_annotations = list(cas.select("uima.tcas.Annotation"))
+    actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))
     assert set(actual_annotations) == set()
 
 
@@ -420,8 +420,8 @@ def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
     for annotation in annotations:
         cas.remove(annotation)
 
-    assert set(cas.select("uima.tcas.Annotation")) == set()
-    assert set(view.select("uima.tcas.Annotation")) == set(annotations)
+    assert set(cas.select(TYPE_NAME_ANNOTATION)) == set()
+    assert set(view.select(TYPE_NAME_ANNOTATION)) == set(annotations)
 
 
 def test_removing_throws_if_fs_not_found(small_typesystem_xml, tokens, sentences):

diff --git a/tests/test_json.py b/tests/test_json.py
@@ -1,6 +1,6 @@
 import json
 
-from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode
+from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode, TYPE_NAME_DOCUMENT_ANNOTATION
 from tests.fixtures import *
 from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
 from tests.util import assert_json_equal
@@ -13,69 +13,69 @@
     (os.path.join(SER_REF_DIR, "casWithSofaDataArray"), []),
     (os.path.join(SER_REF_DIR, "casWithSofaDataURI"), []),
     (os.path.join(SER_REF_DIR, "casWithFloatingPointSpecialValues"), []),
-    (os.path.join(SER_REF_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
+    (os.path.join(SER_REF_DIR, "casWithText"), [[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 15, "This is a test."]]),
     (
         os.path.join(SER_REF_DIR, "casWithoutTextButWithAnnotations"),
         [
-            ["uima.tcas.Annotation", 0, 4, None],
-            ["uima.tcas.Annotation", 5, 7, None],
-            ["uima.tcas.Annotation", 8, 9, None],
-            ["uima.tcas.Annotation", 10, 14, None],
+            [TYPE_NAME_ANNOTATION, 0, 4, None],
+            [TYPE_NAME_ANNOTATION, 5, 7, None],
+            [TYPE_NAME_ANNOTATION, 8, 9, None],
+            [TYPE_NAME_ANNOTATION, 10, 14, None],
         ],
     ),
     (
         os.path.join(SER_REF_DIR, "casWithTextAndAnnotations"),
         [
-            ["uima.tcas.Annotation", 0, 4, "This"],
-            ["uima.tcas.Annotation", 5, 7, "is"],
-            ["uima.tcas.Annotation", 8, 9, "a"],
-            ["uima.tcas.Annotation", 10, 14, "test"],
-            ["uima.tcas.DocumentAnnotation", 0, 14, "This is a test"],
+            [TYPE_NAME_ANNOTATION, 0, 4, "This"],
+            [TYPE_NAME_ANNOTATION, 5, 7, "is"],
+            [TYPE_NAME_ANNOTATION, 8, 9, "a"],
+            [TYPE_NAME_ANNOTATION, 10, 14, "test"],
+            [TYPE_NAME_DOCUMENT_ANNOTATION, 0, 14, "This is a test"],
         ],
     ),
     (
         os.path.join(SER_REF_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
         [
-            ["uima.tcas.Annotation", 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
-            ["uima.tcas.Annotation", 2, 6, "This"],
+            [TYPE_NAME_ANNOTATION, 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
+            [TYPE_NAME_ANNOTATION, 2, 6, "This"],
             [
-                "uima.tcas.Annotation",
+                TYPE_NAME_ANNOTATION,
                 7,
                 12,
                 "👳🏻\u200d♀️",
                 b"\xf0\x9f\x91\xb3\xf0\x9f\x8f\xbb\xe2\x80\x8d\xe2\x99\x80\xef\xb8\x8f",
             ],
-            ["uima.tcas.Annotation", 13, 15, "is"],
-            ["uima.tcas.Annotation", 16, 17, "✆", b"\xe2\x9c\x86"],
-            ["uima.tcas.Annotation", 18, 19, "a"],
+            [TYPE_NAME_ANNOTATION, 13, 15, "is"],
+            [TYPE_NAME_ANNOTATION, 16, 17, "✆", b"\xe2\x9c\x86"],
+            [TYPE_NAME_ANNOTATION, 18, 19, "a"],
             [
-                "uima.tcas.Annotation",
+                TYPE_NAME_ANNOTATION,
                 20,
                 25,
                 "🧔🏾\u200d♂️",
                 b"\xf0\x9f\xa7\x94\xf0\x9f\x8f\xbe\xe2\x80\x8d\xe2\x99\x82\xef\xb8\x8f",
             ],
-            ["uima.tcas.Annotation", 26, 30, "test"],
-            ["uima.tcas.Annotation", 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
-            ["uima.tcas.DocumentAnnotation", 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
+            [TYPE_NAME_ANNOTATION, 26, 30, "test"],
+            [TYPE_NAME_ANNOTATION, 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
+            [TYPE_NAME_DOCUMENT_ANNOTATION, 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
         ],
     ),
     (
         os.path.join(SER_REF_DIR, "casWithLeftToRightTextAndAnnotations"),
         [
-            ["uima.tcas.Annotation", 0, 3, "هذا"],
-            ["uima.tcas.Annotation", 4, 10, "اختبار"],
-            ["uima.tcas.DocumentAnnotation", 0, 10, "هذا اختبار"],
+            [TYPE_NAME_ANNOTATION, 0, 3, "هذا"],
+            [TYPE_NAME_ANNOTATION, 4, 10, "اختبار"],
+            [TYPE_NAME_DOCUMENT_ANNOTATION, 0, 10, "هذا اختبار"],
         ],
     ),
     (
         os.path.join(SER_REF_DIR, "casWithTraditionalChineseTextAndAnnotations"),
         [
-            ["uima.tcas.Annotation", 0, 1, "這"],
-            ["uima.tcas.Annotation", 1, 2, "是"],
-            ["uima.tcas.Annotation", 2, 4, "一個"],
-            ["uima.tcas.Annotation", 4, 6, "測試"],
-            ["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"],
+            [TYPE_NAME_ANNOTATION, 0, 1, "這"],
+            [TYPE_NAME_ANNOTATION, 1, 2, "是"],
+            [TYPE_NAME_ANNOTATION, 2, 4, "一個"],
+            [TYPE_NAME_ANNOTATION, 4, 6, "測試"],
+            [TYPE_NAME_DOCUMENT_ANNOTATION, 0, 6, "這是一個測試"],
         ],
     ),
     (

diff --git a/tests/test_typesystem.py b/tests/test_typesystem.py
@@ -16,7 +16,7 @@
     TYPE_NAME_STRING_ARRAY,
     TYPE_NAME_TOP,
     TypeCheckError,
-    is_predefined,
+    is_predefined, TYPE_NAME_DOCUMENT_ANNOTATION,
 )
 from tests.fixtures import *
 from tests.util import assert_xml_equal
@@ -184,7 +184,7 @@ def test_type_can_be_created():
     test_type = typesystem.create_type(name="test.Type")
 
     assert test_type.name == "test.Type"
-    assert test_type.supertype.name == "uima.tcas.Annotation"
+    assert test_type.supertype.name == TYPE_NAME_ANNOTATION
 
 
 def test_type_can_create_instances():
@@ -307,7 +307,7 @@ def test_type_inherits_from_annotation():
         ("uima.cas.DoubleArray", True),
         ("uima.cas.Sofa", True),
         ("uima.cas.AnnotationBase", True),
-        ("uima.tcas.Annotation", True),
+        (TYPE_NAME_ANNOTATION, True),
         ("example.TypeA", False),
         ("example.TypeB", False),
         ("example.TypeC", False),
@@ -327,8 +327,8 @@ def test_is_predefined(type_name: str, expected: bool):
             True,
         ),
         ("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink", "uima.cas.String", False),
-        ("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.VC", "uima.tcas.Annotation", True),
-        ("de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation", "uima.tcas.Annotation", True),
+        ("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.VC", TYPE_NAME_ANNOTATION, True),
+        ("de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation", TYPE_NAME_ANNOTATION, True),
         (
             "de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.SpellingAnomaly",
             "de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.GrammarAnomaly",
@@ -581,9 +581,9 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
     assert len(list(typesystem.get_types())) == 3
 
     # Assert annotation type
-    annotation_type = typesystem.get_type("uima.tcas.DocumentAnnotation")
-    assert annotation_type.name == "uima.tcas.DocumentAnnotation"
-    assert annotation_type.supertype.name == "uima.tcas.Annotation"
+    annotation_type = typesystem.get_type(TYPE_NAME_DOCUMENT_ANNOTATION)
+    assert annotation_type.name == TYPE_NAME_DOCUMENT_ANNOTATION
+    assert annotation_type.supertype.name == TYPE_NAME_ANNOTATION
 
     language_feature = annotation_type.get_feature("language")
     assert language_feature.name == "language"
@@ -592,7 +592,7 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
     # Assert token type
     token_type = typesystem.get_type("cassis.Token")
     assert token_type.name == "cassis.Token"
-    assert token_type.supertype.name == "uima.tcas.Annotation"
+    assert token_type.supertype.name == TYPE_NAME_ANNOTATION
 
     token_id_feature = token_type.get_feature("id")
     assert token_id_feature.name == "id"
@@ -606,7 +606,7 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
     # Assert sentence type
     sentence_type = typesystem.get_type("cassis.Sentence")
     assert sentence_type.name == "cassis.Sentence"
-    assert sentence_type.supertype.name == "uima.tcas.Annotation"
+    assert sentence_type.supertype.name == TYPE_NAME_ANNOTATION
 
     sentence_type_id_feature = sentence_type.get_feature("id")
     assert sentence_type_id_feature.name == "id"
@@ -690,10 +690,10 @@ def test_that_typesystem_with_redefined_documentation_annotation_works(
 @pytest.mark.parametrize(
     "name, rangeTypeName, elementType, multipleReferencesAllowed",
     [
-        ("arrayMultiRefsOk", "uima.cas.FSArray", "uima.tcas.Annotation", True),  # Same multiref
-        ("arrayNoMultiRefs", "uima.cas.FSArray", "uima.tcas.Annotation", None),  # Default multiref
-        ("arrayNoMultiRefs", "uima.cas.FSArray", "uima.tcas.Annotation", None),  # Same elementType
-        ("listMultiRefsOk", "uima.cas.FSList", "uima.tcas.Annotation", None),  # Default elementType
+        ("arrayMultiRefsOk", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, True),  # Same multiref
+        ("arrayNoMultiRefs", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None),  # Default multiref
+        ("arrayNoMultiRefs", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None),  # Same elementType
+        ("listMultiRefsOk", "uima.cas.FSList", TYPE_NAME_ANNOTATION, None),  # Default elementType
         ("arrayTop", "uima.cas.FSArray", None, None),  # No elementType,
     ],
 )
@@ -723,9 +723,9 @@ def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementTy
 @pytest.mark.parametrize(
     "name, rangeTypeName, elementType, multipleReferencesAllowed",
     [
-        ("arrayNoElementType", "uima.cas.FSArray", "uima.tcas.Annotation", None),  # Different elementTypes
+        ("arrayNoElementType", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None),  # Different elementTypes
         ("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.AnnotationBase", True),  # Different elementTypes
-        ("arrayMultiRefsOk", "uima.cas.FSList", "uima.tcas.Annotation", True),  # Incompatible rangeTypes
+        ("arrayMultiRefsOk", "uima.cas.FSList", TYPE_NAME_ANNOTATION, True),  # Incompatible rangeTypes
         ("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.TOP", False),  # Different multiref
         ("arrayNoMultiRefs", "uima.cas.FSArray", "uima.cas.TOP", True),  # Different multiref
         ("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.TOP", None),  # Different multiref default
@@ -754,10 +754,10 @@ def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elemen
 @pytest.mark.filterwarnings("ignore:Feature with name")
 def test_that_merging_types_with_different_compatible_supertypes_works():
     ts1 = TypeSystem()
-    ts1.create_type("test.Sub", description="Example type.", supertypeName="uima.tcas.Annotation")
+    ts1.create_type("test.Sub", description="Example type.", supertypeName=TYPE_NAME_ANNOTATION)
 
     ts2 = TypeSystem()
-    ts2.create_type("test.Super", description="Example type.", supertypeName="uima.tcas.Annotation")
+    ts2.create_type("test.Super", description="Example type.", supertypeName=TYPE_NAME_ANNOTATION)
     ts2.create_type("test.Sub", description="Example type.", supertypeName="test.Super")
 
     result = merge_typesystems(ts1, ts2)