Skip to content

Commit

Permalink
Merge pull request #302 from dkpro/refactoring/301-Make-better-use-of-type-constants-in-code
Browse files Browse the repository at this point in the history

#301 - Make better use of type constants in code
  • Loading branch information
reckart authored Feb 4, 2024
2 parents 19b096e + c9df9d4 commit 90b6bdb
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 56 deletions.
6 changes: 3 additions & 3 deletions cassis/typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"uima.cas.DoubleArray",
"uima.cas.Sofa",
"uima.cas.AnnotationBase",
"uima.tcas.Annotation",
TYPE_NAME_ANNOTATION,
}

_PRIMITIVE_TYPES = {
Expand Down Expand Up @@ -828,7 +828,7 @@ def __init__(self, add_document_annotation_type: bool = True):
self.create_feature(t, name="sofa", rangeType="uima.cas.Sofa")

# Annotation
t = self.create_type(name="uima.tcas.Annotation", supertypeName="uima.cas.AnnotationBase")
t = self.create_type(name=TYPE_NAME_ANNOTATION, supertypeName="uima.cas.AnnotationBase")
self.create_feature(t, name="begin", rangeType="uima.cas.Integer")
self.create_feature(t, name="end", rangeType="uima.cas.Integer")

Expand All @@ -846,7 +846,7 @@ def contains_type(self, typename: str):
"""
return typename in self._types

def create_type(self, name: str, supertypeName: str = "uima.tcas.Annotation", description: str = None) -> Type:
def create_type(self, name: str, supertypeName: str = TYPE_NAME_ANNOTATION, description: str = None) -> Type:
"""Creates a new type and return it.
Args:
Expand Down
2 changes: 1 addition & 1 deletion cassis/xmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def deserialize(self, source: Union[IO, str], typesystem: TypeSystem, lenient: b
fs = feature_structures[member_id]

# Map from offsets in UIMA UTF-16 based offsets to Unicode codepoints
if typesystem.is_instance_of(fs.type.name, "uima.tcas.Annotation"):
if typesystem.is_instance_of(fs.type.name, TYPE_NAME_ANNOTATION):
fs.begin = sofa._offset_converter.external_to_python(fs.begin)
fs.end = sofa._offset_converter.external_to_python(fs.end)

Expand Down
10 changes: 5 additions & 5 deletions tests/test_cas.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def test_select_also_returns_parent_instances(small_typesystem_xml, tokens, sent
cas = Cas(typesystem=load_typesystem(small_typesystem_xml))
cas.add_all(annotations)

actual_annotations = list(cas.select("uima.tcas.Annotation"))
actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))

assert set(actual_annotations) == set(annotations)

Expand Down Expand Up @@ -399,13 +399,13 @@ def test_removing_of_existing_fs_works(small_typesystem_xml, tokens, sentences):
for token in tokens:
cas.remove(token)

actual_annotations = list(cas.select("uima.tcas.Annotation"))
actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))
assert set(actual_annotations) == set(sentences)

for sentence in sentences:
cas.remove(sentence)

actual_annotations = list(cas.select("uima.tcas.Annotation"))
actual_annotations = list(cas.select(TYPE_NAME_ANNOTATION))
assert set(actual_annotations) == set()


Expand All @@ -420,8 +420,8 @@ def test_removing_removes_from_view(small_typesystem_xml, tokens, sentences):
for annotation in annotations:
cas.remove(annotation)

assert set(cas.select("uima.tcas.Annotation")) == set()
assert set(view.select("uima.tcas.Annotation")) == set(annotations)
assert set(cas.select(TYPE_NAME_ANNOTATION)) == set()
assert set(view.select(TYPE_NAME_ANNOTATION)) == set(annotations)


def test_removing_throws_if_fs_not_found(small_typesystem_xml, tokens, sentences):
Expand Down
58 changes: 29 additions & 29 deletions tests/test_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json

from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode
from cassis.typesystem import TYPE_NAME_ANNOTATION, TypeSystemMode, TYPE_NAME_DOCUMENT_ANNOTATION
from tests.fixtures import *
from tests.test_files.test_cas_generators import MultiFeatureRandomCasGenerator, MultiTypeRandomCasGenerator
from tests.util import assert_json_equal
Expand All @@ -13,69 +13,69 @@
(os.path.join(SER_REF_DIR, "casWithSofaDataArray"), []),
(os.path.join(SER_REF_DIR, "casWithSofaDataURI"), []),
(os.path.join(SER_REF_DIR, "casWithFloatingPointSpecialValues"), []),
(os.path.join(SER_REF_DIR, "casWithText"), [["uima.tcas.DocumentAnnotation", 0, 15, "This is a test."]]),
(os.path.join(SER_REF_DIR, "casWithText"), [[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 15, "This is a test."]]),
(
os.path.join(SER_REF_DIR, "casWithoutTextButWithAnnotations"),
[
["uima.tcas.Annotation", 0, 4, None],
["uima.tcas.Annotation", 5, 7, None],
["uima.tcas.Annotation", 8, 9, None],
["uima.tcas.Annotation", 10, 14, None],
[TYPE_NAME_ANNOTATION, 0, 4, None],
[TYPE_NAME_ANNOTATION, 5, 7, None],
[TYPE_NAME_ANNOTATION, 8, 9, None],
[TYPE_NAME_ANNOTATION, 10, 14, None],
],
),
(
os.path.join(SER_REF_DIR, "casWithTextAndAnnotations"),
[
["uima.tcas.Annotation", 0, 4, "This"],
["uima.tcas.Annotation", 5, 7, "is"],
["uima.tcas.Annotation", 8, 9, "a"],
["uima.tcas.Annotation", 10, 14, "test"],
["uima.tcas.DocumentAnnotation", 0, 14, "This is a test"],
[TYPE_NAME_ANNOTATION, 0, 4, "This"],
[TYPE_NAME_ANNOTATION, 5, 7, "is"],
[TYPE_NAME_ANNOTATION, 8, 9, "a"],
[TYPE_NAME_ANNOTATION, 10, 14, "test"],
[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 14, "This is a test"],
],
),
(
os.path.join(SER_REF_DIR, "casWithEmojiUnicodeTextAndAnnotations"),
[
["uima.tcas.Annotation", 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
["uima.tcas.Annotation", 2, 6, "This"],
[TYPE_NAME_ANNOTATION, 0, 1, "🥳", b"\xf0\x9f\xa5\xb3"],
[TYPE_NAME_ANNOTATION, 2, 6, "This"],
[
"uima.tcas.Annotation",
TYPE_NAME_ANNOTATION,
7,
12,
"👳🏻\u200d♀️",
b"\xf0\x9f\x91\xb3\xf0\x9f\x8f\xbb\xe2\x80\x8d\xe2\x99\x80\xef\xb8\x8f",
],
["uima.tcas.Annotation", 13, 15, "is"],
["uima.tcas.Annotation", 16, 17, "✆", b"\xe2\x9c\x86"],
["uima.tcas.Annotation", 18, 19, "a"],
[TYPE_NAME_ANNOTATION, 13, 15, "is"],
[TYPE_NAME_ANNOTATION, 16, 17, "✆", b"\xe2\x9c\x86"],
[TYPE_NAME_ANNOTATION, 18, 19, "a"],
[
"uima.tcas.Annotation",
TYPE_NAME_ANNOTATION,
20,
25,
"🧔🏾\u200d♂️",
b"\xf0\x9f\xa7\x94\xf0\x9f\x8f\xbe\xe2\x80\x8d\xe2\x99\x82\xef\xb8\x8f",
],
["uima.tcas.Annotation", 26, 30, "test"],
["uima.tcas.Annotation", 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
["uima.tcas.DocumentAnnotation", 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
[TYPE_NAME_ANNOTATION, 26, 30, "test"],
[TYPE_NAME_ANNOTATION, 31, 32, "👻", b"\xf0\x9f\x91\xbb"],
[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 32, "🥳 This 👳🏻\u200d♀️ is ✆ a 🧔🏾\u200d♂️ test 👻"],
],
),
(
os.path.join(SER_REF_DIR, "casWithLeftToRightTextAndAnnotations"),
[
["uima.tcas.Annotation", 0, 3, "هذا"],
["uima.tcas.Annotation", 4, 10, "اختبار"],
["uima.tcas.DocumentAnnotation", 0, 10, "هذا اختبار"],
[TYPE_NAME_ANNOTATION, 0, 3, "هذا"],
[TYPE_NAME_ANNOTATION, 4, 10, "اختبار"],
[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 10, "هذا اختبار"],
],
),
(
os.path.join(SER_REF_DIR, "casWithTraditionalChineseTextAndAnnotations"),
[
["uima.tcas.Annotation", 0, 1, "這"],
["uima.tcas.Annotation", 1, 2, "是"],
["uima.tcas.Annotation", 2, 4, "一個"],
["uima.tcas.Annotation", 4, 6, "測試"],
["uima.tcas.DocumentAnnotation", 0, 6, "這是一個測試"],
[TYPE_NAME_ANNOTATION, 0, 1, "這"],
[TYPE_NAME_ANNOTATION, 1, 2, "是"],
[TYPE_NAME_ANNOTATION, 2, 4, "一個"],
[TYPE_NAME_ANNOTATION, 4, 6, "測試"],
[TYPE_NAME_DOCUMENT_ANNOTATION, 0, 6, "這是一個測試"],
],
),
(
Expand Down
36 changes: 18 additions & 18 deletions tests/test_typesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
TYPE_NAME_STRING_ARRAY,
TYPE_NAME_TOP,
TypeCheckError,
is_predefined,
is_predefined, TYPE_NAME_DOCUMENT_ANNOTATION,
)
from tests.fixtures import *
from tests.util import assert_xml_equal
Expand Down Expand Up @@ -184,7 +184,7 @@ def test_type_can_be_created():
test_type = typesystem.create_type(name="test.Type")

assert test_type.name == "test.Type"
assert test_type.supertype.name == "uima.tcas.Annotation"
assert test_type.supertype.name == TYPE_NAME_ANNOTATION


def test_type_can_create_instances():
Expand Down Expand Up @@ -307,7 +307,7 @@ def test_type_inherits_from_annotation():
("uima.cas.DoubleArray", True),
("uima.cas.Sofa", True),
("uima.cas.AnnotationBase", True),
("uima.tcas.Annotation", True),
(TYPE_NAME_ANNOTATION, True),
("example.TypeA", False),
("example.TypeB", False),
("example.TypeC", False),
Expand All @@ -327,8 +327,8 @@ def test_is_predefined(type_name: str, expected: bool):
True,
),
("de.tudarmstadt.ukp.dkpro.core.api.semantics.type.SemArgLink", "uima.cas.String", False),
("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.VC", "uima.tcas.Annotation", True),
("de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation", "uima.tcas.Annotation", True),
("de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.VC", TYPE_NAME_ANNOTATION, True),
("de.tudarmstadt.ukp.dkpro.core.api.transform.type.SofaChangeAnnotation", TYPE_NAME_ANNOTATION, True),
(
"de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.SpellingAnomaly",
"de.tudarmstadt.ukp.dkpro.core.api.anomaly.type.GrammarAnomaly",
Expand Down Expand Up @@ -581,9 +581,9 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
assert len(list(typesystem.get_types())) == 3

# Assert annotation type
annotation_type = typesystem.get_type("uima.tcas.DocumentAnnotation")
assert annotation_type.name == "uima.tcas.DocumentAnnotation"
assert annotation_type.supertype.name == "uima.tcas.Annotation"
annotation_type = typesystem.get_type(TYPE_NAME_DOCUMENT_ANNOTATION)
assert annotation_type.name == TYPE_NAME_DOCUMENT_ANNOTATION
assert annotation_type.supertype.name == TYPE_NAME_ANNOTATION

language_feature = annotation_type.get_feature("language")
assert language_feature.name == "language"
Expand All @@ -592,7 +592,7 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
# Assert token type
token_type = typesystem.get_type("cassis.Token")
assert token_type.name == "cassis.Token"
assert token_type.supertype.name == "uima.tcas.Annotation"
assert token_type.supertype.name == TYPE_NAME_ANNOTATION

token_id_feature = token_type.get_feature("id")
assert token_id_feature.name == "id"
Expand All @@ -606,7 +606,7 @@ def test_deserializing_small_typesystem(small_typesystem_xml):
# Assert sentence type
sentence_type = typesystem.get_type("cassis.Sentence")
assert sentence_type.name == "cassis.Sentence"
assert sentence_type.supertype.name == "uima.tcas.Annotation"
assert sentence_type.supertype.name == TYPE_NAME_ANNOTATION

sentence_type_id_feature = sentence_type.get_feature("id")
assert sentence_type_id_feature.name == "id"
Expand Down Expand Up @@ -690,10 +690,10 @@ def test_that_typesystem_with_redefined_documentation_annotation_works(
@pytest.mark.parametrize(
"name, rangeTypeName, elementType, multipleReferencesAllowed",
[
("arrayMultiRefsOk", "uima.cas.FSArray", "uima.tcas.Annotation", True), # Same multiref
("arrayNoMultiRefs", "uima.cas.FSArray", "uima.tcas.Annotation", None), # Default multiref
("arrayNoMultiRefs", "uima.cas.FSArray", "uima.tcas.Annotation", None), # Same elementType
("listMultiRefsOk", "uima.cas.FSList", "uima.tcas.Annotation", None), # Default elementType
("arrayMultiRefsOk", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, True), # Same multiref
("arrayNoMultiRefs", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None), # Default multiref
("arrayNoMultiRefs", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None), # Same elementType
("listMultiRefsOk", "uima.cas.FSList", TYPE_NAME_ANNOTATION, None), # Default elementType
("arrayTop", "uima.cas.FSArray", None, None), # No elementType,
],
)
Expand Down Expand Up @@ -723,9 +723,9 @@ def test_that_merging_compatible_typesystem_works(name, rangeTypeName, elementTy
@pytest.mark.parametrize(
"name, rangeTypeName, elementType, multipleReferencesAllowed",
[
("arrayNoElementType", "uima.cas.FSArray", "uima.tcas.Annotation", None), # Different elementTypes
("arrayNoElementType", "uima.cas.FSArray", TYPE_NAME_ANNOTATION, None), # Different elementTypes
("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.AnnotationBase", True), # Different elementTypes
("arrayMultiRefsOk", "uima.cas.FSList", "uima.tcas.Annotation", True), # Incompatible rangeTypes
("arrayMultiRefsOk", "uima.cas.FSList", TYPE_NAME_ANNOTATION, True), # Incompatible rangeTypes
("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.TOP", False), # Different multiref
("arrayNoMultiRefs", "uima.cas.FSArray", "uima.cas.TOP", True), # Different multiref
("arrayMultiRefsOk", "uima.cas.FSArray", "uima.cas.TOP", None), # Different multiref default
Expand Down Expand Up @@ -754,10 +754,10 @@ def test_that_merging_incompatible_typesystem_throws(name, rangeTypeName, elemen
@pytest.mark.filterwarnings("ignore:Feature with name")
def test_that_merging_types_with_different_compatible_supertypes_works():
ts1 = TypeSystem()
ts1.create_type("test.Sub", description="Example type.", supertypeName="uima.tcas.Annotation")
ts1.create_type("test.Sub", description="Example type.", supertypeName=TYPE_NAME_ANNOTATION)

ts2 = TypeSystem()
ts2.create_type("test.Super", description="Example type.", supertypeName="uima.tcas.Annotation")
ts2.create_type("test.Super", description="Example type.", supertypeName=TYPE_NAME_ANNOTATION)
ts2.create_type("test.Sub", description="Example type.", supertypeName="test.Super")

result = merge_typesystems(ts1, ts2)
Expand Down

0 comments on commit 90b6bdb

Please sign in to comment.