Skip to content

Commit

Permalink
Import Document - Store ISO language in settings
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Jan 26, 2024
1 parent 41583c9 commit 8805947
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 15 deletions.
20 changes: 13 additions & 7 deletions orangecontrib/text/widgets/owimportdocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,7 @@
from orangecontrib.text.corpus import Corpus
from orangecontrib.text.import_documents import ImportDocuments, NoDocumentsException
from orangecontrib.text.language import (
ISO2LANG,
detect_language,
LANG2ISO,
LanguageModel,
detect_language, LanguageModel, DEFAULT_LANGUAGE, LANG2ISO, migrate_language_name
)

# domain for skipped images output
Expand Down Expand Up @@ -124,6 +121,7 @@ class Outputs:
skipped_documents = Output("Skipped documents", Table)

settingsHandler = ImportDocumentContextHandler()
settings_version = 2

LOCAL_FILE, URL = range(2)
source = settings.Setting(LOCAL_FILE)
Expand All @@ -134,7 +132,7 @@ class Outputs:
lemma_cb = settings.Setting(True)
pos_cb = settings.Setting(False)
ner_cb = settings.Setting(False)
language: str = settings.ContextSetting("English")
language: str = settings.ContextSetting(DEFAULT_LANGUAGE)

want_main_area = False
resizing_enabled = False
Expand Down Expand Up @@ -665,7 +663,7 @@ def __onRunFinished(self):
self.n_text_data = len(corpus)
self.n_text_categories = len(corpus.domain.class_var.values) \
if corpus.domain.class_var else 0
self.language = ISO2LANG[corpus.language or detect_language(corpus)]
self.language = corpus.language or detect_language(corpus)
self.openContext(corpus)
else:
self.language = None
Expand Down Expand Up @@ -727,7 +725,7 @@ def commit(self):
if self.is_conllu:
self.add_features()
if self.corpus:
self.corpus.attributes["language"] = LANG2ISO[self.language]
self.corpus.attributes["language"] = self.language
self.Outputs.data.send(self.corpus)
if self.skipped_documents:
skipped_table = (
Expand Down Expand Up @@ -791,6 +789,14 @@ def send_report(self):
items += [('Number of skipped', len(self.skipped_documents))]
self.report_items(items, )

@classmethod
def migrate_context(cls, context, version):
    """
    Migrate a stored context to the current settings version.

    Contexts saved with a settings version below 2 keep the language as a
    language name (e.g. "English"). The stored name is first passed through
    ``migrate_language_name`` and then replaced by the matching entry of
    ``LANG2ISO``, so that the setting holds the language code instead.

    Parameters
    ----------
    context
        Stored context; its ``values`` mapping is updated in place.
    version
        Settings version with which the context was saved.
    """
    if version < 2:
        if "language" in context.values:
            # Migration must stay silent: it runs whenever old settings are
            # loaded, so nothing is written to stdout here.
            language_name = migrate_language_name(context.values["language"])
            context.values["language"] = LANG2ISO[language_name]


class UserInterruptError(BaseException):
"""
Expand Down
40 changes: 32 additions & 8 deletions orangecontrib/text/widgets/tests/test_owimportdocuments.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@
class TestOWImportDocuments(WidgetTest):
def setUp(self) -> None:
self.widget: OWImportDocuments = self.create_widget(OWImportDocuments)
path = os.path.join(os.path.dirname(__file__), DATA_PATH)
self.widget.setCurrentPath(path)
self.path = os.path.join(os.path.dirname(__file__), DATA_PATH)
self.widget.setCurrentPath(self.path)
self.widget.reload()
self.wait_until_finished()

def test_current_path(self):
path = os.path.join(os.path.dirname(__file__), DATA_PATH)
self.assertEqual(path, self.widget.currentPath)
self.assertEqual(self.path, self.widget.currentPath)

def test_no_skipped(self):
path = os.path.join(DATA_PATH, "good")
Expand Down Expand Up @@ -132,19 +131,19 @@ def test_load_empty_folder(self):
def tests_context(self):
self.widget: OWImportDocuments = self.create_widget(OWImportDocuments)
# change default to something else to see if language is changed
self.widget.language = "Slovenian"
self.widget.language = "sl"

path = os.path.join(DATA_PATH, "good")
self.widget.setCurrentPath(path)
self.widget.reload()
self.wait_until_finished()

# English is recognized for the selected documents
self.assertEqual(self.widget.language, "English")
self.assertEqual(self.widget.language, "en")
self.assertEqual("en", self.get_output(self.widget.Outputs.data).language)
simulate.combobox_activate_item(self.widget.controls.language, "Dutch")

self.assertEqual(self.widget.language, "Dutch")
self.assertEqual(self.widget.language, "nl")
self.assertEqual("nl", self.get_output(self.widget.Outputs.data).language)

# read something else
Expand All @@ -157,9 +156,34 @@ def tests_context(self):
self.widget.setCurrentPath(path)
self.widget.reload()
self.wait_until_finished()
self.assertEqual(self.widget.language, "Dutch")
self.assertEqual(self.widget.language, "nl")
self.assertEqual("nl", self.get_output(self.widget.Outputs.data).language)

def test_migrate_settings(self):
    """Check that a language stored by settings version 1 is migrated."""
    stored = self.widget.settingsHandler.pack_data(self.widget)

    def language_after_loading(old_value):
        # Always address the first stored context anew: the list may have
        # been changed by a widget created from these settings earlier.
        stored["context_settings"][0].values["language"] = old_value
        loaded = self.create_widget(OWImportDocuments, stored_settings=stored)
        loaded.setCurrentPath(self.path)
        loaded.reload()
        self.wait_until_finished(widget=loaded)
        return loaded.language

    # mark the stored context as written by the previous settings version
    stored["context_settings"][0].values["__version__"] = 1

    self.assertEqual("fr", language_after_loading("French"))
    self.assertEqual("grc", language_after_loading("Ancient greek"))
    self.assertIsNone(language_after_loading(None))


if __name__ == "__main__":
unittest.main()

0 comments on commit 8805947

Please sign in to comment.