Skip to content

Commit

Permalink
Preprocess - Store settings for unselected preprocessors
Browse files Browse the repository at this point in the history
  • Loading branch information
PrimozGodec committed Mar 14, 2024
1 parent e36fdea commit cc7f4d0
Show file tree
Hide file tree
Showing 2 changed files with 146 additions and 30 deletions.
45 changes: 15 additions & 30 deletions orangecontrib/text/widgets/owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,7 @@ def __edit_finished(self):
pattern = self.__edit.text()
if self.__pattern != pattern:
self.__set_pattern(pattern)
if self.method == self.Regexp:
self.edited.emit()
self.edited.emit()

def parameters(self) -> Dict:
params = super().parameters()
Expand Down Expand Up @@ -555,32 +554,28 @@ def __set_snowball_lang(self, language: str):
self.__snowball_lang = language
self.__combo_sbl.set_current_language(language)
self.changed.emit()
if self.method == self.Snowball:
self.edited.emit()
self.edited.emit()

def __set_udpipe_lang(self, language: str):
if self.__udpipe_lang != language:
self.__udpipe_lang = language
self.__combo_udl.set_current_language(language)
self.changed.emit()
if self.method == self.UDPipe:
self.edited.emit()
self.edited.emit()

def __set_lemmagen_lang(self, language: str):
if self.__lemmagen_lang != language:
self.__lemmagen_lang = language
self.__combo_lemm.set_current_language(language)
self.changed.emit()
if self.method == self.Lemmagen:
self.edited.emit()
self.edited.emit()

def __set_use_tokenizer(self, use: bool):
if self.__use_tokenizer != use:
self.__use_tokenizer = use
self.__check_use.setChecked(use)
self.changed.emit()
if self.method == self.UDPipe:
self.edited.emit()
self.edited.emit()

def parameters(self) -> Dict:
params = super().parameters()
Expand Down Expand Up @@ -738,8 +733,7 @@ def __init__(self, parent=None, **kwargs):
def __sw_loader_activated(self):
self.__sw_file = self.__sw_loader.get_current_file()
self.changed.emit()
if self.Stopwords in self.methods:
self.edited.emit()
self.edited.emit()

def __sw_invalidate(self):
if self.Stopwords in self.methods and self.__sw_file:
Expand All @@ -749,8 +743,7 @@ def __sw_invalidate(self):
def __lx_loader_activated(self):
self.__lx_file = self.__lx_loader.get_current_file()
self.changed.emit()
if self.Lexicon in self.methods:
self.edited.emit()
self.edited.emit()

def __lx_invalidate(self):
if self.Lexicon in self.methods and self.__lx_file:
Expand All @@ -761,37 +754,31 @@ def __edit_finished(self):
pattern = self.__edit.text()
if self.__pattern != pattern:
self.__set_pattern(pattern)
if self.Regexp in self.methods:
self.edited.emit()
self.edited.emit()

def __pos_edit_finished(self):
tags = self.__pos_edit.text()
if self.__pos_tag != tags:
self.__set_tags(tags)
if self.PosTag in self.methods:
self.edited.emit()
self.edited.emit()

def __freq_group_clicked(self):
i = self.__freq_group.checkedId()
if self.__freq_type != i:
self.__set_freq_type(i)
if self.DocFreq in self.methods:
self.edited.emit()
self.edited.emit()

def __rel_spins_edited(self):
if self.DocFreq in self.methods and self.__freq_type == 0:
self.edited.emit()
self.edited.emit()

def __abs_spins_edited(self):
if self.DocFreq in self.methods and self.__freq_type == 1:
self.edited.emit()
self.edited.emit()

def __spin_n_edited(self):
n = self.__spin_n.value()
if self.__n_token != n:
self.__set_n_tokens(n)
if self.MostFreq in self.methods:
self.edited.emit()
self.edited.emit()

def setParameters(self, params: Dict):
super().setParameters(params)
Expand Down Expand Up @@ -822,8 +809,7 @@ def __set_language(self, language: Optional[str]):
self.__sw_lang = language
self.__combo.set_current_language(language)
self.changed.emit()
if self.Stopwords in self.methods:
self.edited.emit()
self.edited.emit()

def __set_sw_path(self, path: RecentPath, paths: List[RecentPath] = []):
self.__sw_loader.recent_paths = paths
Expand All @@ -842,8 +828,7 @@ def __set_includes_numbers(self, includes: bool):
self.__incl_num = includes
self.__check_numbers.setChecked(includes)
self.changed.emit()
if self.Numbers in self.methods:
self.edited.emit()
self.edited.emit()

def __set_pattern(self, pattern: str):
if self.__pattern != pattern:
Expand Down
131 changes: 131 additions & 0 deletions orangecontrib/text/widgets/tests/test_owpreprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from unittest.mock import patch, PropertyMock, MagicMock, Mock

import numpy as np
from AnyQt.QtWidgets import QCheckBox, QLineEdit, QButtonGroup, QDoubleSpinBox, QSpinBox
from Orange.data import Domain, StringVariable
from orangewidget.utils.filedialogs import RecentPath
from Orange.widgets.tests.base import WidgetTest
Expand All @@ -24,6 +25,7 @@
LanguageComboBox,
_DEFAULT_NONE,
UDPipeComboBox,
ValidatedLineEdit,
)


Expand Down Expand Up @@ -180,6 +182,135 @@ def test_no_tokens_left(self):
self.wait_until_finished()
self.assertFalse(self.widget.Warning.no_token_left.is_shown())

def test_all_changes_saved_tokenize(self):
    """A pattern typed into the tokenizer's line edit reaches stored settings.

    The widget is created from stored settings whose tokenizer entry only
    carries ``method: 0``; after editing the pattern and applying, the
    stored parameters must contain the new pattern.
    """
    stored = {
        "__version__": 4,
        "storedsettings": {
            "preprocessors": [("preprocess.tokenize", {"method": 0})]
        },
    }
    widget = self.create_widget(OWPreprocess, stored_settings=stored)
    self.send_signal(self.widget.Inputs.corpus, self.corpus, widget=widget)
    self.wait_until_finished()

    # Simulate the user typing a pattern and leaving the field.
    line_edit = widget.findChild(ValidatedLineEdit)
    line_edit.setText("abc")
    line_edit.editingFinished.emit()
    widget.apply()

    params = widget.storedsettings["preprocessors"][0][1]
    self.assertEqual("abc", params["pattern"])

def test_all_changes_saved_normalize(self):
    """Normalizer option changes are stored while the stored method stays 0.

    Each language combo and the UDPipe checkbox is changed in turn; after
    every ``apply`` the corresponding key must hold the new value and
    ``method`` must still be 0.
    """
    stored = {
        "__version__": 4,
        "storedsettings": {
            "preprocessors": [("preprocess.normalize", {"method": 0})]
        },
    }
    widget = self.create_widget(OWPreprocess, stored_settings=stored)
    self.send_signal(self.widget.Inputs.corpus, self.corpus, widget=widget)
    self.wait_until_finished()

    def apply_and_get_params():
        # Apply the widget and return the first preprocessor's parameters.
        widget.apply()
        return widget.storedsettings["preprocessors"][0][1]

    def check_language(key, expected):
        # Verify the language stored under `key` and the untouched method.
        params = apply_and_get_params()
        self.assertEqual(expected, params[key])
        self.assertEqual(0, params["method"])

    widget.findChildren(LanguageComboBox)[0].setCurrentText("French")
    check_language("snowball_language", "fr")

    widget.findChild(UDPipeComboBox).setCurrentText("Slovenian")
    check_language("udpipe_language", "sl")

    widget.findChildren(LanguageComboBox)[2].setCurrentText("Spanish")
    check_language("lemmagen_language", "es")

    # test UDPIPE checkbox
    widget.findChild(QCheckBox).click()
    params = apply_and_get_params()
    self.assertTrue(params["udpipe_tokenizer"])
    self.assertEqual(0, params["method"])

def test_all_changes_saved_filtering(self):
    """Filter option changes are stored while ``methods`` stays ``[1]``.

    Every filter control (language combo, numbers checkbox, regexp and POS
    line edits, frequency radios and spins) is changed in turn; after each
    ``apply`` the matching key must hold the new value and the stored
    ``methods`` list must remain ``[1]``.
    """
    stored = {
        "__version__": 4,
        "storedsettings": {
            "preprocessors": [("preprocess.filter", {"methods": [1]})]
        },
    }
    w = self.create_widget(OWPreprocess, stored_settings=stored)
    self.send_signal(self.widget.Inputs.corpus, self.corpus, widget=w)
    self.wait_until_finished()

    def apply_and_get_params():
        # Apply the widget and return the first preprocessor's parameters.
        w.apply()
        return w.storedsettings["preprocessors"][0][1]

    def check_stored(key, expected):
        # Verify the value stored under `key` and the untouched methods.
        params = apply_and_get_params()
        self.assertEqual(expected, params[key])
        self.assertListEqual([1], params["methods"])

    def finish_spin_edit(spin, value):
        # Set a spin box value and signal that editing has finished.
        spin.setValue(value)
        spin.editingFinished.emit()

    def finish_text_edit(line_edit, text):
        # Set a line edit's text and signal that editing has finished.
        line_edit.setText(text)
        line_edit.editingFinished.emit()

    # test language combo
    w.findChild(LanguageComboBox).setCurrentText("French")
    check_stored("language", "fr")

    # test include numbers checkbox
    w.findChildren(QCheckBox)[8].click()
    params = apply_and_get_params()
    self.assertTrue(params["incl_num"])
    self.assertListEqual([1], params["methods"])

    # test regexp line edit
    finish_text_edit(w.findChild(QLineEdit), "abc")
    check_stored("pattern", "abc")

    # test relative, absolute radios
    w.findChild(QButtonGroup).button(1).click()
    check_stored("freq_type", 1)

    # test relative from spin
    finish_spin_edit(w.findChild(QDoubleSpinBox), 0.22)
    check_stored("rel_start", 0.22)

    # test relative to spin
    finish_spin_edit(w.findChildren(QDoubleSpinBox)[1], 0.33)
    check_stored("rel_end", 0.33)

    # test absolute from spin
    finish_spin_edit(w.findChild(QSpinBox), 22)
    check_stored("abs_start", 22)

    # test absolute to spin
    finish_spin_edit(w.findChildren(QSpinBox)[1], 33)
    check_stored("abs_end", 33)

    # test most frequent tokens spin
    finish_spin_edit(w.findChildren(QSpinBox)[2], 44)
    check_stored("n_tokens", 44)

    # test POS tags line edit
    finish_text_edit(w.findChildren(QLineEdit)[-1], "JJ")
    check_stored("pos_tags", "JJ")


@patch(SF_LIST, new=Mock(return_value=SERVER_FILES))
class TestOWPreprocessMigrateSettings(WidgetTest):
Expand Down

0 comments on commit cc7f4d0

Please sign in to comment.