From 3e92396914a083115f204264df10e398960d7467 Mon Sep 17 00:00:00 2001 From: Ajda Date: Fri, 5 Jul 2024 13:47:15 +0200 Subject: [PATCH] Add Spacy utils --- orangecontrib/text/corpus.py | 4 ++++ orangecontrib/text/language.py | 2 ++ requirements.txt | 1 + 3 files changed, 7 insertions(+) diff --git a/orangecontrib/text/corpus.py b/orangecontrib/text/corpus.py index b0660a830..994ac0111 100644 --- a/orangecontrib/text/corpus.py +++ b/orangecontrib/text/corpus.py @@ -414,6 +414,10 @@ def has_tokens(self): """ Return whether corpus is preprocessed or not. """ return self._tokens is not None + def has_tags(self): + """ Return whether corpus is POS tagged or not. """ + return self._pos_tags is not None + def _base_tokens(self): from orangecontrib.text.preprocess import BASE_TRANSFORMER, \ BASE_TOKENIZER, PreprocessorList diff --git a/orangecontrib/text/language.py b/orangecontrib/text/language.py index fb64ddde3..37a5361aa 100644 --- a/orangecontrib/text/language.py +++ b/orangecontrib/text/language.py @@ -99,6 +99,8 @@ "uk": "Ukrainian", "ur": "Urdu", "vi": "Vietnamese", + # spaCy code for the multi-language model + "xx": "Multi-language", "zh": "Chinese", "zh_char": "Chinese - Chinese Characters", None: None, diff --git a/requirements.txt b/requirements.txt index 36c489f1d..b9ab4c5b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,6 +25,7 @@ serverfiles simhash >=1.11 shapely >=2.0 six +spacy tweepy >=4.0.0 ufal.udpipe >=1.2.0.3 trimesh >=3.9.8 # required by alphashape