Skip to content

Commit

Permalink
Import Documents: add ID options for CoNLL-U
Browse files Browse the repository at this point in the history
  • Loading branch information
ajdapretnar committed Dec 5, 2024
1 parent 15852b8 commit 8d171fd
Showing 1 changed file with 7 additions and 3 deletions.
10 changes: 7 additions & 3 deletions orangecontrib/text/import_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ def make_text_data(self):
class ImportDocuments:
META_DATA_FILE_KEY = "Text file"
# Candidate columns to merge CoNLL-U metadata on (the first one present is used).
# TODO: make this a user-set variable.
CONLLU_META_DATA = "ID"
CONLLU_META_DATA = ["ID", "Text_ID"]

def __init__(
self,
Expand Down Expand Up @@ -513,13 +513,17 @@ def _add_metadata(self, corpus: Corpus) -> Corpus:
or self._meta_data is None
or (
self.META_DATA_FILE_KEY not in self._meta_data.columns
and self.CONLLU_META_DATA not in self._meta_data.columns
and not any(i in self._meta_data.columns for i in
self.CONLLU_META_DATA)
)
):
return corpus

if self.is_conllu:
df = self._meta_data.set_index(self.CONLLU_META_DATA)
# find the first matching column
match_id = next((idx for idx in self.CONLLU_META_DATA if idx in self._meta_data.columns),
None)
df = self._meta_data.set_index(match_id)
path_column = corpus.get_column("utterance")
else:
df = self._meta_data.set_index(
Expand Down

0 comments on commit 8d171fd

Please sign in to comment.