Skip to content

Commit

Permalink
Extend to allow for only 1 corpus to be processed.
Browse files Browse the repository at this point in the history
  • Loading branch information
TomazErjavec committed Sep 12, 2023
1 parent 266d290 commit b4d08c5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
4 changes: 4 additions & 0 deletions Corpora/Sources-TEI/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
###### Preparing the received corpora
# Country code of the single corpus handled by the `one` target.
# Override it on the command line, e.g. `make one CORPUS=XX`.
CORPUS = HU

# `one` names an action, not a file: run it even if a file called `one` exists.
.PHONY: one
# Run the factorize and add-metadata scripts on the Orig/ParlaMint-${CORPUS}.TEI*
# directories only. The pattern is single-quoted so the shell passes it through
# unexpanded and the scripts receive the glob itself.
one:
	../../Scripts/parlamint-factorize-corpora.pl ../Taxonomies 'Orig/ParlaMint-${CORPUS}.TEI*'
	../../Scripts/add-metadata.pl ../Orientations ../Ministers 'Orig/ParlaMint-${CORPUS}.TEI*' '.'

nohup:
nohup time make prep > preparation.log &
Expand Down
7 changes: 5 additions & 2 deletions Scripts/parlamint-factorize-corpora.pl
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@

binmode(STDERR, 'utf8');

foreach $corpDir (sort glob "$inDir/ParlaMint-*.TEI*") {
if ($inDir =~ /ParlaMint-[A-Z-]+\.TEI/) {$corpDirs = $inDir}
else {$corpDirs = "$inDir/ParlaMint-*.TEI*"}
foreach $corpDir (sort glob($corpDirs)) {
my $param = '';
($country, $anaSuffix) = $corpDir =~ /ParlaMint-([A-Z-]+)\.TEI(\..+)?/ or die;
$anaSuffix = '' unless $anaSuffix;
Expand All @@ -76,7 +78,8 @@
# Factorise
if ($anaSuffix) {
    # For .ana we will also need the root file of the plain .TEI corpus.
    # It is looked up in the directory named like $corpDir but without the
    # suffix (e.g. ParlaMint-XX.TEI.ana -> ParlaMint-XX.TEI).
    # $anaSuffix is whatever followed ".TEI" in $corpDir, so strip it from the
    # end only: an unanchored substitution removes the first occurrence, which
    # could sit in a parent directory name instead.
    (my $teiDir = $corpDir) =~ s/\Q$anaSuffix\E\z//;
    my $teiRoot = "$teiDir/ParlaMint-$country.xml";
    if (-e $teiRoot) {
        # Record the root file in $param only when it really exists.
        $param = " teiRoot=$teiRoot ";
    }
    else {
        print STDERR "WARN: $teiRoot not found\n";
    }
}
Expand Down

0 comments on commit b4d08c5

Please sign in to comment.