Skip to content

Commit

Permalink
Rework how enclosure/removing/ignoring cohorts works (fixes #143).
Browse files Browse the repository at this point in the history
This may yield different results in cases where multiple such methods overlapped, but the new results are semantically better and easier to understand. May need new rule flags for finer control.
  • Loading branch information
TinoDidriksen committed May 22, 2024
1 parent 06fb613 commit 9eb7e54
Show file tree
Hide file tree
Showing 17 changed files with 150 additions and 190 deletions.
25 changes: 8 additions & 17 deletions src/ApertiumApplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,13 @@ void ApertiumApplicator::printReading(Reading* reading, std::ostream& output) {
}

void ApertiumApplicator::printCohort(Cohort* cohort, std::ostream& output, bool profiling) {
if (cohort->local_number == 0 || (cohort->type & CT_REMOVED)) {
if (!cohort->text.empty()) {
u_fprintf(output, "%S", cohort->text.data());
}
return;
}

if (!profiling) {
cohort->unignoreAll();

Expand Down Expand Up @@ -964,11 +971,6 @@ void ApertiumApplicator::printCohort(Cohort* cohort, std::ostream& output, bool
if (!cohort->text.empty()) {
u_fprintf(output, "%S", cohort->text.data());
}
for (auto& c : cohort->removed) {
if (!c->text.empty()) {
u_fprintf(output, "%S", c->text.data());
}
}
}

void ApertiumApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
Expand All @@ -977,18 +979,7 @@ void ApertiumApplicator::printSingleWindow(SingleWindow* window, std::ostream& o
u_fprintf(output, "%S", window->text.data());
}

for (uint32_t c = 0; c < window->cohorts.size(); c++) {
Cohort* cohort = window->cohorts[c];

if (c == 0) { // Skip magic cohort
for (auto& c : cohort->removed) {
if (!c->text.empty()) {
u_fprintf(output, "%S", c->text.data());
}
}
continue;
}

for (auto& cohort : window->all_cohorts) {
printCohort(cohort, output, profiling);
u_fflush(output);
}
Expand Down
9 changes: 0 additions & 9 deletions src/Cohort.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,6 @@ Cohort::~Cohort() {
}
free_reading(wread);

for (auto iter : removed) {
free_cohort(iter);
}
if (parent) {
parent->parent->cohort_map.erase(global_number);
parent->parent->dep_window.erase(global_number);
Expand Down Expand Up @@ -125,12 +122,6 @@ void Cohort::clear() {
deleted.clear();
delayed.clear();
wread = nullptr;

for (auto iter : removed) {
free_cohort(iter);
}
removed.clear();
assert(enclosed.empty() && "Enclosed was not empty!");
}

void Cohort::detach() {
Expand Down
5 changes: 2 additions & 3 deletions src/Cohort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ enum {
CT_NUM_CURRENT = (1 << 3),
CT_DEP_DONE = (1 << 4),
CT_AP_UNKNOWN = (1 << 5),
CT_IGNORED = (1 << 6),
};

constexpr auto DEP_NO_PARENT = std::numeric_limits<uint32_t>::max();
Expand All @@ -50,6 +51,7 @@ class Cohort {
// ToDo: Get rid of global_number in favour of Cohort* relations
uint32_t global_number = 0;
uint32_t local_number = 0;
uint32_t enclosed = 0;
Tag* wordform = nullptr;
uint32_t dep_self = 0;
uint32_t dep_parent = DEP_NO_PARENT;
Expand All @@ -69,9 +71,6 @@ class Cohort {
num_t num_max, num_min;
uint32SortedVector dep_children;
boost::dynamic_bitset<> possible_sets;
CohortVector enclosed;
CohortVector removed;
CohortVector ignored_cohorts;
RelationCtn relations;
RelationCtn relations_input;
uint32_t line_number = 0;
Expand Down
4 changes: 1 addition & 3 deletions src/FSTApplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -515,9 +515,7 @@ void FSTApplicator::printSingleWindow(SingleWindow* window, std::ostream& output
}
}

uint32_t cs = UI32(window->cohorts.size());
for (uint32_t c = 0; c < cs; c++) {
Cohort* cohort = window->cohorts[c];
for (auto& cohort : window->all_cohorts) {
printCohort(cohort, output, profiling);
}

Expand Down
8 changes: 1 addition & 7 deletions src/GrammarApplicator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -535,10 +535,6 @@ void GrammarApplicator::printCohort(Cohort* cohort, std::ostream& output, bool p
}
}

for (auto iter : cohort->removed) {
printCohort(iter, output, profiling);
}

if (profiling && cohort == rule_target) {
u_fprintf(output, "# RULE TARGET END\n");
}
Expand Down Expand Up @@ -569,9 +565,7 @@ void GrammarApplicator::printSingleWindow(SingleWindow* window, std::ostream& ou
}
}

uint32_t cs = UI32(window->cohorts.size());
for (uint32_t c = 0; c < cs; c++) {
Cohort* cohort = window->cohorts[c];
for (auto& cohort : window->all_cohorts) {
printCohort(cohort, output, profiling);
}

Expand Down
5 changes: 4 additions & 1 deletion src/GrammarApplicator_matchSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -530,7 +530,10 @@ uint32_t GrammarApplicator::doesTagMatchReading(const Reading& reading, const Ta
}
}
else if (tag.type & T_ENCL) {
if (!reading.parent->enclosed.empty()) {
auto sw = reading.parent->parent;
auto c = std::find(sw->all_cohorts.begin() + reading.parent->local_number, sw->all_cohorts.end(), reading.parent);
++c;
if (c != sw->all_cohorts.end() && (*c)->enclosed) {
match = true;
}
}
Expand Down
35 changes: 18 additions & 17 deletions src/GrammarApplicator_reflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,19 +812,23 @@ Cohort* GrammarApplicator::delimitAt(SingleWindow& current, Cohort* cohort) {
addTagToReading(*cReading, begintag);

cCohort->appendReading(cReading);

nwin->appendCohort(cCohort);

uint32_t c = cohort->local_number;
size_t nc = c + 1;
for (; nc < current.cohorts.size(); nc++) {
current.cohorts[nc]->parent = nwin;
nwin->appendCohort(current.cohorts[nc]);
}
c = UI32(current.cohorts.size() - c);
for (nc = 0; nc < c - 1; nc++) {
current.cohorts.pop_back();
auto lc = cohort->local_number;
auto nc = std::find(current.all_cohorts.begin() + lc, current.all_cohorts.end(), cohort);
++nc;
auto from = nc;
for (; nc != current.all_cohorts.end(); ++nc) {
(*nc)->parent = nwin;
if ((*nc)->type & (CT_ENCLOSED | CT_REMOVED | CT_IGNORED)) {
nwin->all_cohorts.push_back(*nc);
}
else {
nwin->appendCohort(*nc);
}
}
current.cohorts.erase(current.cohorts.begin() + lc + 1, current.cohorts.end());
current.all_cohorts.erase(from, current.all_cohorts.end());

cohort = current.cohorts.back();
for (auto reading : cohort->readings) {
Expand All @@ -849,25 +853,22 @@ void GrammarApplicator::reflowTextuals_Reading(Reading& r) {
}

// Walks a single cohort and hands everything attached to it to the reflow
// helpers: each cohort in c.enclosed and c.removed is passed back into this
// function recursively, and each reading in c.readings, c.deleted, c.ignored
// and c.delayed is passed to reflowTextuals_Reading().
// NOTE(review): this text comes from a diff view that does not show +/-
// markers, so the loops below may mix lines from before and after the commit
// (the Cohort.hpp hunk lists `enclosed` both as a uint32_t and as a
// CohortVector) — confirm against the real file before relying on this body.
void GrammarApplicator::reflowTextuals_Cohort(Cohort& c) {
// Cohorts attached to this one are processed first, by recursion.
for (auto it : c.enclosed) {
reflowTextuals_Cohort(*it);
}
for (auto it : c.removed) {
reflowTextuals_Cohort(*it);
}
// Then every reading list of this cohort, in the order
// readings, deleted, ignored, delayed.
for (auto it : c.readings) {
reflowTextuals_Reading(*it);
}
for (auto it : c.deleted) {
reflowTextuals_Reading(*it);
}
for (auto it : c.ignored) {
reflowTextuals_Reading(*it);
}
for (auto it : c.delayed) {
reflowTextuals_Reading(*it);
}
}

void GrammarApplicator::reflowTextuals_SingleWindow(SingleWindow& sw) {
for (auto it : sw.cohorts) {
for (auto it : sw.all_cohorts) {
reflowTextuals_Cohort(*it);
}
}
Expand Down
Loading

0 comments on commit 9eb7e54

Please sign in to comment.