Skip to content

Commit

Permalink
Resolves [BUG] tot_len_ratio being inf when src.size() is zero
Browse files Browse the repository at this point in the history
Resolves the issue "[BUG] tot_len_ratio being inf when src.size() is zero" (clab#39) by skipping lines whose source or target side has zero length.
  • Loading branch information
Zae Myung Kim committed Jan 10, 2019
1 parent 7c2bbca commit af6b8e0
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions src/fast_align.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
string line;
bool flag = false;
int lc = 0;
int skipped_lines = 0;
cerr << "INITIAL PASS " << endl;
while (true) {
getline(in, line);
Expand All @@ -259,7 +260,9 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
if (is_reverse)
swap(src, trg);
if (src.size() == 0 || trg.size() == 0) {
cerr << "Error in line " << lc << "\n" << line << endl;
cerr << "Error in line " << lc << ". Skipped.\n" << line << endl;
skipped_lines++;
continue;
}
*tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size());
*n_target_tokens += trg.size();
Expand Down Expand Up @@ -288,10 +291,11 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
}
AddTranslationOptions(insert_buffer, s2t);

mean_srclen_multiplier = (*tot_len_ratio) / lc;
mean_srclen_multiplier = (*tot_len_ratio) / (lc - skipped_lines);
if (flag) {
cerr << endl;
}
cerr << "number of skipped lines = " << skipped_lines << endl;
cerr << "expected target length = source length * " << mean_srclen_multiplier << endl;
}

Expand Down

0 comments on commit af6b8e0

Please sign in to comment.