Skip to content

Commit

Permalink
Skip empty <w> tags when generating MiniOCR, since OCR Highlight 0.7 does not accept them
Browse the repository at this point in the history
  • Loading branch information
DiegoPino authored Nov 28, 2021
1 parent 0537d88 commit f6cfcf7
Showing 1 changed file with 10 additions and 7 deletions.
17 changes: 10 additions & 7 deletions src/Plugin/StrawberryRunnersPostProcessor/OcrPostProcessor.php
Original file line number Diff line number Diff line change
Expand Up @@ -573,13 +573,16 @@ protected function hOCRtoMiniOCR($output, $pageid) {
$miniocr->text(' ');
}
$notFirstWord = TRUE;
$miniocr->startElement("w");
$miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h);
$miniocr->text($text);
// Only assume we have at least one word for <w> tags
// Since lines? could end empty?
$atleastone_word = TRUE;
$miniocr->endElement();
// New OCR Highlight does not like empty <w> tags at all
if (strlen(trim($text)) > 0) {
$miniocr->startElement("w");
$miniocr->writeAttribute("x", $l . ' ' . $t . ' ' . $w . ' ' . $h);
$miniocr->text($text);
// Only assume we have at least one word for <w> tags
// Since lines? could end empty?
$atleastone_word = TRUE;
$miniocr->endElement();
}
}
}
$miniocr->endElement();
Expand Down

0 comments on commit f6cfcf7

Please sign in to comment.