diff --git a/src/core/annotation.js b/src/core/annotation.js index 825e9247a9a72..454003cab8726 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1292,7 +1292,26 @@ class Annotation { bbox, matrix ); - this.data.textContent = text; + + if (this instanceof TextWidgetAnnotation) { + // The text extractor doesn't handle empty lines correctly, so if the + // content we get is more or less (modulo whitespaces) the same as the + // field value we just ignore it. + const allText = text.join("\n"); + if (allText === this.data.fieldValue) { + this.data.textContent = text; + } else { + const regex = allText.replaceAll( + /([.*+?^${}()|[\]\\])|(\s+)/g, + (_m, p1) => (p1 ? `\\${p1}` : "\\s+") + ); + this.data.textContent = new RegExp(regex).test(this.data.fieldValue) + ? this.data.fieldValue.split("\n") + : text; + } + } else { + this.data.textContent = text; + } } } diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d74a670c356d7..c4ef395eb0990 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -674,3 +674,4 @@ !issue18072.pdf !stamps.pdf !issue15096.pdf +!issue18036.pdf diff --git a/test/pdfs/issue18036.pdf b/test/pdfs/issue18036.pdf new file mode 100755 index 0000000000000..d6e8d8923ac19 Binary files /dev/null and b/test/pdfs/issue18036.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 044fb9825838b..a1f6b90a127df 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -10684,5 +10684,13 @@ "value": false } } + }, + { + "id": "issue18036", + "file": "pdfs/issue18036.pdf", + "md5": "940ad97fc1a6f8d288a213af80313c7e", + "rounds": 1, + "type": "eq", + "annotations": true } ]