diff --git a/src/core/annotation.js b/src/core/annotation.js index 825e9247a9a72..e156c3621d7f0 100644 --- a/src/core/annotation.js +++ b/src/core/annotation.js @@ -1292,6 +1292,7 @@ class Annotation { bbox, matrix ); + this.data.textContent = text; } } @@ -2299,7 +2300,6 @@ class WidgetAnnotation extends Annotation { } assert(typeof value === "string", "Expected `value` to be a string."); - value = value.trim(); if (this.data.combo) { // The value can be one of the exportValue or any other values. @@ -2934,6 +2934,30 @@ class TextWidgetAnnotation extends WidgetAnnotation { return chunks; } + async extractTextContent(evaluator, task, viewBox) { + await super.extractTextContent(evaluator, task, viewBox); + const text = this.data.textContent; + if (!text) { + return; + } + + // The text extractor doesn't handle empty lines correctly, so if the + // content we get is more or less (modulo whitespaces) the same as the + // field value we just ignore it. + const allText = text.join("\n"); + if (allText === this.data.fieldValue) { + return; + } + const regex = allText.replaceAll(/([.*+?^${}()|[\]\\])|(\s+)/g, (_m, p1) => + p1 ? `\\${p1}` : "\\s+" + ); + this.data.textContent = new RegExp(`^\\s*${regex}\\s*$`).test( + this.data.fieldValue + ) + ? this.data.fieldValue.split("\n") + : text; + } + getFieldObject() { return { id: this.data.id, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index d74a670c356d7..c4ef395eb0990 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -674,3 +674,4 @@ !issue18072.pdf !stamps.pdf !issue15096.pdf +!issue18036.pdf diff --git a/test/pdfs/issue18036.pdf b/test/pdfs/issue18036.pdf new file mode 100755 index 0000000000000..d6e8d8923ac19 Binary files /dev/null and b/test/pdfs/issue18036.pdf differ diff --git a/test/test_manifest.json b/test/test_manifest.json index 044fb9825838b..294903fff2ba9 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -8477,13 +8477,13 @@ "value": "Hello World" }, "33R": { - "value": "Hello World\nDlrow Olleh\nHello World" + "value": "\n\nHello World\nDlrow Olleh\nHello World" }, "36R": { - "value": "Hello World\nDlrow Olleh\nHello World" + "value": "\n\nHello World\nDlrow Olleh\nHello World" }, "39R": { - "value": "Hello World\nDlrow Olleh\nHello World" + "value": "\n\nHello World\nDlrow Olleh\nHello World" } } }, @@ -10684,5 +10684,13 @@ "value": false } } + }, + { + "id": "issue18036", + "file": "pdfs/issue18036.pdf", + "md5": "940ad97fc1a6f8d288a213af80313c7e", + "rounds": 1, + "type": "eq", + "annotations": true } ]