Skip to content

Commit

Permalink
Keep the empty lines in the text fields
Browse files Browse the repository at this point in the history
It fixes #18036.
  • Loading branch information
calixteman committed Oct 4, 2024
1 parent 6aef332 commit 8a37b34
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 1 deletion.
21 changes: 20 additions & 1 deletion src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -1292,7 +1292,26 @@ class Annotation {
bbox,
matrix
);
this.data.textContent = text;

if (this instanceof TextWidgetAnnotation) {
// The text extractor doesn't handle empty lines correctly, so if the
// extracted content is more or less (modulo whitespace) the same as the
// field value, we ignore it and use the field value's own lines instead.
const allText = text.join("\n");
if (allText === this.data.fieldValue) {
this.data.textContent = text;
} else {
const regex = allText.replaceAll(
/([.*+?^${}()|[\]\\])|(\s+)/g,
(_m, p1) => (p1 ? `\\${p1}` : "\\s+")
);
this.data.textContent = new RegExp(regex).test(this.data.fieldValue)
? this.data.fieldValue.split("\n")
: text;
}
} else {
this.data.textContent = text;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -674,3 +674,4 @@
!issue18072.pdf
!stamps.pdf
!issue15096.pdf
!issue18036.pdf
Binary file added test/pdfs/issue18036.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -10684,5 +10684,13 @@
"value": false
}
}
},
{
"id": "issue18036",
"file": "pdfs/issue18036.pdf",
"md5": "940ad97fc1a6f8d288a213af80313c7e",
"rounds": 1,
"type": "eq",
"annotations": true
}
]

0 comments on commit 8a37b34

Please sign in to comment.