Skip to content

Commit

Permalink
Keep the empty lines in the text fields
Browse files Browse the repository at this point in the history
It fixes #18036.
  • Loading branch information
calixteman committed Oct 4, 2024
1 parent 6aef332 commit 0fb51fd
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 1 deletion.
17 changes: 16 additions & 1 deletion src/core/annotation.js
Original file line number Diff line number Diff line change
Expand Up @@ -1292,7 +1292,22 @@ class Annotation {
bbox,
matrix
);
this.data.textContent = text;

// For text widgets the extracted appearance text is only kept when it
// differs from the field value. The text extractor mishandles empty lines,
// so the comparison is done modulo whitespace: the extracted lines are
// joined and turned into a pattern in which regex metacharacters are
// escaped and every run of whitespace becomes `\s+`.
// NOTE(review): the pattern is not anchored, so a field value that merely
// contains the extracted text is treated as equivalent too — confirm that
// this is intended.
const isCoveredByFieldValue =
  this instanceof TextWidgetAnnotation &&
  new RegExp(
    text
      .join("\n")
      .replaceAll(/([.*+?^${}()|[\]\\])|(\s+)/g, (_match, special) =>
        special ? `\\${special}` : "\\s+"
      )
  ).test(this.data.fieldValue);
if (!isCoveredByFieldValue) {
  this.data.textContent = text;
}
}
}

Expand Down
1 change: 1 addition & 0 deletions test/pdfs/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -674,3 +674,4 @@
!issue18072.pdf
!stamps.pdf
!issue15096.pdf
!issue18036.pdf
Binary file added test/pdfs/issue18036.pdf
Binary file not shown.
8 changes: 8 additions & 0 deletions test/test_manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -10684,5 +10684,13 @@
"value": false
}
}
},
{
"id": "issue18036",
"file": "pdfs/issue18036.pdf",
"md5": "940ad97fc1a6f8d288a213af80313c7e",
"rounds": 1,
"type": "eq",
"annotations": true
}
]

0 comments on commit 0fb51fd

Please sign in to comment.