Skip to content

Commit

Permalink
Verify that we don't have pdfminer.six#1059 (and warn about it) (#23)
Browse files Browse the repository at this point in the history
* test: verify that we do not have pdfminer.six#1059 (and warn about it)

* fix: warn rather than throwing TypeError on bogus text objects
  • Loading branch information
dhdaines authored Nov 27, 2024
1 parent 1f6c44e commit 3ef10b1
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 1 deletion.
7 changes: 7 additions & 0 deletions playa/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""

import argparse
import logging
import csv
from pathlib import Path

Expand All @@ -26,12 +27,18 @@ def make_argparse() -> argparse.ArgumentParser:
choices=["screen", "page", "user"],
default="screen",
)
parser.add_argument(
"--debug",
help="Very verbose debugging output",
action="store_true",
)
return parser


def main() -> None:
parser = make_argparse()
args = parser.parse_args()
logging.basicConfig(level=logging.DEBUG if args.debug else logging.WARNING)
writer = csv.DictWriter(args.outfile, fieldnames=playa.fieldnames)
writer.writeheader()
for path in args.pdfs:
Expand Down
4 changes: 3 additions & 1 deletion playa/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1640,6 +1640,7 @@ def render_string(
if isinstance(obj, str):
obj = make_compat_bytes(obj)
if not isinstance(obj, bytes):
log.warning("Found non-string %r in text object", obj)
continue
for cid in self.textstate.font.decode(obj):
if needcharspace:
Expand Down Expand Up @@ -2333,7 +2334,8 @@ def do_TJ(self, strings: PDFObject) -> None:
positioning"""
args = list_value(strings)
if not all(isinstance(s, (int, float, bytes)) for s in args):
raise TypeError("TJ takes only strings and numbers, not %r" % args)
log.warning("Found non-string in text object %r", args)
return
self.textobj.append(make_txt("TJ", *args))

def do_Tj(self, s: PDFObject) -> None:
Expand Down
Binary file added samples/contrib/issue-1059-cmap-decode.pdf
Binary file not shown.

0 comments on commit 3ef10b1

Please sign in to comment.