Skip to content

Commit

Permalink
When writing JSON to a utf-8 encoded file, disable ASCII escaping (#85)
Browse files Browse the repository at this point in the history
  • Loading branch information
0xabu authored Aug 23, 2023
1 parent 9496e93 commit c298bf7
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 5 deletions.
4 changes: 3 additions & 1 deletion pdfannots/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ def main() -> None:
mdargs = {k: getattr(args, k) for k in MD_FORMAT_ARGS}
printer = (GroupedMarkdownPrinter if args.group else MarkdownPrinter)(**mdargs)
elif args.format == "json":
printer = JsonPrinter(remove_hyphens=args.remove_hyphens)
printer = JsonPrinter(
remove_hyphens=args.remove_hyphens,
output_codec=args.output.encoding)

def write_if_nonempty(s: str) -> None:
if s:
Expand Down
15 changes: 12 additions & 3 deletions pdfannots/printer/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,19 @@ def annot_to_dict(


class JsonPrinter(Printer):
def __init__(self, *, remove_hyphens: bool) -> None:
self.remove_hyphens = remove_hyphens # Whether to remove hyphens across a line break
def __init__(
self,
*,
remove_hyphens: bool, # Whether to remove hyphens across a line break
output_codec: str # Text codec in use for output
) -> None:
self.remove_hyphens = remove_hyphens
self.seen_first = False

# JSON must be represented as UTF-8, UTF-16, or UTF-32. If the output codec is
# one of these, we can disable ASCII string escaping in the JSON encoder.
self.ensure_ascii = output_codec not in ['utf-8', 'utf-16', 'utf-32']

def end(self) -> str:
return '\n'

Expand All @@ -51,4 +60,4 @@ def print_file(
self.seen_first = True

annots = [annot_to_dict(document, a, self.remove_hyphens) for a in document.iter_annots()]
yield from json.JSONEncoder(indent=2).iterencode(annots)
yield from json.JSONEncoder(indent=2, ensure_ascii=self.ensure_ascii).iterencode(annots)
2 changes: 1 addition & 1 deletion tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def test_multicolorgrouping(self) -> None:

class JsonPrinterTest(PrinterTestBase):
def test_flat(self) -> None:
p = JsonPrinter(remove_hyphens=False)
p = JsonPrinter(remove_hyphens=False, output_codec='utf-8')

j = json.loads(
p.begin()
Expand Down

0 comments on commit c298bf7

Please sign in to comment.