From 7203f7f660ba4725ed7535c5d4b24d54143776c0 Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Thu, 10 Aug 2023 18:13:11 +0200 Subject: [PATCH 1/2] Add --no-page-labels switch to ignore page labels in markdown output Fixes issue #65 --- pdfannots/cli.py | 3 +++ pdfannots/printer/markdown.py | 9 ++++++--- pdfannots/types.py | 5 ++++- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pdfannots/cli.py b/pdfannots/cli.py index 57ba206..05f8a24 100644 --- a/pdfannots/cli.py +++ b/pdfannots/cli.py @@ -16,6 +16,7 @@ 'group_highlights_by_color', 'print_filename', 'sections', + 'use_page_labels', 'wrap_column', ] """Names of arguments passed to the markdown printer.""" @@ -74,6 +75,8 @@ def parse_args() -> typ.Tuple[argparse.Namespace, LAParams]: ', '.join(GroupedMarkdownPrinter.ALL_SECTIONS))) g.add_argument("--no-condense", dest="condense", default=True, action="store_false", help="Emit annotations as a blockquote regardless of length.") + g.add_argument("--no-page-labels", dest="use_page_labels", default=True, action="store_false", + help="Ignore page labels if present, just print 1-based page numbers.") g.add_argument("--print-filename", dest="print_filename", default=False, action="store_true", help="Print the name of each file with annotations.") g.add_argument("-w", "--wrap", dest="wrap_column", metavar="COLS", type=int, diff --git a/pdfannots/printer/markdown.py b/pdfannots/printer/markdown.py index b3d0410..594f6fd 100644 --- a/pdfannots/printer/markdown.py +++ b/pdfannots/printer/markdown.py @@ -89,12 +89,14 @@ def __init__( print_filename: bool = False, # Whether to print file names group_highlights_by_color: bool = False, # Whether to group highlights by color remove_hyphens: bool = True, # Whether to remove hyphens across a line break + use_page_labels: bool = True, # Whether to use page labels wrap_column: typ.Optional[int] = None, # Column at which output is word-wrapped **kwargs: typ.Any # Other args, ignored ) -> None: self.print_filename = 
print_filename self.group_highlights_by_color = group_highlights_by_color self.remove_hyphens = remove_hyphens + self.use_page_labels = use_page_labels self.wrap_column = wrap_column self.condense = condense @@ -139,10 +141,11 @@ def print_file( @staticmethod def format_pos( pos: Pos, - document: Document + document: Document, + use_page_label: bool ) -> str: - result = str(pos.page).title() + result = pos.page.pretty_name(use_label=use_page_label).title() o = document.nearest_outline(pos) if o: @@ -229,7 +232,7 @@ def format_annot( # compute the formatted position (and extra bit if needed) as a label assert annot.pos is not None - label = self.format_pos(annot.pos, document) + \ + label = self.format_pos(annot.pos, document, self.use_page_labels) + \ (" " + extra if extra else "") + ":" # If we have short (few words) text with a short or no comment, and the diff --git a/pdfannots/types.py b/pdfannots/types.py index 6a1c59a..d4f5859 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -131,7 +131,10 @@ def __repr__(self) -> str: return '<Page #%d>' % self.pageno # zero-based page index def __str__(self) -> str: - if self.label: + return self.pretty_name() + + def pretty_name(self, use_label=True) -> str: + if self.label and use_label: return 'page %s' % self.label else: # + 1 for 1-based page numbers in normal program output (error messages, etc.) 
From d8cec715b1a5bc728955dd634e50b458911967eb Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Thu, 10 Aug 2023 20:17:26 +0200 Subject: [PATCH 2/2] fix type, rename fn --- pdfannots/printer/markdown.py | 2 +- pdfannots/types.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pdfannots/printer/markdown.py b/pdfannots/printer/markdown.py index 594f6fd..0b30c28 100644 --- a/pdfannots/printer/markdown.py +++ b/pdfannots/printer/markdown.py @@ -145,7 +145,7 @@ def format_pos( use_page_label: bool ) -> str: - result = pos.page.pretty_name(use_label=use_page_label).title() + result = pos.page.format_name(use_label=use_page_label).title() o = document.nearest_outline(pos) if o: diff --git a/pdfannots/types.py b/pdfannots/types.py index d4f5859..f2d5165 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -131,9 +131,9 @@ def __repr__(self) -> str: return '<Page #%d>' % self.pageno # zero-based page index def __str__(self) -> str: - return self.pretty_name() + return self.format_name() - def pretty_name(self, use_label=True) -> str: + def format_name(self, use_label: bool = True) -> str: if self.label and use_label: return 'page %s' % self.label else: 