From f2dc37fff795ab91bd639310a00b5e998c328ffb Mon Sep 17 00:00:00 2001 From: "Charles K. Neimog" Date: Sat, 29 Jul 2023 18:25:10 -0300 Subject: [PATCH 1/4] add color when using json --- pdfannots/__init__.py | 14 ++++++++++++-- pdfannots/printer/json.py | 6 ++++++ pdfannots/types.py | 16 +++++++++++++++- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py index da4d6f5..42ff469 100644 --- a/pdfannots/__init__.py +++ b/pdfannots/__init__.py @@ -22,7 +22,7 @@ import pdfminer.settings import pdfminer.utils -from .types import Page, Outline, AnnotationType, Annotation, Document +from .types import Page, Outline, AnnotationType, Annotation, Document, RGB from .utils import cleanup_text, decode_datetime pdfminer.settings.STRICT = False @@ -71,6 +71,16 @@ def _mkannotation( # decode as string, normalise line endings, replace special characters contents = cleanup_text(pdfminer.utils.decode_text(contents)) + color = pa.get('C') + + + if not (isinstance(color, list) and len(color) == 3 and all(isinstance(e, float) for e in color)): + rgb = RGB(*color) + else: + logger.warning("Invalid color %s in annotation on %s", color, page) + rgb = None + + # Rect defines the location of the annotation on the page rect = pdftypes.resolve1(pa.get('Rect')) @@ -94,7 +104,7 @@ def _mkannotation( created = decode_datetime(createds) return Annotation(page, annot_type, quadpoints, rect, - contents, author=author, created=created) + contents, author=author, created=created, color=rgb) def _get_outlines(doc: PDFDocument) -> typ.Iterator[Outline]: diff --git a/pdfannots/printer/json.py b/pdfannots/printer/json.py index 13e128c..3420eb5 100644 --- a/pdfannots/printer/json.py +++ b/pdfannots/printer/json.py @@ -35,6 +35,12 @@ def annot_to_dict( if annot.created: result['created'] = annot.created.strftime('%Y-%m-%dT%H:%M:%S') + if annot.color: + try: + result['color'] = annot.color.ashex() + except: + pass + return result diff --git a/pdfannots/types.py b/pdfannots/types.py index 2f2dc82..3d84757 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -297,7 +297,8 @@ def __init__( rect: typ.Optional[BoxCoords] = None, contents: typ.Optional[str] = None, author: typ.Optional[str] = None, - created: typ.Optional[datetime.datetime] = None): + created: typ.Optional[datetime.datetime] = None, + color: typ.Optional[RGB] = None): # RGB color expressed as a 3-tuple of floats in [0, 1] # Construct boxes from quadpoints boxes = [] @@ -324,6 +325,7 @@ def __init__( self.author = author self.created = created self.text = [] + self.color = color self.pre_context = None self.post_context = None self.boxes = boxes @@ -479,3 +481,15 @@ def nearest_outline( return page.outlines[idx - 1] return None + + +class RGB(typ.NamedTuple): + red: float + green: float + blue: float + + def ashex(self) -> str: + red_hex = format(int(self.red * 255), '02x') + green_hex = format(int(self.green * 255), '02x') + blue_hex = format(int(self.blue * 255), '02x') + return f"#{str(red_hex)}{str(green_hex)}{(blue_hex)}" From e3fe7acd5ce4957938afc79656cb4bab1a9dc674 Mon Sep 17 00:00:00 2001 From: "Charles K. Neimog" Date: Sun, 30 Jul 2023 14:35:29 -0300 Subject: [PATCH 2/4] minor fixes --- pdfannots/__init__.py | 13 +++++++------ pdfannots/printer/json.py | 5 +---- pdfannots/types.py | 3 ++- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py index 42ff469..40efa80 100644 --- a/pdfannots/__init__.py +++ b/pdfannots/__init__.py @@ -72,15 +72,16 @@ def _mkannotation( contents = cleanup_text(pdfminer.utils.decode_text(contents)) color = pa.get('C') - - - if not (isinstance(color, list) and len(color) == 3 and all(isinstance(e, float) for e in color)): - rgb = RGB(*color) + if color is not None: + if not (isinstance(color, list) and len(color) == 3 and all(isinstance(e, float) for e in color)): + rgb = RGB(*color) + else: + logger.warning("Invalid color %s in annotation on %s", color, page) + rgb = None else: - logger.warning("Invalid color %s in annotation on %s", color, page) + # Handle the case when 'C' attribute is None (no color information available) rgb = None - # Rect defines the location of the annotation on the page rect = pdftypes.resolve1(pa.get('Rect')) diff --git a/pdfannots/printer/json.py b/pdfannots/printer/json.py index 3420eb5..fc0b2c5 100644 --- a/pdfannots/printer/json.py +++ b/pdfannots/printer/json.py @@ -36,10 +36,7 @@ def annot_to_dict( result['created'] = annot.created.strftime('%Y-%m-%dT%H:%M:%S') if annot.color: - try: - result['color'] = annot.color.ashex() - except: - pass + result['color'] = annot.color.ashex() return result diff --git a/pdfannots/types.py b/pdfannots/types.py index 3d84757..5751ea3 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -276,6 +276,7 @@ class Annotation(ObjectWithPos): text Text in the order captured (use gettext() for a cleaner form) author Author of the annotation created Timestamp the annotation was created + color RGB color of the annotation last_charseq Sequence number of the most recent character in text Attributes updated only for StrikeOut annotations: @@ -298,7 +299,7 @@ def __init__( contents: typ.Optional[str] = None, author: typ.Optional[str] = None, created: typ.Optional[datetime.datetime] = None, - color: typ.Optional[RGB] = None): # RGB color expressed as a 3-tuple of floats in [0, 1] + color: typ.Optional[RGB] = None): # Construct boxes from quadpoints boxes = [] From 4319024d9a5e23326962edb723e7d499b4ae7107 Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Sun, 30 Jul 2023 13:42:25 +0200 Subject: [PATCH 3/4] fixup a few nits --- pdfannots/__init__.py | 9 ++++----- pdfannots/types.py | 2 +- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py index 40efa80..5c0fe29 100644 --- a/pdfannots/__init__.py +++ b/pdfannots/__init__.py @@ -71,16 +71,15 @@ def _mkannotation( # decode as string, normalise line endings, replace special characters contents = cleanup_text(pdfminer.utils.decode_text(contents)) + rgb: typ.Optional[RGB] = None color = pa.get('C') if color is not None: - if not (isinstance(color, list) and len(color) == 3 and all(isinstance(e, float) for e in color)): + if isinstance(color, list) + and len(color) == 3 + and all(isinstance(e, float) and 0 <= e <= 1 for e in color): rgb = RGB(*color) else: logger.warning("Invalid color %s in annotation on %s", color, page) - rgb = None - else: - # Handle the case when 'C' attribute is None (no color information available) - rgb = None # Rect defines the location of the annotation on the page rect = pdftypes.resolve1(pa.get('Rect')) diff --git a/pdfannots/types.py b/pdfannots/types.py index 5751ea3..b957a97 100644 --- a/pdfannots/types.py +++ b/pdfannots/types.py @@ -299,7 +299,7 @@ def __init__( contents: typ.Optional[str] = None, author: typ.Optional[str] = None, created: typ.Optional[datetime.datetime] = None, - color: typ.Optional[RGB] = None): + color: typ.Optional[RGB] = None): # Construct boxes from quadpoints boxes = [] From 386863306e5c3ec4c14dd729e493413868756cba Mon Sep 17 00:00:00 2001 From: Andrew Baumann Date: Sun, 30 Jul 2023 13:44:20 +0200 Subject: [PATCH 4/4] fix the fixup --- pdfannots/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py index 5c0fe29..007cf5d 100644 --- a/pdfannots/__init__.py +++ b/pdfannots/__init__.py @@ -74,9 +74,9 @@ def _mkannotation( rgb: typ.Optional[RGB] = None color = pa.get('C') if color is not None: - if isinstance(color, list) + if (isinstance(color, list) and len(color) == 3 - and all(isinstance(e, float) and 0 <= e <= 1 for e in color): + and all(isinstance(e, float) and 0 <= e <= 1 for e in color)): rgb = RGB(*color) else: logger.warning("Invalid color %s in annotation on %s", color, page)