diff --git a/pdfannots/__init__.py b/pdfannots/__init__.py
index da4d6f5..42ff469 100644
--- a/pdfannots/__init__.py
+++ b/pdfannots/__init__.py
@@ -22,7 +22,7 @@
 import pdfminer.settings
 import pdfminer.utils
 
-from .types import Page, Outline, AnnotationType, Annotation, Document
+from .types import Page, Outline, AnnotationType, Annotation, Document, RGB
 from .utils import cleanup_text, decode_datetime
 
 pdfminer.settings.STRICT = False
@@ -71,6 +71,18 @@ def _mkannotation(
         # decode as string, normalise line endings, replace special characters
         contents = cleanup_text(pdfminer.utils.decode_text(contents))
 
+    # C holds the annotation's colour array; only 3-component (RGB) values
+    # are supported. A missing or empty entry just means "no colour".
+    rgb: typ.Optional[RGB] = None
+    color = pdftypes.resolve1(pa.get('C'))
+    if color:
+        if (isinstance(color, list)
+                and len(color) == 3
+                and all(isinstance(e, (int, float)) and 0 <= e <= 1 for e in color)):
+            rgb = RGB(*map(float, color))
+        else:
+            logger.warning("Invalid color %s in annotation on %s", color, page)
+
     # Rect defines the location of the annotation on the page
     rect = pdftypes.resolve1(pa.get('Rect'))
 
@@ -94,7 +106,7 @@ def _mkannotation(
         created = decode_datetime(createds)
 
     return Annotation(page, annot_type, quadpoints, rect,
-                      contents, author=author, created=created)
+                      contents, author=author, created=created, color=rgb)
 
 
 def _get_outlines(doc: PDFDocument) -> typ.Iterator[Outline]:
diff --git a/pdfannots/printer/json.py b/pdfannots/printer/json.py
index 13e128c..3420eb5 100644
--- a/pdfannots/printer/json.py
+++ b/pdfannots/printer/json.py
@@ -35,6 +35,9 @@ def annot_to_dict(
     if annot.created:
         result['created'] = annot.created.strftime('%Y-%m-%dT%H:%M:%S')
 
+    if annot.color:
+        result['color'] = annot.color.ashex()
+
     return result
 
 
diff --git a/pdfannots/types.py b/pdfannots/types.py
index 2f2dc82..3d84757 100644
--- a/pdfannots/types.py
+++ b/pdfannots/types.py
@@ -297,7 +297,8 @@ def __init__(
             rect: typ.Optional[BoxCoords] = None,
             contents: typ.Optional[str] = None,
             author: typ.Optional[str] = None,
-            created: typ.Optional[datetime.datetime] = None):
+            created: typ.Optional[datetime.datetime] = None,
+            color: typ.Optional['RGB'] = None):  # RGB color expressed as a 3-tuple of floats in [0, 1]
 
         # Construct boxes from quadpoints
         boxes = []
@@ -324,6 +325,7 @@ def __init__(
         self.author = author
         self.created = created
         self.text = []
+        self.color = color
         self.pre_context = None
         self.post_context = None
         self.boxes = boxes
@@ -479,3 +481,17 @@ def nearest_outline(
             return page.outlines[idx - 1]
 
         return None
+
+
+class RGB(typ.NamedTuple):
+    """An RGB colour whose components are floats in the range [0, 1]."""
+    red: float
+    green: float
+    blue: float
+
+    def ashex(self) -> str:
+        """Return this colour as a '#rrggbb' hex string."""
+        # Round (rather than truncate) so that e.g. 0.9882 maps back to fc,
+        # and clamp so out-of-range components still yield two hex digits
+        channels = (min(255, max(0, round(c * 255))) for c in self)
+        return '#' + ''.join(format(c, '02x') for c in channels)