Skip to content

Commit

Permalink
fix: mypy
Browse files: browse the repository at this point in the history
  • Loading branch information
dhdaines committed Dec 3, 2024
1 parent 2961090 commit ec0a654
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 16 deletions.
22 changes: 14 additions & 8 deletions playa/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,11 +248,15 @@ def _load(self, parser: IndirectObjectParser) -> None:
for pos, line in reverse_iter_lines(parser.buffer):
line = line.strip()
if line == b"trailer":
_, trailer = next(ObjectParser(parser.buffer, doc, pos + len(b"trailer")))
_, trailer = next(
ObjectParser(parser.buffer, doc, pos + len(b"trailer"))
)
if not isinstance(trailer, dict):
break
self.trailer.update(trailer)
log.debug("trailer=%r", self.trailer)
return
log.warning("b'trailer' not found in document")
log.warning("b'trailer' not found in document or invalid")

@property
def objids(self) -> Iterable[int]:
Expand Down Expand Up @@ -1311,16 +1315,16 @@ def _read_xref_from(
log.debug("read_xref_from: start=%d, token=%r", start, token)
if token is KEYWORD_XREF:
parser.nextline()
xref = XRefTable(parser)
xref: XRef = XRefTable(parser)
else:
# It might be an XRefStream, if this is an indirect object...
_, token2 = parser.nexttoken()
_, token3 = parser.nexttoken()
if token3 is KEYWORD_OBJ:
_, token = parser.nexttoken()
_, token = parser.nexttoken()
if token is KEYWORD_OBJ:
# XRefStream: PDF-1.5
self.parser.seek(pos)
self.parser.reset()
xref: XRef = XRefStream(self.parser)
xref = XRefStream(self.parser)
else:
# Well, maybe it's an XRef table without "xref" (but
# probably not)
Expand Down Expand Up @@ -1354,7 +1358,9 @@ def __init__(self, doc: Document):
page_objects = list(doc._get_page_objects())
except (KeyError, IndexError):
page_objects = list(doc._get_pages_from_xrefs())
for page_idx, ((objid, properties), label) in enumerate(zip(page_objects, page_labels)):
for page_idx, ((objid, properties), label) in enumerate(
zip(page_objects, page_labels)
):
page = Page(doc, objid, properties, label, page_idx, doc.space)
self._pages.append(page)
if label is not None:
Expand Down
3 changes: 2 additions & 1 deletion playa/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ def __init__(
else:
log.warning(
"MediaBox missing from Page id %d (and not inherited),"
" defaulting to US Letter (612x792)", pageid
" defaulting to US Letter (612x792)",
pageid,
)
self.mediabox = (0, 0, 612, 792)
self.cropbox = self.mediabox
Expand Down
15 changes: 9 additions & 6 deletions playa/structtree.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
)

from playa.data_structures import NumberTree
from playa.parser import KEYWORD_NULL, PSLiteral
from playa.parser import KEYWORD_NULL, PSLiteral, PDFObject
from playa.pdftypes import ObjRef, resolve1, dict_value, list_value
from playa.utils import decode_text

Expand Down Expand Up @@ -190,20 +190,21 @@ def __init__(
return
parent_id = page.attrs["StructParents"]
parent_array = list_value(parent_tree[parent_id])
assert isinstance(parent_array, list) # srsly
self._parse_parent_tree(parent_array)
else:
# ...EXCEPT that the ParentTree is sometimes missing, in which
# case we fall back to the non-approved way.
self._parse_struct_tree()

def _make_attributes(
self, obj: Dict[str, Any], revision: Union[int, None]
self, attrs: Dict[str, Any], revision: Union[int, None]
) -> Dict[str, Any]:
attr_obj_list = []
attr_obj_list: List[PDFObject] = []
for key in "C", "A":
if key not in obj:
if key not in attrs:
continue
attr_obj = resolve1(obj[key])
attr_obj = resolve1(attrs[key])
# It could be a list of attribute objects (why?)
if isinstance(attr_obj, list):
attr_obj_list.extend(resolve1(val) for val in attr_obj)
Expand All @@ -221,10 +222,12 @@ def _make_attributes(
if aobj == revision and prev_obj is not None:
attr_objs.append(prev_obj)
prev_obj = None
else:
elif isinstance(aobj, dict):
if prev_obj is not None:
attr_objs.append(prev_obj)
prev_obj = aobj
else:
logger.warning("Structure attribute of unknown type: %r", aobj)
if prev_obj is not None:
attr_objs.append(prev_obj)
# Now merge all the attribute objects in the collected to a
Expand Down
2 changes: 1 addition & 1 deletion tests/test_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
except ImportError:
pdfminer = None # type: ignore
import playa
from playa.exceptions import PDFEncryptionError, PDFSyntaxError
from playa.exceptions import PDFEncryptionError
from .data import TESTDIR, BASEPDFS, PASSWORDS, XFAILS, CONTRIB

# We know pdfminer.six gives different output for these and we don't
Expand Down

0 comments on commit ec0a654

Please sign in to comment.