Skip to content

Commit

Permalink
refactor!: remove redundant nexttoken method from the lexer
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Oct 28, 2024
1 parent f5ab4bb commit 1a12046
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 7 deletions.
5 changes: 5 additions & 0 deletions playa/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,7 @@ def __init__(
if self.catalog.get("Type") is not LITERAL_CATALOG:
if settings.STRICT:
raise PDFSyntaxError("Catalog not found!")
self.parser.seek(0)

def _initialize_password(self, password: str = "") -> None:
"""Initialize the decryption handler with a given password, if any.
Expand Down Expand Up @@ -823,6 +824,10 @@ def _initialize_password(self, password: str = "") -> None:
assert self.parser is not None
self.parser.fallback = False # need to read streams with exact length

def __iter__(self) -> Iterator[Tuple[int, object]]:
    """Iterate over positions and top-level PDF objects in the file.

    Returns:
        The document's underlying parser, which yields
        ``(pos, object)`` pairs, one per top-level object.

    NOTE(review): this returns the parser itself rather than a fresh
    iterator, so two simultaneous iterations would share position
    state, and a second pass resumes where the first stopped --
    confirm callers only iterate once per document.
    """
    return self.parser

def _getobj_objstm(self, stream: PDFStream, index: int, objid: int) -> object:
if stream.objid in self._parsed_objs:
(objs, n) = self._parsed_objs[stream.objid]
Expand Down
1 change: 1 addition & 0 deletions playa/pdfparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def do_keyword(self, pos: int, token: PSKeyword) -> None:
self.add_results(*self.pop(1))

elif token is KEYWORD_ENDOBJ:
# objid genno "obj" ... and the object itself
self.add_results(*self.pop(4))

elif token is KEYWORD_NULL:
Expand Down
8 changes: 2 additions & 6 deletions playa/psparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,10 +285,6 @@ def __iter__(self) -> Iterator[Tuple[int, PSBaseParserToken]]:
"""Iterate over tokens."""
return self

def nexttoken(self) -> Tuple[int, PSBaseParserToken]:
    """Return the next lexed token, raising StopIteration at EOF."""
    # Equivalent to self.__next__(), expressed via the next() builtin.
    return next(self)

def __next__(self) -> Tuple[int, PSBaseParserToken]:
"""Get the next token in iteration, raising StopIteration when
done."""
Expand Down Expand Up @@ -455,7 +451,7 @@ def flush(self) -> None:
return

def __next__(self) -> PSStackEntry[ExtraT]:
"""Return the next object, returning StopIteration at EOF.
"""Return the next object, raising StopIteration at EOF.
Arrays and dictionaries are represented as Python lists and
dictionaries.
Expand Down Expand Up @@ -575,4 +571,4 @@ def get_inline_data(self, target: bytes = b"EI") -> Tuple[int, bytes]:
def nexttoken(self) -> Tuple[int, PSBaseParserToken]:
    """Get the next token in iteration, raising StopIteration when
    done.

    Returns:
        A ``(pos, token)`` pair produced by the underlying lexer.
    """
    # Single delegation to the lexer; the stale pre-refactor line
    # (``return self._lexer.__next__()``) duplicated this return and
    # was unreachable, so it is removed.
    return next(self._lexer)
6 changes: 6 additions & 0 deletions tests/test_pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ def test_read_header():
assert read_header(BytesIO(b"%PDF-1.7")) == "1.7"


def test_objects():
    """Smoke test: iterating an open document yields its top-level objects."""
    with playa.open(TESTDIR / "simple1.pdf") as doc:
        for top_level in doc:
            print(top_level)


def test_page_labels():
with playa.open(TESTDIR / "contrib" / "pagelabels.pdf") as doc:
labels = [label for _, label in zip(range(10), doc.page_labels)]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_psparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ def inline_parsers(
bp = Lexer(data)
assert bp.get_inline_data(target=target, blocksize=blocksize) == expected
if nexttoken is not None:
assert bp.nexttoken() == nexttoken
assert next(bp) == nexttoken


def test_get_inline_data() -> None:
Expand Down

0 comments on commit 1a12046

Please sign in to comment.