diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 7b8e1d90..53179f82 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,7 +7,11 @@ on: jobs: benchmark: - runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3955b0b1..b30756de 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,26 +6,48 @@ on: branches: [ "main" ] jobs: - test: + check: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - with: - submodules: true - name: Set up Python uses: actions/setup-python@v5 - with: - python-version: "3.9" - name: Install Hatch uses: pypa/hatch@install - name: Check types with mypy run: hatch run mypy --install-types --non-interactive playa + test: + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.9" + - name: Install Hatch + uses: pypa/hatch@install - name: Run tests run: hatch test --cover -py 3.9 - - name: Install ghostscript - run: sudo apt update && sudo apt install ghostscript + thirdparty-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Set up Python + uses: actions/setup-python@v5 + - name: Install Hatch + uses: pypa/hatch@install + - name: Run pdf.js testsuite + run: | + hatch run pytest -k pdf.js - name: Run pdfplumber tests run: | + sudo apt update && sudo apt install ghostscript cd samples/3rdparty/pdfplumber python -m venv venv SETUPTOOLS_SCM_PRETEND_VERSION=0.2.5 venv/bin/pip install -e ../../.. diff --git a/benchmarks/parser.py b/benchmarks/parser.py index 88204832..a8ee5f27 100644 --- a/benchmarks/parser.py +++ b/benchmarks/parser.py @@ -288,19 +288,19 @@ def bench_mmap(): from playa.parser import Lexer - with tempfile.NamedTemporaryFile() as tf: + with tempfile.TemporaryFile(mode="w+b") as tf: runs = 100 - with open(tf.name, "wb") as outfh: - outfh.write(DATA * runs) - with open(tf.name, "rb") as infh: - start = time.time() - mapping = mmap.mmap(infh.fileno(), 0, access=mmap.ACCESS_READ) - parser = Lexer(mapping) - _ = list(parser) - print( - "PLAYA Lexer (mmap): %fms / run" - % ((time.time() - start) / runs * 1000), - ) + tf.write(DATA * runs) + tf.flush() + tf.seek(0, 0) + start = time.time() + mapping = mmap.mmap(tf.fileno(), 0, access=mmap.ACCESS_READ) + parser = Lexer(mapping) + _ = list(parser) + print( + "PLAYA Lexer (mmap): %fms / run" + % ((time.time() - start) / runs * 1000), + ) def bench_playa(): @@ -341,21 +341,21 @@ def bench_pdfminer(): "pdfminer.six Lexer (BytesIO): %fms / run" % ((time.time() - start) / runs * 1000), ) - with tempfile.NamedTemporaryFile() as tf: + with tempfile.TemporaryFile(mode="w+b") as tf: runs = 100 - with open(tf.name, "wb") as outfh: - outfh.write(DATA * runs) - with open(tf.name, "rb") as infh: - parser = PSBaseParser(infh) - while True: - try: - _ = parser.nexttoken() - except PSEOF: - break - print( - "pdfminer.six Lexer (BinaryIO): %fms / run" - % ((time.time() - start) / runs * 1000), - ) + tf.write(DATA * runs) + tf.flush() + tf.seek(0, 0) + parser = PSBaseParser(tf) + while True: + try: + _ = parser.nexttoken() + except PSEOF: + break + print( + "pdfminer.six Lexer (BinaryIO): %fms / run" + % ((time.time() - start) / runs * 1000), + ) runs = 20 start = time.time() for _ in range(runs): diff --git a/tests/data.py b/tests/data.py index 944fc011..1cdc2279 100644 --- a/tests/data.py +++ b/tests/data.py @@ -20,7 +20,7 @@ ALLPDFS.extend(PLUMBERS.glob("*.pdf")) PDFJS = TESTDIR / "3rdparty" / "pdf.js" / "test" try: - with open(PDFJS / "test_manifest.json") as infh: + with open(PDFJS / "test_manifest.json", encoding="utf-8") as infh: manifest = json.load(infh) for entry in manifest: path = PDFJS / entry["file"]