From 35511109cef378c644105fbea8a4b1088c2c41ed Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 09:35:15 -0500 Subject: [PATCH 1/6] ci: test on windows and mac --- .github/workflows/tests.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3955b0b1..13e7e225 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,7 +7,11 @@ on: jobs: test: - runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 with: From 9c31a4a05cd2b029d6465455480aa6a952aaa61d Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 09:40:56 -0500 Subject: [PATCH 2/6] fix(ci): only run basic tests on non-Linux --- .github/workflows/tests.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 13e7e225..29f95075 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -23,13 +23,14 @@ jobs: - name: Install Hatch uses: pypa/hatch@install - name: Check types with mypy + if: ${{ matrix.os }} == "ubuntu-latest" run: hatch run mypy --install-types --non-interactive playa - name: Run tests run: hatch test --cover -py 3.9 - - name: Install ghostscript - run: sudo apt update && sudo apt install ghostscript - name: Run pdfplumber tests + if: ${{ matrix.os }} == "ubuntu-latest" run: | + sudo apt update && sudo apt install ghostscript cd samples/3rdparty/pdfplumber python -m venv venv SETUPTOOLS_SCM_PRETEND_VERSION=0.2.5 venv/bin/pip install -e ../../.. From 1d4760be63556bda1acb74a0d04e4c4d2f86c20c Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 09:41:38 -0500 Subject: [PATCH 3/6] fix(ci): oups, do not assume utf-8 everywhere --- tests/data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data.py b/tests/data.py index 944fc011..1cdc2279 100644 --- a/tests/data.py +++ b/tests/data.py @@ -20,7 +20,7 @@ ALLPDFS.extend(PLUMBERS.glob("*.pdf")) PDFJS = TESTDIR / "3rdparty" / "pdf.js" / "test" try: - with open(PDFJS / "test_manifest.json") as infh: + with open(PDFJS / "test_manifest.json", encoding="utf-8") as infh: manifest = json.load(infh) for entry in manifest: path = PDFJS / entry["file"] From 3118f3169421f6f9def90cc510652f99f367bf26 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 09:52:12 -0500 Subject: [PATCH 4/6] fix(ci): split up tests --- .github/workflows/tests.yml | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29f95075..dba58da9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,6 +6,16 @@ on: branches: [ "main" ] jobs: + check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + - name: Check types with mypy + run: | + pip install mypy + mypy --install-types --non-interactive playa test: strategy: fail-fast: true @@ -14,21 +24,28 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - with: - submodules: true - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.9" - name: Install Hatch uses: pypa/hatch@install - - name: Check types with mypy - if: ${{ matrix.os }} == "ubuntu-latest" - run: hatch run mypy --install-types --non-interactive playa - name: Run tests run: hatch test --cover -py 3.9 + thirdparty-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + submodules: true + - name: Set up Python + uses: actions/setup-python@v5 + - name: Install Hatch + uses: pypa/hatch@install + - name: Run pdf.js testsuite + run: | + hatch run pytest -k pdf.js - name: Run pdfplumber tests - if: ${{ matrix.os }} == "ubuntu-latest" run: | sudo apt update && sudo apt install ghostscript cd samples/3rdparty/pdfplumber From 7a6c430c110175dbca5d7180c522e586488afbb6 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 09:55:33 -0500 Subject: [PATCH 5/6] fix(ci): fix CI again :) --- .github/workflows/benchmarks.yml | 6 +++++- .github/workflows/tests.yml | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 7b8e1d90..53179f82 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,7 +7,11 @@ on: jobs: benchmark: - runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index dba58da9..b30756de 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,10 +12,10 @@ jobs: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 + - name: Install Hatch + uses: pypa/hatch@install - name: Check types with mypy - run: | - pip install mypy - mypy --install-types --non-interactive playa + run: hatch run mypy --install-types --non-interactive playa test: strategy: fail-fast: true From 32cbda0e129b9d8e73c28a782538bc196abdd55e Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Fri, 27 Dec 2024 10:09:22 -0500 Subject: [PATCH 6/6] fix(ci): do not try to multiply open on windows --- benchmarks/parser.py | 52 ++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/benchmarks/parser.py b/benchmarks/parser.py index 88204832..a8ee5f27 100644 --- a/benchmarks/parser.py +++ b/benchmarks/parser.py @@ -288,19 +288,19 @@ def bench_mmap(): from playa.parser import Lexer - with tempfile.NamedTemporaryFile() as tf: + with tempfile.TemporaryFile(mode="w+b") as tf: runs = 100 - with open(tf.name, "wb") as outfh: - outfh.write(DATA * runs) - with open(tf.name, "rb") as infh: - start = time.time() - mapping = mmap.mmap(infh.fileno(), 0, access=mmap.ACCESS_READ) - parser = Lexer(mapping) - _ = list(parser) - print( - "PLAYA Lexer (mmap): %fms / run" - % ((time.time() - start) / runs * 1000), - ) + tf.write(DATA * runs) + tf.flush() + tf.seek(0, 0) + start = time.time() + mapping = mmap.mmap(tf.fileno(), 0, access=mmap.ACCESS_READ) + parser = Lexer(mapping) + _ = list(parser) + print( + "PLAYA Lexer (mmap): %fms / run" + % ((time.time() - start) / runs * 1000), + ) def bench_playa(): @@ -341,21 +341,21 @@ def bench_pdfminer(): "pdfminer.six Lexer (BytesIO): %fms / run" % ((time.time() - start) / runs * 1000), ) - with tempfile.NamedTemporaryFile() as tf: + with tempfile.TemporaryFile(mode="w+b") as tf: runs = 100 - with open(tf.name, "wb") as outfh: - outfh.write(DATA * runs) - with open(tf.name, "rb") as infh: - parser = PSBaseParser(infh) - while True: - try: - _ = parser.nexttoken() - except PSEOF: - break - print( - "pdfminer.six Lexer (BinaryIO): %fms / run" - % ((time.time() - start) / runs * 1000), - ) + tf.write(DATA * runs) + tf.flush() + tf.seek(0, 0) + parser = PSBaseParser(tf) + while True: + try: + _ = parser.nexttoken() + except PSEOF: + break + print( + "pdfminer.six Lexer (BinaryIO): %fms / run" + % ((time.time() - start) / runs * 1000), + ) runs = 20 start = time.time() for _ in range(runs):