diff --git a/CHANGELOG.md b/CHANGELOG.md index 688ffbd7a5..b4de0d4095 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ UNRELEASED ---------- +* [ [#1498](https://github.com/digitalfabrik/integreat-cms/issues/1498) ] Fix PDF errors on specific pages in Arabic & Farsi + 2023.2.2 -------- diff --git a/pyproject.toml b/pyproject.toml index 28a3acae71..b48cc4d05a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ "rules", "six", "webauthn", - "xhtml2pdf==0.2.8", + "xhtml2pdf", ] [project.optional-dependencies] @@ -157,10 +157,10 @@ pinned = [ "pydantic==1.10.5", "Pygments==2.14.0", "pyHanko==0.17.0", - "pyhanko-certvalidator==0.20.0", + "pyhanko-certvalidator==0.20.1", "pyOpenSSL==23.0.0", "pyotp==2.8.0", - "PyPDF3==1.0.6", + "pypdf==3.4.1", "pypng==0.20220715.0", "pyrsistent==0.19.3", "python-bidi==0.4.2", @@ -180,7 +180,6 @@ pinned = [ "stack-data==0.6.2", "svglib==1.5.1", "tinycss2==1.2.1", - "tqdm==4.64.1", "traitlets==5.9.0", "typing_extensions==4.5.0", "tzdata==2022.7", @@ -190,7 +189,7 @@ pinned = [ "wcwidth==0.2.6", "webauthn==1.7.2", "webencodings==0.5.1", - "xhtml2pdf==0.2.8", + "xhtml2pdf==0.2.9", "yarl==1.8.2", ] diff --git "a/tests/pdf/files/28814d44dd/Integreat - Ukrainisch - \320\233\320\260\321\201\320\272\320\260\320\262\320\276 \320\277\321\200\320\276\321\201\320\270\320\274\320\276 \320\262 \320\220\321\203\320\263\321\201\320\261\321\203\321\200\320\263.pdf" "b/tests/pdf/files/28814d44dd/Integreat - Ukrainisch - \320\233\320\260\321\201\320\272\320\260\320\262\320\276 \320\277\321\200\320\276\321\201\320\270\320\274\320\276 \320\262 \320\220\321\203\320\263\321\201\320\261\321\203\321\200\320\263.pdf" index e453e2c1fb..3a593707d0 100644 Binary files "a/tests/pdf/files/28814d44dd/Integreat - Ukrainisch - \320\233\320\260\321\201\320\272\320\260\320\262\320\276 \320\277\321\200\320\276\321\201\320\270\320\274\320\276 \320\262 \320\220\321\203\320\263\321\201\320\261\321\203\321\200\320\263.pdf" and "b/tests/pdf/files/28814d44dd/Integreat - Ukrainisch - \320\233\320\260\321\201\320\272\320\260\320\262\320\276 \320\277\321\200\320\276\321\201\320\270\320\274\320\276 \320\262 \320\220\321\203\320\263\321\201\320\261\321\203\321\200\320\263.pdf" differ diff --git "a/tests/pdf/files/3b02f5ea5b/Integreat - Arabisch - \331\205\330\271\331\204\331\210\331\205\330\247\330\252 \330\247\331\204\331\210\330\265\331\210\331\204.pdf" "b/tests/pdf/files/3b02f5ea5b/Integreat - Arabisch - \331\205\330\271\331\204\331\210\331\205\330\247\330\252 \330\247\331\204\331\210\330\265\331\210\331\204.pdf" index 02855d6aa8..420cec4428 100644 Binary files "a/tests/pdf/files/3b02f5ea5b/Integreat - Arabisch - \331\205\330\271\331\204\331\210\331\205\330\247\330\252 \330\247\331\204\331\210\330\265\331\210\331\204.pdf" and "b/tests/pdf/files/3b02f5ea5b/Integreat - Arabisch - \331\205\330\271\331\204\331\210\331\205\330\247\330\252 \330\247\331\204\331\210\330\265\331\210\331\204.pdf" differ diff --git "a/tests/pdf/files/52d22a85dc/Integreat - Amharisch - \341\212\245\341\212\225\341\212\263\341\212\225 \341\213\260\341\210\205\341\212\223 \341\210\230\341\214\241.pdf" "b/tests/pdf/files/52d22a85dc/Integreat - Amharisch - \341\212\245\341\212\225\341\212\263\341\212\225 \341\213\260\341\210\205\341\212\223 \341\210\230\341\214\241.pdf" index c70bd12d40..d3cef9ca58 100644 Binary files "a/tests/pdf/files/52d22a85dc/Integreat - Amharisch - \341\212\245\341\212\225\341\212\263\341\212\225 \341\213\260\341\210\205\341\212\223 \341\210\230\341\214\241.pdf" and "b/tests/pdf/files/52d22a85dc/Integreat - Amharisch - \341\212\245\341\212\225\341\212\263\341\212\225 \341\213\260\341\210\205\341\212\223 \341\210\230\341\214\241.pdf" differ diff --git a/tests/pdf/files/6262976c99/Integreat - Deutsch - Willkommen.pdf b/tests/pdf/files/6262976c99/Integreat - Deutsch - Willkommen.pdf index 1f7e0ecadd..8bfd2fbe6c 100644 Binary files a/tests/pdf/files/6262976c99/Integreat - Deutsch - Willkommen.pdf and b/tests/pdf/files/6262976c99/Integreat - Deutsch - Willkommen.pdf differ diff --git a/tests/pdf/files/92ff67bd01/Integreat - Deutsch - Augsburg.pdf b/tests/pdf/files/92ff67bd01/Integreat - Deutsch - Augsburg.pdf index c30c21b444..9c13d4c50e 100644 Binary files a/tests/pdf/files/92ff67bd01/Integreat - Deutsch - Augsburg.pdf and b/tests/pdf/files/92ff67bd01/Integreat - Deutsch - Augsburg.pdf differ diff --git "a/tests/pdf/files/ba6f45d0ab/Integreat - Griechisch - \316\232\316\261\316\273\317\216\317\202 \316\256\316\273\316\270\316\261\317\204\316\265 \317\203\317\204\316\277 Augsburg.pdf" "b/tests/pdf/files/ba6f45d0ab/Integreat - Griechisch - \316\232\316\261\316\273\317\216\317\202 \316\256\316\273\316\270\316\261\317\204\316\265 \317\203\317\204\316\277 Augsburg.pdf" index 45d8b853bb..0c289f9039 100644 Binary files "a/tests/pdf/files/ba6f45d0ab/Integreat - Griechisch - \316\232\316\261\316\273\317\216\317\202 \316\256\316\273\316\270\316\261\317\204\316\265 \317\203\317\204\316\277 Augsburg.pdf" and "b/tests/pdf/files/ba6f45d0ab/Integreat - Griechisch - \316\232\316\261\316\273\317\216\317\202 \316\256\316\273\316\270\316\261\317\204\316\265 \317\203\317\204\316\277 Augsburg.pdf" differ diff --git a/tests/pdf/files/cdff964723/Integreat - German - Willkommen.pdf b/tests/pdf/files/cdff964723/Integreat - German - Willkommen.pdf deleted file mode 100644 index 8f545bd3db..0000000000 Binary files a/tests/pdf/files/cdff964723/Integreat - German - Willkommen.pdf and /dev/null differ diff --git a/tests/pdf/files/e155c5e38b/Integreat - Englisch - Welcome.pdf b/tests/pdf/files/e155c5e38b/Integreat - Englisch - Welcome.pdf index 60d697651b..92b9f3c56a 100644 Binary files a/tests/pdf/files/e155c5e38b/Integreat - Englisch - Welcome.pdf and b/tests/pdf/files/e155c5e38b/Integreat - Englisch - Welcome.pdf differ diff --git a/tests/pdf/test_pdf_export.py b/tests/pdf/test_pdf_export.py index 2f297f00a1..c32f4a99f2 100644 --- a/tests/pdf/test_pdf_export.py +++ b/tests/pdf/test_pdf_export.py @@ -3,7 +3,7 @@ from urllib.parse import urlencode, quote import pytest -import PyPDF3 +import pypdf from django.urls import reverse @@ -107,25 +107,22 @@ def test_pdf_export( print(response.headers) assert response.headers.get("Content-Type") == "application/pdf" # Compare file content - result_pdf = PyPDF3.PdfFileReader( - io.BytesIO(b"".join(response.streaming_content)) - ) + result_pdf = pypdf.PdfReader(io.BytesIO(b"".join(response.streaming_content))) # pylint: disable=consider-using-with - expected_pdf = PyPDF3.PdfFileReader( + expected_pdf = pypdf.PdfReader( open(f"tests/pdf/files/{expected_filename}", "rb") ) # Assert that both documents have same number of pages - assert result_pdf.numPages == expected_pdf.numPages + assert len(result_pdf.pages) == len(expected_pdf.pages) # Assert that the content is identical - for page_number in range(result_pdf.numPages): - result_page = result_pdf.getPage(page_number) - expected_page = expected_pdf.getPage(page_number) - assert result_page.artBox == expected_page.artBox - assert result_page.bleedBox == expected_page.bleedBox - assert result_page.cropBox == expected_page.cropBox - assert result_page.mediaBox == expected_page.mediaBox - assert result_page.extractText() == expected_page.extractText() - assert result_page.getContents() == expected_page.getContents() + for page_number, result_page in enumerate(result_pdf.pages): + expected_page = expected_pdf.pages[page_number] + assert result_page.artbox == expected_page.artbox + assert result_page.bleedbox == expected_page.bleedbox + assert result_page.cropbox == expected_page.cropbox + assert result_page.mediabox == expected_page.mediabox + assert result_page.extract_text() == expected_page.extract_text() + assert result_page.get_contents() == expected_page.get_contents() # pylint: disable=unused-argument,too-many-locals