From 93de7063fc9795382ba6416d31eabe0fbca3f670 Mon Sep 17 00:00:00 2001 From: felix Date: Thu, 9 May 2024 14:48:13 +0200 Subject: [PATCH] update --- .conda/meta.yml | 50 ++ .github/workflows/builds.yml | 25 +- .github/workflows/doc-status.yml | 22 - .github/workflows/docker.yml | 36 - .github/workflows/docs.yml | 51 -- .github/workflows/main.yml | 73 +- .github/workflows/public_docker_images.yml | 86 --- .github/workflows/pull_requests.yml | 32 - README.md | 29 +- onnxtr/models/detection/_utils/__init__.py | 1 - onnxtr/models/detection/_utils/base.py | 41 -- .../models/differentiable_binarization.py | 4 +- onnxtr/models/detection/models/fast.py | 2 +- onnxtr/models/detection/models/linknet.py | 2 +- onnxtr/models/engine.py | 9 +- onnxtr/models/preprocessor/base.py | 5 +- onnxtr/transforms/base.py | 6 +- onnxtr/utils/geometry.py | 56 ++ pyproject.toml | 28 +- tests/common/test_models_classification.py | 87 +++ tests/common/test_models_detection.py | 68 ++ .../test_models_preprocessor.py} | 14 +- tests/common/test_models_recognition.py | 104 +++ .../test_models_recognition_predictor.py | 39 -- tests/common/test_models_zoo.py | 167 +++++ tests/common/test_transforms.py | 67 +- tests/conftest.py | 5 +- tests/pytorch/test_datasets_pt.py | 623 ------------------ tests/pytorch/test_file_utils_pt.py | 5 - tests/pytorch/test_io_image_pt.py | 52 -- .../pytorch/test_models_classification_pt.py | 194 ------ tests/pytorch/test_models_detection_pt.py | 187 ------ tests/pytorch/test_models_factory.py | 69 -- tests/pytorch/test_models_preprocessor_pt.py | 46 -- tests/pytorch/test_models_recognition_pt.py | 155 ----- tests/pytorch/test_models_utils_pt.py | 65 -- tests/pytorch/test_models_zoo_pt.py | 327 --------- tests/pytorch/test_transforms_pt.py | 351 ---------- tests/tensorflow/test_datasets_loader_tf.py | 75 --- tests/tensorflow/test_datasets_tf.py | 605 ----------------- tests/tensorflow/test_file_utils_tf.py | 5 - tests/tensorflow/test_io_image_tf.py | 52 -- 
.../test_models_classification_tf.py | 227 ------- tests/tensorflow/test_models_detection_tf.py | 270 -------- tests/tensorflow/test_models_factory.py | 70 -- .../tensorflow/test_models_recognition_tf.py | 233 ------- tests/tensorflow/test_models_utils_tf.py | 60 -- tests/tensorflow/test_models_zoo_tf.py | 325 --------- tests/tensorflow/test_transforms_tf.py | 492 -------------- 49 files changed, 653 insertions(+), 4944 deletions(-) create mode 100644 .conda/meta.yml delete mode 100644 .github/workflows/doc-status.yml delete mode 100644 .github/workflows/docker.yml delete mode 100644 .github/workflows/docs.yml delete mode 100644 .github/workflows/public_docker_images.yml delete mode 100644 .github/workflows/pull_requests.yml delete mode 100644 onnxtr/models/detection/_utils/__init__.py delete mode 100644 onnxtr/models/detection/_utils/base.py create mode 100644 tests/common/test_models_classification.py rename tests/{tensorflow/test_models_preprocessor_tf.py => common/test_models_preprocessor.py} (68%) create mode 100644 tests/common/test_models_recognition.py delete mode 100644 tests/common/test_models_recognition_predictor.py create mode 100644 tests/common/test_models_zoo.py delete mode 100644 tests/pytorch/test_datasets_pt.py delete mode 100644 tests/pytorch/test_file_utils_pt.py delete mode 100644 tests/pytorch/test_io_image_pt.py delete mode 100644 tests/pytorch/test_models_classification_pt.py delete mode 100644 tests/pytorch/test_models_detection_pt.py delete mode 100644 tests/pytorch/test_models_factory.py delete mode 100644 tests/pytorch/test_models_preprocessor_pt.py delete mode 100644 tests/pytorch/test_models_recognition_pt.py delete mode 100644 tests/pytorch/test_models_utils_pt.py delete mode 100644 tests/pytorch/test_models_zoo_pt.py delete mode 100644 tests/pytorch/test_transforms_pt.py delete mode 100644 tests/tensorflow/test_datasets_loader_tf.py delete mode 100644 tests/tensorflow/test_datasets_tf.py delete mode 100644 
tests/tensorflow/test_file_utils_tf.py delete mode 100644 tests/tensorflow/test_io_image_tf.py delete mode 100644 tests/tensorflow/test_models_classification_tf.py delete mode 100644 tests/tensorflow/test_models_detection_tf.py delete mode 100644 tests/tensorflow/test_models_factory.py delete mode 100644 tests/tensorflow/test_models_recognition_tf.py delete mode 100644 tests/tensorflow/test_models_utils_tf.py delete mode 100644 tests/tensorflow/test_models_zoo_tf.py delete mode 100644 tests/tensorflow/test_transforms_tf.py diff --git a/.conda/meta.yml b/.conda/meta.yml new file mode 100644 index 0000000..b148aaa --- /dev/null +++ b/.conda/meta.yml @@ -0,0 +1,50 @@ +{% set pyproject = load_file_data('../pyproject.toml', from_recipe_dir=True) %} +{% set project = pyproject.get('project') %} +{% set urls = pyproject.get('project', {}).get('urls') %} +{% set version = environ.get('BUILD_VERSION', '0.1.0a0') %} + +package: + name: {{ project.get('name') }} + version: {{ version }} + +source: + fn: {{ project.get('name') }}-{{ version }}.tar.gz + url: ../dist/{{ project.get('name') }}-{{ version }}.tar.gz + +build: + script: python setup.py install --single-version-externally-managed --record=record.txt + +requirements: + host: + - python>=3.9, <3.12 + - setuptools + + run: + - numpy >=1.16.0, <2.0.0 + - onnxruntime >=1.17.0, <2.0.0 + - scipy >=1.4.0, <2.0.0 + - pillow >=9.2.0 + - opencv >=4.5.0, <5.0.0 + - pypdfium2-team::pypdfium2_helpers >=4.0.0, <5.0.0 + - pyclipper >=1.2.0, <2.0.0 + - shapely >=1.6.0, <3.0.0 + - langdetect >=1.0.9, <2.0.0 + - rapidfuzz >=3.0.0, <4.0.0 + - defusedxml >=0.7.0 + - anyascii >=0.3.2 + - tqdm >=4.30.0 + +test: + requires: + - pip + + imports: + - onnxtr + +about: + home: {{ urls.get('repository') }} + license: Apache-2.0 + license_file: {{ project.get('license', {}).get('file') }} + summary: {{ project.get('description') | replace(":", " -")}} + doc_url: {{ urls.get('documentation') }} + dev_url: {{ urls.get('repository') }} diff --git 
a/.github/workflows/builds.yml b/.github/workflows/builds.yml index 6897e62..9a8e507 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -12,41 +12,24 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, macos-latest, windows-latest] python: ["3.9", "3.10"] - framework: [tensorflow, pytorch] steps: - uses: actions/checkout@v4 - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} architecture: x64 - - if: matrix.framework == 'tensorflow' - name: Cache python modules (TF) + - name: Cache python modules uses: actions/cache@v4 with: path: ~/.cache/pip key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - if: matrix.framework == 'pytorch' - name: Cache python modules (PT) - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - if: matrix.framework == 'tensorflow' - name: Install package (TF) - run: | - python -m pip install --upgrade pip - pip install -e .[tf,viz,html] --upgrade - - if: matrix.framework == 'pytorch' - name: Install package (PT) + - name: Install package run: | python -m pip install --upgrade pip - pip install -e .[torch,viz,html] --upgrade + pip install -e .[viz] --upgrade - name: Import package run: python -c "import onnxtr; print(onnxtr.__version__)" diff --git a/.github/workflows/doc-status.yml b/.github/workflows/doc-status.yml deleted file mode 100644 index 294f3dc..0000000 --- a/.github/workflows/doc-status.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: doc-status -on: - page_build - -jobs: - see-page-build-payload: - runs-on: ubuntu-latest - steps: - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.9" - architecture: x64 - - name: check status - run: | - 
import os - status, errormsg = os.getenv('STATUS'), os.getenv('ERROR') - if status != 'built': raise AssertionError(f"There was an error building the page on GitHub pages.\n\nStatus: {status}\n\nError messsage: {errormsg}") - shell: python - env: - STATUS: ${{ github.event.build.status }} - ERROR: ${{ github.event.build.error.message }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index 70302c9..0000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: docker - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - docker-package: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Build docker image - run: docker build -t doctr-tf-py3.9-slim --build-arg SYSTEM=cpu . - - name: Run docker container - run: docker run doctr-tf-py3.9-slim python3 -c 'import doctr' - - pytest-api: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.9"] - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Build & run docker - run: cd api && make lock && make run - - name: Ping server - run: wget --spider --tries=12 http://localhost:8080/docs - - name: Run docker test - run: cd api && make test diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 79965b5..0000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: docs -on: - push: - branches: main - -jobs: - docs-deploy: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.9"] - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ 
matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[tf,viz,html] - pip install -e .[docs] - - - name: Build documentation - run: cd docs && bash build.sh - - - name: Documentation sanity check - run: test -e docs/build/index.html || exit - - - name: Install SSH Client 🔑 - uses: webfactory/ssh-agent@v0.4.1 - with: - ssh-private-key: ${{ secrets.SSH_DEPLOY_KEY }} - - - name: Deploy to Github Pages - uses: JamesIves/github-pages-deploy-action@3.7.1 - with: - BRANCH: gh-pages - FOLDER: 'docs/build' - COMMIT_MESSAGE: '[skip ci] Documentation updates' - CLEAN: true - SSH: true diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ac34c9c..267218e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -28,8 +28,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e .[tf,viz,html] --upgrade - pip install -e .[testing] + pip install -e .[viz,html,testing] --upgrade - name: Run unittests run: | coverage run -m pytest tests/common/ -rs @@ -40,77 +39,9 @@ jobs: path: ./coverage-common.xml if-no-files-found: error - pytest-tf: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.9"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[tf,viz,html] --upgrade - pip install -e .[testing] - - name: Run unittests - run: | - coverage run -m pytest tests/tensorflow/ -rs - coverage xml -o coverage-tf.xml - - uses: actions/upload-artifact@v4 - with: - name: coverage-tf - path: 
./coverage-tf.xml - if-no-files-found: error - - pytest-torch: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.9"] - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[torch,viz,html] --upgrade - pip install -e .[testing] - - - name: Run unittests - run: | - coverage run -m pytest tests/pytorch/ -rs - coverage xml -o coverage-pt.xml - - - uses: actions/upload-artifact@v4 - with: - name: coverage-pytorch - path: ./coverage-pt.xml - if-no-files-found: error - codecov-upload: runs-on: ubuntu-latest - needs: [ pytest-common, pytest-tf, pytest-torch ] + needs: [ pytest-common ] steps: - uses: actions/checkout@v4 - uses: actions/download-artifact@v4 diff --git a/.github/workflows/public_docker_images.yml b/.github/workflows/public_docker_images.yml deleted file mode 100644 index 2ccdb66..0000000 --- a/.github/workflows/public_docker_images.yml +++ /dev/null @@ -1,86 +0,0 @@ -# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages -# -name: Docker image on ghcr.io - -on: - push: - tags: - - 'v*' - pull_request: - branches: main - schedule: - - cron: '0 2 29 * *' # At 02:00 on day-of-month 29 - -env: - REGISTRY: ghcr.io - -jobs: - build-and-push-image: - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - # Must match version at https://www.python.org/ftp/python/ - python: ["3.9.18", "3.10.13", "3.11.8"] - framework: ["tf", "torch", "tf,viz,html,contrib", "torch,viz,html,contrib"] - system: ["cpu", "gpu"] - - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in 
this job. - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - - name: Log in to the Container registry - uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - with: - images: ${{ env.REGISTRY }}/${{ github.repository }} - tags: | - # used only on schedule event - type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ matrix.framework }}-py${{ matrix.python }}-${{ matrix.system }}- - # used only if a tag following semver is published - type=semver,pattern={{raw}},prefix=${{ matrix.framework }}-py${{ matrix.python }}-${{ matrix.system }}- - - - name: Build Docker image - id: build - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . - build-args: | - FRAMEWORK=${{ matrix.framework }} - PYTHON_VERSION=${{ matrix.python }} - SYSTEM=${{ matrix.system }} - DOCTR_REPO=${{ github.repository }} - DOCTR_VERSION=${{ github.sha }} - push: false # push only if `import doctr` works - tags: ${{ steps.meta.outputs.tags }} - - - name: Check if `import doctr` works - run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import doctr' - - - name: Push Docker image - # Push only if the CI is not triggered by "PR on main" - if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request' - uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4 - with: - context: . 
- build-args: | - FRAMEWORK=${{ matrix.framework }} - PYTHON_VERSION=${{ matrix.python }} - SYSTEM=${{ matrix.system }} - DOCTR_REPO=${{ github.repository }} - DOCTR_VERSION=${{ github.sha }} - push: true - tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml deleted file mode 100644 index 045c467..0000000 --- a/.github/workflows/pull_requests.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: pull_requests - -on: - pull_request: - branches: main - -jobs: - docs-build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.9" - architecture: x64 - - name: Cache python modules - uses: actions/cache@v4 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[tf,viz,html] --upgrade - pip install -e .[docs] - - - name: Build documentation - run: cd docs && bash build.sh - - - name: Documentation sanity check - run: test -e docs/build/index.html || exit diff --git a/README.md b/README.md index 787daff..ea901c8 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,25 @@ -# onnxcr -Todo +# OnnxTR +## Work in progress -- tests -- actions -- readme -- testen +### docTR meets Onnx (doctr wrapper - onnx pipeline) + +```python +from onnxtr.io import DocumentFile +from onnxtr.models import ocr_predictor + +# PDF +pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf") +# Image +single_img_doc = DocumentFile.from_images("path/to/your/img.jpg") +# Webpage (requires `weasyprint` to be installed) +webpage_doc = DocumentFile.from_url("https://www.yoursite.com") +# Multiple page images +multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"]) + +model = ocr_predictor() +# PDF +doc = DocumentFile.from_pdf("path/to/your/doc.pdf") +# Analyze +result
= model(doc) +``` diff --git a/onnxtr/models/detection/_utils/__init__.py b/onnxtr/models/detection/_utils/__init__.py deleted file mode 100644 index 9b5ed21..0000000 --- a/onnxtr/models/detection/_utils/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .base import * diff --git a/onnxtr/models/detection/_utils/base.py b/onnxtr/models/detection/_utils/base.py deleted file mode 100644 index b4686db..0000000 --- a/onnxtr/models/detection/_utils/base.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (C) 2021-2024, Mindee | Felix Dittrich. - -# This program is licensed under the Apache License 2.0. -# See LICENSE or go to for full license details. - -import cv2 -import numpy as np - -__all__ = ["erode", "dilate"] - - -def erode(x: np.ndarray, kernel_size: int) -> np.ndarray: - """Performs erosion on a given tensor - - Args: - ---- - x: boolean tensor of shape (N, H, W, C) - kernel_size: the size of the kernel to use for erosion - - Returns: - ------- - the eroded tensor - """ - kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8) - return 1 - cv2.erode(1 - x.astype(np.uint8), kernel, iterations=1) - - -def dilate(x: np.ndarray, kernel_size: int) -> np.ndarray: - """Performs dilation on a given tensor - - Args: - ---- - x: boolean tensor of shape (N, H, W, C) - kernel_size: the size of the kernel to use for dilation - - Returns: - ------- - the dilated tensor - """ - kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8) - return cv2.dilate(x.astype(np.uint8), kernel, iterations=1) diff --git a/onnxtr/models/detection/models/differentiable_binarization.py b/onnxtr/models/detection/models/differentiable_binarization.py index 5597729..a9b029e 100644 --- a/onnxtr/models/detection/models/differentiable_binarization.py +++ b/onnxtr/models/detection/models/differentiable_binarization.py @@ -77,9 +77,7 @@ def __call__( if return_model_output: out["out_map"] = prob_map - out["preds"] = [ - dict(zip(["words"], preds)) for preds in self.postprocessor(np.transpose(prob_map, 
(0, 2, 3, 1))) - ] + out["preds"] = [dict(zip(["words"], preds)) for preds in self.postprocessor(prob_map)] return out diff --git a/onnxtr/models/detection/models/fast.py b/onnxtr/models/detection/models/fast.py index 167fa1a..964d94f 100644 --- a/onnxtr/models/detection/models/fast.py +++ b/onnxtr/models/detection/models/fast.py @@ -78,7 +78,7 @@ def __call__( if return_model_output: out["out_map"] = prob_map - out["preds"] = [dict(zip("words", preds)) for preds in self.postprocessor(np.transpose(prob_map, (0, 2, 3, 1)))] + out["preds"] = [dict(zip(["words"], preds)) for preds in self.postprocessor(prob_map)] return out diff --git a/onnxtr/models/detection/models/linknet.py b/onnxtr/models/detection/models/linknet.py index bd82c82..9f0f417 100644 --- a/onnxtr/models/detection/models/linknet.py +++ b/onnxtr/models/detection/models/linknet.py @@ -78,7 +78,7 @@ def __call__( if return_model_output: out["out_map"] = prob_map - out["preds"] = [dict(zip("words", preds)) for preds in self.postprocessor(np.transpose(prob_map, (0, 2, 3, 1)))] + out["preds"] = [dict(zip(["words"], preds)) for preds in self.postprocessor(prob_map)] return out diff --git a/onnxtr/models/engine.py b/onnxtr/models/engine.py index a9a6a41..fbe8475 100644 --- a/onnxtr/models/engine.py +++ b/onnxtr/models/engine.py @@ -3,12 +3,13 @@ # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. 
-from typing import Any, List +from typing import Any import numpy as np import onnxruntime from onnxtr.utils.data import download_from_url +from onnxtr.utils.geometry import shape_translate class Engine: @@ -26,7 +27,7 @@ def __init__(self, url: str, **kwargs: Any) -> None: archive_path, providers=["CPUExecutionProvider", "CUDAExecutionProvider"] ) - def run(self, inputs: np.ndarray) -> List[np.ndarray]: - inputs = np.transpose(inputs, (0, 3, 1, 2)).astype(np.float32) + def run(self, inputs: np.ndarray) -> np.ndarray: + inputs = shape_translate(inputs, format="BCHW") logits = self.runtime.run(["logits"], {"input": inputs})[0] - return logits + return shape_translate(logits, format="BHWC") diff --git a/onnxtr/models/preprocessor/base.py b/onnxtr/models/preprocessor/base.py index b332561..73ee78b 100644 --- a/onnxtr/models/preprocessor/base.py +++ b/onnxtr/models/preprocessor/base.py @@ -9,6 +9,7 @@ import numpy as np from onnxtr.transforms import Normalize, Resize +from onnxtr.utils.geometry import shape_translate from onnxtr.utils.multithreading import multithread_exec from onnxtr.utils.repr import NestedObject @@ -67,6 +68,7 @@ def sample_transforms(self, x: np.ndarray) -> np.ndarray: raise TypeError("unsupported data type for numpy.ndarray") elif x.dtype not in (np.uint8, np.float16, np.float32): raise TypeError("unsupported data type for torch.Tensor") + x = shape_translate(x, "HWC") # Data type & 255 division if x.dtype == np.uint8: x = x.astype(np.float32) / 255.0 @@ -95,13 +97,14 @@ def __call__(self, x: Union[np.ndarray, List[np.ndarray]]) -> List[np.ndarray]: raise TypeError("unsupported data type for numpy.ndarray") elif x.dtype not in (np.uint8, np.float16, np.float32): raise TypeError("unsupported data type for torch.Tensor") + x = shape_translate(x, "BHWC") # Data type & 255 division if x.dtype == np.uint8: x = x.astype(np.float32) / 255.0 # Resizing if (x.shape[1], x.shape[2]) != self.resize.output_size: - x = self.resize(x, self.resize.output_size, 
method=self.resize.method, antialias=self.resize.antialias) + x = self.resize(x) batches = [x] diff --git a/onnxtr/transforms/base.py b/onnxtr/transforms/base.py index f176bb6..5a6607d 100644 --- a/onnxtr/transforms/base.py +++ b/onnxtr/transforms/base.py @@ -26,6 +26,7 @@ def __init__( self.interpolation = interpolation self.preserve_aspect_ratio = preserve_aspect_ratio self.symmetric_pad = symmetric_pad + self.output_size = size if isinstance(size, tuple) else (size, size) if not isinstance(self.size, (int, tuple, list)): raise AssertionError("size should be either a tuple, a list or an int") @@ -34,7 +35,10 @@ def __call__( self, img: np.ndarray, ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]: - h, w = img.shape[:2] + if img.ndim == 3: + h, w = img.shape[0:2] + else: + h, w = img.shape[1:3] sh, sw = self.size # Calculate aspect ratio of the image diff --git a/onnxtr/utils/geometry.py b/onnxtr/utils/geometry.py index 9c77de1..16eb399 100644 --- a/onnxtr/utils/geometry.py +++ b/onnxtr/utils/geometry.py @@ -25,6 +25,7 @@ "rotate_abs_geoms", "extract_crops", "extract_rcrops", + "shape_translate", ] @@ -57,6 +58,61 @@ def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox: return (min(x), min(y)), (max(x), max(y)) +def shape_translate(data: np.ndarray, format: str) -> np.ndarray: + """Translate the shape of the input data to the desired format + + Args: + ---- + data: input data + format: target format ('BCHW', 'BHWC', 'CHW', or 'HWC') + + Returns: + ------- + the reshaped data + """ + # Get the current shape + current_shape = data.shape + + # Check the number of dimensions + num_dims = len(current_shape) + + # Check the position of image channels + if num_dims == 4 and current_shape[1] in [1, 3] and format in ["BCHW", "BHWC"]: + # Channels are in the second dimension + channels_second = True + elif num_dims == 4 and current_shape[3] in [1, 3] and format in ["CHW", "HWC"]: + # Channels are in the fourth dimension + channels_second = False + elif num_dims == 3 
and current_shape[0] in [1, 3] and format == "CHW": + # Channels are in the first dimension + channels_second = False + elif num_dims == 3 and current_shape[2] in [1, 3] and format == "HWC": + # Channels are in the third dimension + channels_second = False + else: + # Data does not seem to be an image + channels_second = None + + # Reshape the data according to the target format + if format == "BCHW" and not channels_second: + # Move channels to the second dimension + reshaped_data = np.moveaxis(data, -1, 1) + elif format == "BHWC" and channels_second: + # Move channels to the last dimension + reshaped_data = np.moveaxis(data, 1, -1) + elif format == "CHW" and not channels_second: + # Move channels to the first dimension + reshaped_data = np.moveaxis(data, -1, 0) + elif format == "HWC" and not channels_second: + # Move channels to the third dimension + reshaped_data = np.moveaxis(data, -1, 2) + else: + # Return the data without reshaping + reshaped_data = data + + return reshaped_data + + def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]: """Compute enclosing bbox either from: diff --git a/pyproject.toml b/pyproject.toml index e2ae960..f39fc0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,16 +68,6 @@ quality = [ "mypy>=0.812", "pre-commit>=2.17.0", ] -docs = [ - "sphinx>=3.0.0,!=3.5.0", - "sphinxemoji>=0.1.8", - "sphinx-copybutton>=0.3.1", - "docutils<0.22", - "recommonmark>=0.7.1", - "sphinx-markdown-tables>=0.0.15", - "sphinx-tabs>=3.3.0", - "furo>=2022.3.4", -] dev = [ # HTML "weasyprint>=55.0", @@ -92,19 +82,9 @@ dev = [ "ruff>=0.1.5", "mypy>=0.812", "pre-commit>=2.17.0", - # Documentation - "sphinx>=3.0.0,!=3.5.0", - "sphinxemoji>=0.1.8", - "sphinx-copybutton>=0.3.1", - "docutils<0.22", - "recommonmark>=0.7.1", - "sphinx-markdown-tables>=0.0.15", - "sphinx-tabs>=3.3.0", - "furo>=2022.3.4", ] [project.urls] -#documentation = "https://mindee.github.io/doctr" repository = 
"https://github.com/felixdittrich92/OnnxTR" tracker = "https://github.com/felixdittrich92/OnnxTR/issues" #changelog = "https://mindee.github.io/doctr/changelog.html" @@ -113,7 +93,7 @@ tracker = "https://github.com/felixdittrich92/OnnxTR/issues" zip-safe = true [tool.setuptools.packages.find] -exclude = ["api*", "demo*", "docs*", "tests*"] +exclude = ["tests*"] [tool.mypy] files = "onnxtr/" @@ -163,14 +143,12 @@ select = [ ignore = ["E402", "E203", "F403", "E731", "N812", "N817", "C408"] [tool.ruff.lint.isort] -known-first-party = ["onnxtr", "app", "utils"] -known-third-party = ["fastapi", "onnxruntime", "cv2"] +known-first-party = ["onnxtr", "utils"] +known-third-party = ["onnxruntime", "cv2"] [tool.ruff.lint.per-file-ignores] "onnxtr/models/**.py" = ["N806", "F841"] -"onnxtr/datasets/**.py" = ["N806"] "tests/**.py" = ["D"] -"docs/**.py" = ["D"] ".github/**.py" = ["D"] diff --git a/tests/common/test_models_classification.py b/tests/common/test_models_classification.py new file mode 100644 index 0000000..48de11c --- /dev/null +++ b/tests/common/test_models_classification.py @@ -0,0 +1,87 @@ +import cv2 +import numpy as np +import pytest + +from onnxtr.models import classification +from onnxtr.models.classification.predictor import OrientationPredictor +from onnxtr.models.engine import Engine + + +@pytest.mark.parametrize( + "arch_name, input_shape", + [ + ["mobilenet_v3_small_crop_orientation", (256, 256, 3)], + ["mobilenet_v3_small_page_orientation", (512, 512, 3)], + ], +) +def test_classification_models(arch_name, input_shape): + batch_size = 8 + model = classification.__dict__[arch_name]() + assert isinstance(model, Engine) + input_tensor = np.random.rand(batch_size, *input_shape).astype(np.float32) + out = model(input_tensor) + assert isinstance(out, np.ndarray) + assert out.shape == (8, 4) + + +@pytest.mark.parametrize( + "arch_name", + [ + "mobilenet_v3_small_crop_orientation", + "mobilenet_v3_small_page_orientation", + ], +) +def 
test_classification_zoo(arch_name): + if "crop" in arch_name: + batch_size = 16 + input_array = np.random.rand(batch_size, 3, 256, 256).astype(np.float32) + # Model + predictor = classification.zoo.crop_orientation_predictor(arch_name) + + with pytest.raises(ValueError): + predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model") + else: + batch_size = 2 + input_array = np.random.rand(batch_size, 3, 512, 512).astype(np.float32) + # Model + predictor = classification.zoo.page_orientation_predictor(arch_name) + + with pytest.raises(ValueError): + predictor = classification.zoo.page_orientation_predictor(arch="wrong_model") + # object check + assert isinstance(predictor, OrientationPredictor) + + out = predictor(input_array) + class_idxs, classes, confs = out[0], out[1], out[2] + assert isinstance(class_idxs, list) and len(class_idxs) == batch_size + assert isinstance(classes, list) and len(classes) == batch_size + assert isinstance(confs, list) and len(confs) == batch_size + assert all(isinstance(pred, int) for pred in class_idxs) + assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes) + assert all(isinstance(pred, float) for pred in confs) + + +def test_crop_orientation_model(mock_text_box): + text_box_0 = cv2.imread(mock_text_box) + # rotates counter-clockwise + text_box_270 = np.rot90(text_box_0, 1) + text_box_180 = np.rot90(text_box_0, 2) + text_box_90 = np.rot90(text_box_0, 3) + classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation") + assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] + # 270 degrees is equivalent to -90 degrees + assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] + assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) + + +def test_page_orientation_model(mock_payslip): + text_box_0 = 
cv2.imread(mock_payslip) + # rotates counter-clockwise + text_box_270 = np.rot90(text_box_0, 1) + text_box_180 = np.rot90(text_box_0, 2) + text_box_90 = np.rot90(text_box_0, 3) + classifier = classification.crop_orientation_predictor("mobilenet_v3_small_page_orientation") + assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] + # 270 degrees is equivalent to -90 degrees + assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] + assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) diff --git a/tests/common/test_models_detection.py b/tests/common/test_models_detection.py index 1dc8621..3897df1 100644 --- a/tests/common/test_models_detection.py +++ b/tests/common/test_models_detection.py @@ -1,7 +1,10 @@ import numpy as np import pytest +from onnxtr.models import detection from onnxtr.models.detection.postprocessor.base import GeneralDetectionPostProcessor +from onnxtr.models.detection.predictor import DetectionPredictor +from onnxtr.models.engine import Engine def test_postprocessor(): @@ -58,3 +61,68 @@ def test_postprocessor(): r_out = r_postprocessor.polygon_to_box(issue_points) assert isinstance(out, tuple) and len(out) == 4 assert isinstance(r_out, np.ndarray) and r_out.shape == (4, 2) + + +@pytest.mark.parametrize( + "arch_name, input_shape, output_size, out_prob", + [ + ["db_resnet34", (1024, 1024, 3), (1024, 1024, 1), True], + ["db_resnet50", (1024, 1024, 3), (1024, 1024, 1), True], + ["db_mobilenet_v3_large", (1024, 1024, 3), (1024, 1024, 1), True], + ["linknet_resnet18", (1024, 1024, 3), (1024, 1024, 1), True], + ["linknet_resnet34", (1024, 1024, 3), (1024, 1024, 1), True], + ["linknet_resnet50", (1024, 1024, 3), (1024, 1024, 1), True], + ["fast_tiny", (1024, 1024, 3), (1024, 1024, 1), True], + ["fast_small", (1024, 1024, 3), (1024, 1024, 1), True], + ["fast_base", (1024, 1024, 3), (1024, 1024, 1), True], + ], +) 
+def test_detection_models(arch_name, input_shape, output_size, out_prob): + batch_size = 2 + model = detection.__dict__[arch_name]() + assert isinstance(model, Engine) + input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) + out = model(input_array, return_model_output=True) + assert isinstance(out, dict) + assert len(out) == 2 + # Check proba map + assert out["out_map"].shape == (batch_size, *output_size) + assert out["out_map"].dtype == np.float32 + if out_prob: + assert np.all(out["out_map"] >= 0) and np.all(out["out_map"] <= 1) + # Check boxes + for boxes_dict in out["preds"]: + for boxes in boxes_dict.values(): + assert boxes.shape[1] == 5 + assert np.all(boxes[:, :2] < boxes[:, 2:4]) + assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1) + + +@pytest.mark.parametrize( + "arch_name", + [ + "db_resnet34", + "db_resnet50", + "db_mobilenet_v3_large", + "linknet_resnet18", + "linknet_resnet34", + "linknet_resnet50", + "fast_tiny", + "fast_small", + "fast_base", + ], +) +def test_detection_zoo(arch_name): + # Model + predictor = detection.zoo.detection_predictor(arch_name) + # object check + assert isinstance(predictor, DetectionPredictor) + input_array = np.random.rand(2, 3, 1024, 1024).astype(np.float32) + + out, seq_maps = predictor(input_array, return_maps=True) + assert all(isinstance(boxes, dict) for boxes in out) + assert all(isinstance(boxes["words"], np.ndarray) and boxes["words"].shape[1] == 5 for boxes in out) + assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps) + assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps) + # check that all values in the seq_maps are between 0 and 1 + assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps) diff --git a/tests/tensorflow/test_models_preprocessor_tf.py b/tests/common/test_models_preprocessor.py similarity index 68% rename from tests/tensorflow/test_models_preprocessor_tf.py rename to 
tests/common/test_models_preprocessor.py index d1f2151..4ea2e7f 100644 --- a/tests/tensorflow/test_models_preprocessor_tf.py +++ b/tests/common/test_models_preprocessor.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import tensorflow as tf -from doctr.models.preprocessor import PreProcessor + +from onnxtr.models.preprocessor import PreProcessor @pytest.mark.parametrize( @@ -9,12 +9,8 @@ [ [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, 0.5], # numpy uint8 [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, 0.5], # numpy fp32 - [2, (128, 128), tf.cast(tf.fill((3, 256, 128, 3), 255), dtype=tf.uint8), 1, 0.5], # tf uint8 - [2, (128, 128), tf.ones((3, 128, 128, 3), dtype=tf.float32), 1, 0.5], # tf fp32 [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, 0.5], # list of numpy uint8 [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, 0.5], # list of numpy fp32 - [2, (128, 128), [tf.cast(tf.fill((256, 128, 3), 255), dtype=tf.uint8)] * 3, 2, 0.5], # list of tf uint8 - [2, (128, 128), [tf.ones((128, 128, 3), dtype=tf.float32)] * 3, 2, 0.5], # list of tf fp32 ], ) def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value): @@ -36,8 +32,8 @@ def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, e out = processor(input_tensor) assert isinstance(out, list) and len(out) == expected_batches - assert all(isinstance(b, tf.Tensor) for b in out) - assert all(b.dtype == tf.float32 for b in out) + assert all(isinstance(b, np.ndarray) for b in out) + assert all(b.dtype == np.float32 for b in out) assert all(b.shape[1:3] == output_size for b in out) - assert all(tf.math.reduce_all(tf.math.abs(b - expected_value) < 1e-6) for b in out) + assert all(np.all(b == expected_value) for b in out) # TODO: Fix me assert len(repr(processor).split("\n")) == 4 diff --git a/tests/common/test_models_recognition.py b/tests/common/test_models_recognition.py new file mode 
100644 index 0000000..f89ab78 --- /dev/null +++ b/tests/common/test_models_recognition.py @@ -0,0 +1,104 @@ +import numpy as np +import pytest + +from onnxtr.models import recognition +from onnxtr.models.engine import Engine +from onnxtr.models.recognition.predictor import RecognitionPredictor +from onnxtr.models.recognition.predictor._utils import remap_preds, split_crops + + +@pytest.mark.parametrize( + "crops, max_ratio, target_ratio, dilation, channels_last, num_crops", + [ + # No split required + [[np.zeros((32, 128, 3), dtype=np.uint8)], 8, 4, 1.4, True, 1], + [[np.zeros((3, 32, 128), dtype=np.uint8)], 8, 4, 1.4, False, 1], + # Split required + [[np.zeros((32, 1024, 3), dtype=np.uint8)], 8, 6, 1.4, True, 5], + [[np.zeros((3, 32, 1024), dtype=np.uint8)], 8, 6, 1.4, False, 5], + ], +) +def test_split_crops(crops, max_ratio, target_ratio, dilation, channels_last, num_crops): + new_crops, crop_map, should_remap = split_crops(crops, max_ratio, target_ratio, dilation, channels_last) + assert len(new_crops) == num_crops + assert len(crop_map) == len(crops) + assert should_remap == (len(crops) != len(new_crops)) + + +@pytest.mark.parametrize( + "preds, crop_map, dilation, pred", + [ + # Nothing to remap + [[("hello", 0.5)], [0], 1.4, [("hello", 0.5)]], + # Merge + [[("hellowo", 0.5), ("loworld", 0.6)], [(0, 2)], 1.4, [("helloworld", 0.5)]], + ], +) +def test_remap_preds(preds, crop_map, dilation, pred): + preds = remap_preds(preds, crop_map, dilation) + assert len(preds) == len(pred) + assert preds == pred + assert all(isinstance(pred, tuple) for pred in preds) + assert all(isinstance(pred[0], str) and isinstance(pred[1], float) for pred in preds) + + +@pytest.mark.parametrize( + "arch_name, input_shape", + [ + ["crnn_vgg16_bn", (32, 128, 3)], + ["crnn_mobilenet_v3_small", (32, 128, 3)], + ["crnn_mobilenet_v3_large", (32, 128, 3)], + ["sar_resnet31", (32, 128, 3)], + ["master", (32, 128, 3)], + ["vitstr_small", (32, 128, 3)], + ["vitstr_base", (32, 128, 3)], + 
["parseq", (32, 128, 3)], + ], +) +def test_recognition_models(arch_name, input_shape, mock_vocab): + batch_size = 4 + model = recognition.__dict__[arch_name]() + assert isinstance(model, Engine) + input_array = np.random.rand(batch_size, *input_shape).astype(np.float32) + + out = model(input_array) + assert isinstance(out, dict) + assert len(out) == 1 + assert isinstance(out["preds"], list) + assert len(out["preds"]) == batch_size + assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"]) + + # test model post processor + post_processor = model.postprocessor + decoded = post_processor(np.random.rand(2, len(mock_vocab), 30).astype(np.float32)) + assert isinstance(decoded, list) + assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in decoded) + assert len(decoded) == 2 + assert all(char in mock_vocab for word, _ in decoded for char in word) + # Repr + assert repr(post_processor) == f"{post_processor.__name__}(vocab_size={len(mock_vocab)})" + + +@pytest.mark.parametrize( + "arch_name", + [ + "crnn_vgg16_bn", + "crnn_mobilenet_v3_small", + "crnn_mobilenet_v3_large", + "sar_resnet31", + "master", + "vitstr_small", + "vitstr_base", + "parseq", + ], +) +def test_recognition_zoo(arch_name): + batch_size = 2 + # Model + predictor = recognition.zoo.recognition_predictor(arch_name) + # object check + assert isinstance(predictor, RecognitionPredictor) + input_array = np.random.rand(batch_size, 3, 128, 128).astype(np.float32) + out = predictor(input_array) + assert isinstance(out, list) and len(out) == batch_size + assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out) diff --git a/tests/common/test_models_recognition_predictor.py b/tests/common/test_models_recognition_predictor.py deleted file mode 100644 index 734239a..0000000 --- a/tests/common/test_models_recognition_predictor.py +++ /dev/null @@ -1,39 +0,0 @@ -import numpy as np -import pytest - 
-from onnxtr.models.recognition.predictor._utils import remap_preds, split_crops - - -@pytest.mark.parametrize( - "crops, max_ratio, target_ratio, dilation, channels_last, num_crops", - [ - # No split required - [[np.zeros((32, 128, 3), dtype=np.uint8)], 8, 4, 1.4, True, 1], - [[np.zeros((3, 32, 128), dtype=np.uint8)], 8, 4, 1.4, False, 1], - # Split required - [[np.zeros((32, 1024, 3), dtype=np.uint8)], 8, 6, 1.4, True, 5], - [[np.zeros((3, 32, 1024), dtype=np.uint8)], 8, 6, 1.4, False, 5], - ], -) -def test_split_crops(crops, max_ratio, target_ratio, dilation, channels_last, num_crops): - new_crops, crop_map, should_remap = split_crops(crops, max_ratio, target_ratio, dilation, channels_last) - assert len(new_crops) == num_crops - assert len(crop_map) == len(crops) - assert should_remap == (len(crops) != len(new_crops)) - - -@pytest.mark.parametrize( - "preds, crop_map, dilation, pred", - [ - # Nothing to remap - [[("hello", 0.5)], [0], 1.4, [("hello", 0.5)]], - # Merge - [[("hellowo", 0.5), ("loworld", 0.6)], [(0, 2)], 1.4, [("helloworld", 0.5)]], - ], -) -def test_remap_preds(preds, crop_map, dilation, pred): - preds = remap_preds(preds, crop_map, dilation) - assert len(preds) == len(pred) - assert preds == pred - assert all(isinstance(pred, tuple) for pred in preds) - assert all(isinstance(pred[0], str) and isinstance(pred[1], float) for pred in preds) diff --git a/tests/common/test_models_zoo.py b/tests/common/test_models_zoo.py new file mode 100644 index 0000000..056d4f2 --- /dev/null +++ b/tests/common/test_models_zoo.py @@ -0,0 +1,167 @@ +import numpy as np +import pytest + +from onnxtr import models +from onnxtr.io import Document, DocumentFile +from onnxtr.models import detection, recognition +from onnxtr.models.detection.predictor import DetectionPredictor +from onnxtr.models.detection.zoo import detection_predictor +from onnxtr.models.predictor import OCRPredictor +from onnxtr.models.preprocessor import PreProcessor +from 
onnxtr.models.recognition.predictor import RecognitionPredictor +from onnxtr.models.recognition.zoo import recognition_predictor +from onnxtr.utils.repr import NestedObject + + +# Create a dummy callback +class _DummyCallback: + def __call__(self, loc_preds): + return loc_preds + + +@pytest.mark.parametrize( + "assume_straight_pages, straighten_pages", + [ + [True, False], + [False, False], + [True, True], + ], +) +def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages): + det_bsize = 4 + det_predictor = DetectionPredictor( + PreProcessor(output_size=(1024, 1024), batch_size=det_bsize), + detection.db_mobilenet_v3_large(assume_straight_pages=assume_straight_pages), + ) + + reco_bsize = 16 + reco_predictor = RecognitionPredictor( + PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), + recognition.crnn_vgg16_bn(vocab=mock_vocab), + ) + + doc = DocumentFile.from_pdf(mock_pdf) + + predictor = OCRPredictor( + det_predictor, + reco_predictor, + assume_straight_pages=assume_straight_pages, + straighten_pages=straighten_pages, + detect_orientation=True, + detect_language=True, + ) + + if assume_straight_pages: + assert predictor.crop_orientation_predictor is None + else: + assert isinstance(predictor.crop_orientation_predictor, NestedObject) + + out = predictor(doc) + assert isinstance(out, Document) + assert len(out.pages) == 2 + # Dimension check + with pytest.raises(ValueError): + input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + _ = predictor([input_page]) + + orientation = 0 + assert out.pages[0].orientation["value"] == orientation + language = "unknown" + assert out.pages[0].language["value"] == language + + +def test_trained_ocr_predictor(mock_payslip): + doc = DocumentFile.from_images(mock_payslip) + + det_predictor = detection_predictor( + "db_resnet50", + batch_size=2, + assume_straight_pages=True, + symmetric_pad=True, + preserve_aspect_ratio=False, + ) + reco_predictor = 
recognition_predictor("crnn_vgg16_bn", batch_size=128) + + predictor = OCRPredictor( + det_predictor, + reco_predictor, + assume_straight_pages=True, + straighten_pages=True, + preserve_aspect_ratio=False, + ) + # test hooks + predictor.add_hook(_DummyCallback()) + + out = predictor(doc) + + assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." + geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) + assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05) + + assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised" + geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) + assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05) + + det_predictor = detection_predictor( + "db_resnet50", + batch_size=2, + assume_straight_pages=True, + preserve_aspect_ratio=True, + symmetric_pad=True, + ) + + predictor = OCRPredictor( + det_predictor, + reco_predictor, + assume_straight_pages=True, + straighten_pages=True, + preserve_aspect_ratio=True, + symmetric_pad=True, + ) + + out = predictor(doc) + + assert "Mr" in out.pages[0].blocks[0].lines[0].words[0].value + + +def _test_predictor(predictor): + # Output checks + assert isinstance(predictor, OCRPredictor) + + doc = [np.zeros((1024, 1024, 3), dtype=np.uint8)] + out = predictor(doc) + # Document + assert isinstance(out, Document) + + # The input doc has 1 page + assert len(out.pages) == 1 + # Dimension check + with pytest.raises(ValueError): + input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) + _ = predictor([input_page]) + + +@pytest.mark.parametrize( + "det_arch, reco_arch", + [ + ["db_mobilenet_v3_large", "crnn_vgg16_bn"], + ], +) +def test_zoo_models(det_arch, reco_arch): + # Model + predictor = models.ocr_predictor(det_arch, reco_arch) + _test_predictor(predictor) + + # passing model instance directly + det_model = 
detection.__dict__[det_arch]() + reco_model = recognition.__dict__[reco_arch]() + predictor = models.ocr_predictor(det_model, reco_model) + _test_predictor(predictor) + + # passing recognition model as detection model + with pytest.raises(ValueError): + models.ocr_predictor(det_arch=reco_model) + + # passing detection model as recognition model + with pytest.raises(ValueError): + models.ocr_predictor(reco_arch=det_model) diff --git a/tests/common/test_transforms.py b/tests/common/test_transforms.py index 4640904..6d84d0c 100644 --- a/tests/common/test_transforms.py +++ b/tests/common/test_transforms.py @@ -1 +1,66 @@ -# TODO +import numpy as np +import pytest + +from onnxtr.transforms import Normalize, Resize + + +def test_resize(): + output_size = (32, 32) + transfo = Resize(output_size) + input_t = np.ones((64, 64, 3), dtype=np.float32) + out = transfo(input_t) + + assert np.all(out == 1) + assert out.shape[:2] == output_size + assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='1')" + + transfo = Resize(output_size, preserve_aspect_ratio=True) + input_t = np.ones((32, 64, 3), dtype=np.float32) + out = transfo(input_t) + + assert out.shape[:2] == output_size + assert not np.all(out == 1) + # Asymetric padding + assert np.all(out[-1] == 0) and np.all(out[0] == 0) + + # Symetric padding + transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) + assert repr(transfo) == ( + f"Resize(output_size={output_size}, interpolation='1', " f"preserve_aspect_ratio=True, symmetric_pad=True)" + ) + out = transfo(input_t) + assert out.shape[:2] == output_size + # symetric padding + assert np.all(out[-1] == 0) and np.all(out[0] == 0) + + # Inverse aspect ratio + input_t = np.ones((3, 64, 32), dtype=np.float32) + out = transfo(input_t) + + assert not np.all(out == 1) + assert out.shape[:2] == output_size + + # Same aspect ratio + output_size = (32, 128) + transfo = Resize(output_size, preserve_aspect_ratio=True) + out = 
transfo(np.ones((3, 16, 64), dtype=np.float32)) + assert out.shape[:2] == output_size + + +@pytest.mark.parametrize( + "input_shape", + [ + [8, 32, 32, 3], + [32, 32, 3], + [32, 3], + ], +) +def test_normalize(input_shape): + mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5] + transfo = Normalize(mean, std) + input_t = np.ones(input_shape, dtype=np.float32) + + out = transfo(input_t) + + assert np.all(out == 1) + assert repr(transfo) == f"Normalize(mean={mean}, std={std})" diff --git a/tests/conftest.py b/tests/conftest.py index aca8889..418783d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,10 +39,7 @@ def synthesize_text_img( @pytest.fixture(scope="session") def mock_vocab(): - return ( - "3K}7eé;5àÎYho]QwV6qU~W\"XnbBvcADfËmy.9ÔpÛ*{CôïE%M4#ÈR:g@T$x?0î£|za1ù8,OG€P-kçHëÀÂ2É/ûIJ'j" - "(LNÙFut[)èZs+&°Sd=Ï!<â_Ç>rêi`l" - ) + return "3K}7eé;5àÎYho]QwV6qU~W\"XnbBvcADfËmy.9ÔpÛ*{CôïE%M4#ÈR:g@T$x?0î£|za1ù8,OG€P-kçHëÀÂ2É/ûIJ'j(LNÙFut[)èZs+&°Sd=Ï!<â_Ç>rêi`l" # noqa @pytest.fixture(scope="session") diff --git a/tests/pytorch/test_datasets_pt.py b/tests/pytorch/test_datasets_pt.py deleted file mode 100644 index 09e46d9..0000000 --- a/tests/pytorch/test_datasets_pt.py +++ /dev/null @@ -1,623 +0,0 @@ -import os -from shutil import move - -import numpy as np -import pytest -import torch -from doctr import datasets -from doctr.file_utils import CLASS_NAME -from doctr.transforms import Resize -from torch.utils.data import DataLoader, RandomSampler - - -def _validate_dataset(ds, input_size, batch_size=2, class_indices=False, is_polygons=False): - # Fetch one sample - img, target = ds[0] - - assert isinstance(img, torch.Tensor) - assert img.shape == (3, *input_size) - assert img.dtype == torch.float32 - assert isinstance(target, dict) - assert isinstance(target["boxes"], np.ndarray) and target["boxes"].dtype == np.float32 - if is_polygons: - assert target["boxes"].ndim == 3 and target["boxes"].shape[1:] == (4, 2) - else: - assert target["boxes"].ndim == 2 and 
target["boxes"].shape[1:] == (4,) - assert np.all(np.logical_and(target["boxes"] <= 1, target["boxes"] >= 0)) - if class_indices: - assert isinstance(target["labels"], np.ndarray) and target["labels"].dtype == np.int64 - else: - assert isinstance(target["labels"], list) and all(isinstance(s, str) for s in target["labels"]) - assert len(target["labels"]) == len(target["boxes"]) - - # Check batching - loader = DataLoader( - ds, - batch_size=batch_size, - drop_last=True, - sampler=RandomSampler(ds), - num_workers=0, - pin_memory=True, - collate_fn=ds.collate_fn, - ) - - images, targets = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (batch_size, 3, *input_size) - assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets) - - -def _validate_dataset_recognition_part(ds, input_size, batch_size=2): - # Fetch one sample - img, label = ds[0] - - assert isinstance(img, torch.Tensor) - assert img.shape == (3, *input_size) - assert img.dtype == torch.float32 - assert isinstance(label, str) - - # Check batching - loader = DataLoader( - ds, - batch_size=batch_size, - drop_last=True, - sampler=RandomSampler(ds), - num_workers=0, - pin_memory=True, - collate_fn=ds.collate_fn, - ) - - images, labels = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (batch_size, 3, *input_size) - assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels) - - -def test_visiondataset(): - url = "https://github.com/mindee/doctr/releases/download/v0.6.0/mnist.zip" - with pytest.raises(ValueError): - datasets.datasets.VisionDataset(url, download=False) - - dataset = datasets.datasets.VisionDataset(url, download=True, extract_archive=True) - assert len(dataset) == 0 - assert repr(dataset) == "VisionDataset()" - - -def test_rotation_dataset(mock_image_folder): - input_size = (1024, 1024) - - ds = datasets.OrientationDataset(img_folder=mock_image_folder, img_transforms=Resize(input_size)) - 
assert len(ds) == 5 - img, target = ds[0] - assert isinstance(img, torch.Tensor) - assert img.dtype == torch.float32 - assert img.shape[-2:] == input_size - # Prefilled rotation targets - assert isinstance(target, np.ndarray) and target.dtype == np.int64 - # check that all prefilled targets are 0 (degrees) - assert np.all(target == 0) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size) - assert isinstance(targets, list) and all(isinstance(elt, np.ndarray) for elt in targets) - - -def test_detection_dataset(mock_image_folder, mock_detection_label): - input_size = (1024, 1024) - - ds = datasets.DetectionDataset( - img_folder=mock_image_folder, - label_path=mock_detection_label, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 5 - img, target_dict = ds[0] - target = target_dict[CLASS_NAME] - assert isinstance(img, torch.Tensor) - assert img.dtype == torch.float32 - assert img.shape[-2:] == input_size - # Bounding boxes - assert isinstance(target_dict, dict) - assert isinstance(target, np.ndarray) and target.dtype == np.float32 - assert np.all(np.logical_and(target[:, :4] >= 0, target[:, :4] <= 1)) - assert target.shape[1] == 4 - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size) - assert isinstance(targets, list) and all( - isinstance(elt, np.ndarray) for target in targets for elt in target.values() - ) - # Rotated DS - rotated_ds = datasets.DetectionDataset( - img_folder=mock_image_folder, - label_path=mock_detection_label, - img_transforms=Resize(input_size), - use_polygons=True, - ) - _, r_target = rotated_ds[0] - assert r_target[CLASS_NAME].shape[1:] == (4, 2) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, 
"tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.DetectionDataset(mock_image_folder, mock_detection_label) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -def test_recognition_dataset(mock_image_folder, mock_recognition_label): - input_size = (32, 128) - ds = datasets.RecognitionDataset( - img_folder=mock_image_folder, - labels_path=mock_recognition_label, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - assert len(ds) == 5 - image, label = ds[0] - assert isinstance(image, torch.Tensor) - assert image.shape[-2:] == input_size - assert image.dtype == torch.float32 - assert isinstance(label, str) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, labels = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size) - assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.RecognitionDataset(mock_image_folder, mock_recognition_label) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -@pytest.mark.parametrize( - "use_polygons", - [False, True], -) -def test_ocrdataset(mock_ocrdataset, use_polygons): - input_size = (512, 512) - - ds = datasets.OCRDataset( - *mock_ocrdataset, - img_transforms=Resize(input_size), - use_polygons=use_polygons, - ) - - assert len(ds) == 3 - _validate_dataset(ds, input_size, is_polygons=use_polygons) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.OCRDataset(*mock_ocrdataset) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -def test_charactergenerator(): - input_size = (32, 32) - vocab = "abcdef" - - ds = 
datasets.CharacterGenerator( - vocab=vocab, - num_samples=10, - cache_samples=True, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 10 - image, label = ds[0] - assert isinstance(image, torch.Tensor) - assert image.shape[-2:] == input_size - assert image.dtype == torch.float32 - assert isinstance(label, int) and label < len(vocab) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size) - assert isinstance(targets, torch.Tensor) and targets.shape == (2,) - assert targets.dtype == torch.int64 - - -def test_wordgenerator(): - input_size = (32, 128) - wordlen_range = (1, 10) - vocab = "abcdef" - - ds = datasets.WordGenerator( - vocab=vocab, - min_chars=wordlen_range[0], - max_chars=wordlen_range[1], - num_samples=10, - cache_samples=True, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 10 - image, target = ds[0] - assert isinstance(image, torch.Tensor) - assert image.shape[-2:] == input_size - assert image.dtype == torch.float32 - assert isinstance(target, str) and len(target) >= wordlen_range[0] and len(target) <= wordlen_range[1] - assert all(char in vocab for char in target) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, torch.Tensor) and images.shape == (2, 3, *input_size) - assert isinstance(targets, list) and len(targets) == 2 and all(isinstance(t, str) for t in targets) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples", - [ - [[512, 512], 3], # Actual set has 2700 training samples and 300 test samples - ], -) -def test_artefact_detection(input_size, num_samples, rotate, mock_doc_artefacts): - # monkeypatch the path to temporary dataset - datasets.DocArtefacts.URL = mock_doc_artefacts - datasets.DocArtefacts.SHA256 = None - - ds = datasets.DocArtefacts( - 
train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - cache_dir="/".join(mock_doc_artefacts.split("/")[:-2]), - cache_subdir=mock_doc_artefacts.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"DocArtefacts(train={True})" - _validate_dataset(ds, input_size, class_indices=True, is_polygons=rotate) - - -# NOTE: following datasets support also recognition task - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 626 training samples and 360 test samples - [[32, 128], 15, True], # recognition - ], -) -def test_sroie(input_size, num_samples, rotate, recognition, mock_sroie_dataset): - # monkeypatch the path to temporary dataset - datasets.SROIE.TRAIN = (mock_sroie_dataset, None, "sroie2019_train_task1.zip") - - ds = datasets.SROIE( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_sroie_dataset.split("/")[:-2]), - cache_subdir=mock_sroie_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SROIE(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 5, False], # Actual set has 229 train and 233 test samples - [[32, 128], 25, True], # recognition - ], -) -def test_ic13_dataset(input_size, num_samples, rotate, recognition, mock_ic13): - ds = datasets.IC13( - *mock_ic13, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - - assert len(ds) == num_samples - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - 
- -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 7149 train and 796 test samples - [[32, 128], 5, True], # recognition - ], -) -def test_imgur5k_dataset(input_size, num_samples, rotate, recognition, mock_imgur5k): - ds = datasets.IMGUR5K( - *mock_imgur5k, - train=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - - assert len(ds) == num_samples - 1 # -1 because of the test set 90 / 10 split - assert repr(ds) == f"IMGUR5K(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[32, 128], 3, False], # Actual set has 33402 training samples and 13068 test samples - [[32, 128], 12, True], # recognition - ], -) -def test_svhn(input_size, num_samples, rotate, recognition, mock_svhn_dataset): - # monkeypatch the path to temporary dataset - datasets.SVHN.TRAIN = (mock_svhn_dataset, None, "svhn_train.tar") - - ds = datasets.SVHN( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_svhn_dataset.split("/")[:-2]), - cache_subdir=mock_svhn_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SVHN(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 149 training samples and 50 test samples - [[32, 128], 9, True], # recognition - ], -) -def test_funsd(input_size, num_samples, rotate, recognition, 
mock_funsd_dataset): - # monkeypatch the path to temporary dataset - datasets.FUNSD.URL = mock_funsd_dataset - datasets.FUNSD.SHA256 = None - datasets.FUNSD.FILE_NAME = "funsd.zip" - - ds = datasets.FUNSD( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_funsd_dataset.split("/")[:-2]), - cache_subdir=mock_funsd_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"FUNSD(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 800 training samples and 100 test samples - [[32, 128], 9, True], # recognition - ], -) -def test_cord(input_size, num_samples, rotate, recognition, mock_cord_dataset): - # monkeypatch the path to temporary dataset - datasets.CORD.TRAIN = (mock_cord_dataset, None, "cord_train.zip") - - ds = datasets.CORD( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_cord_dataset.split("/")[:-2]), - cache_subdir=mock_cord_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"CORD(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 2, False], # Actual set has 772875 training samples and 85875 test samples - [[32, 128], 10, True], # recognition - ], -) -def test_synthtext(input_size, num_samples, rotate, recognition, mock_synthtext_dataset): - # monkeypatch the path to temporary dataset - 
datasets.SynthText.URL = mock_synthtext_dataset - datasets.SynthText.SHA256 = None - - ds = datasets.SynthText( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_synthtext_dataset.split("/")[:-2]), - cache_subdir=mock_synthtext_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SynthText(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[32, 128], 1, False], # Actual set has 2000 training samples and 3000 test samples - [[32, 128], 1, True], # recognition - ], -) -def test_iiit5k(input_size, num_samples, rotate, recognition, mock_iiit5k_dataset): - # monkeypatch the path to temporary dataset - datasets.IIIT5K.URL = mock_iiit5k_dataset - datasets.IIIT5K.SHA256 = None - - ds = datasets.IIIT5K( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_iiit5k_dataset.split("/")[:-2]), - cache_subdir=mock_iiit5k_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"IIIT5K(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size, batch_size=1) - else: - _validate_dataset(ds, input_size, batch_size=1, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 100 training samples and 249 test samples - [[32, 128], 3, True], # recognition - ], -) -def test_svt(input_size, num_samples, rotate, recognition, mock_svt_dataset): - # monkeypatch the path to temporary dataset - datasets.SVT.URL = mock_svt_dataset - datasets.SVT.SHA256 = None - - 
ds = datasets.SVT( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_svt_dataset.split("/")[:-2]), - cache_subdir=mock_svt_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SVT(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 246 training samples and 249 test samples - [[32, 128], 3, True], # recognition - ], -) -def test_ic03(input_size, num_samples, rotate, recognition, mock_ic03_dataset): - # monkeypatch the path to temporary dataset - datasets.IC03.TRAIN = (mock_ic03_dataset, None, "ic03_train.zip") - - ds = datasets.IC03( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_ic03_dataset.split("/")[:-2]), - cache_subdir=mock_ic03_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"IC03(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 2, False], - [[32, 128], 5, True], - ], -) -def test_wildreceipt_dataset(input_size, num_samples, rotate, recognition, mock_wildreceipt_dataset): - ds = datasets.WILDRECEIPT( - *mock_wildreceipt_dataset, - train=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - assert len(ds) == num_samples - assert repr(ds) == f"WILDRECEIPT(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - 
else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -# NOTE: following datasets are only for recognition task - - -def test_mjsynth_dataset(mock_mjsynth_dataset): - input_size = (32, 128) - ds = datasets.MJSynth( - *mock_mjsynth_dataset, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - - assert len(ds) == 4 # Actual set has 7581382 train and 1337891 test samples - assert repr(ds) == f"MJSynth(train={True})" - _validate_dataset_recognition_part(ds, input_size) - - -def test_iiithws_dataset(mock_iiithws_dataset): - input_size = (32, 128) - ds = datasets.IIITHWS( - *mock_iiithws_dataset, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - - assert len(ds) == 4 # Actual set has 7141797 train and 793533 test samples - assert repr(ds) == f"IIITHWS(train={True})" - _validate_dataset_recognition_part(ds, input_size) diff --git a/tests/pytorch/test_file_utils_pt.py b/tests/pytorch/test_file_utils_pt.py deleted file mode 100644 index 7b36789..0000000 --- a/tests/pytorch/test_file_utils_pt.py +++ /dev/null @@ -1,5 +0,0 @@ -from doctr.file_utils import is_torch_available - - -def test_file_utils(): - assert is_torch_available() diff --git a/tests/pytorch/test_io_image_pt.py b/tests/pytorch/test_io_image_pt.py deleted file mode 100644 index ad8a44b..0000000 --- a/tests/pytorch/test_io_image_pt.py +++ /dev/null @@ -1,52 +0,0 @@ -import numpy as np -import pytest -import torch -from doctr.io import decode_img_as_tensor, read_img_as_tensor, tensor_from_numpy - - -def test_read_img_as_tensor(mock_image_path): - img = read_img_as_tensor(mock_image_path) - - assert isinstance(img, torch.Tensor) - assert img.dtype == torch.float32 - assert img.shape == (3, 900, 1200) - - img = read_img_as_tensor(mock_image_path, dtype=torch.float16) - assert img.dtype == torch.float16 - img = read_img_as_tensor(mock_image_path, dtype=torch.uint8) - assert img.dtype == torch.uint8 - - with pytest.raises(ValueError): - 
read_img_as_tensor(mock_image_path, dtype=torch.float64) - - -def test_decode_img_as_tensor(mock_image_stream): - img = decode_img_as_tensor(mock_image_stream) - - assert isinstance(img, torch.Tensor) - assert img.dtype == torch.float32 - assert img.shape == (3, 900, 1200) - - img = decode_img_as_tensor(mock_image_stream, dtype=torch.float16) - assert img.dtype == torch.float16 - img = decode_img_as_tensor(mock_image_stream, dtype=torch.uint8) - assert img.dtype == torch.uint8 - - with pytest.raises(ValueError): - decode_img_as_tensor(mock_image_stream, dtype=torch.float64) - - -def test_tensor_from_numpy(mock_image_stream): - with pytest.raises(ValueError): - tensor_from_numpy(np.zeros((256, 256, 3)), torch.int64) - - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8)) - - assert isinstance(out, torch.Tensor) - assert out.dtype == torch.float32 - assert out.shape == (3, 256, 256) - - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8), dtype=torch.float16) - assert out.dtype == torch.float16 - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8), dtype=torch.uint8) - assert out.dtype == torch.uint8 diff --git a/tests/pytorch/test_models_classification_pt.py b/tests/pytorch/test_models_classification_pt.py deleted file mode 100644 index ca3e7e2..0000000 --- a/tests/pytorch/test_models_classification_pt.py +++ /dev/null @@ -1,194 +0,0 @@ -import os -import tempfile - -import cv2 -import numpy as np -import onnxruntime -import pytest -import torch -from doctr.models import classification -from doctr.models.classification.predictor import OrientationPredictor -from doctr.models.utils import export_model_to_onnx - - -def _test_classification(model, input_shape, output_size, batch_size=2): - # Forward - with torch.no_grad(): - out = model(torch.rand((batch_size, *input_shape), dtype=torch.float32)) - # Output checks - assert isinstance(out, torch.Tensor) - assert out.dtype == torch.float32 - assert out.numpy().shape == (batch_size, 
*output_size) - # Check FP16 - if torch.cuda.is_available(): - model = model.half().cuda() - with torch.no_grad(): - out = model(torch.rand((batch_size, *input_shape), dtype=torch.float16).cuda()) - assert out.dtype == torch.float16 - - -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["vgg16_bn_r", (3, 32, 32), (126,)], - ["resnet18", (3, 32, 32), (126,)], - ["resnet31", (3, 32, 32), (126,)], - ["resnet34", (3, 32, 32), (126,)], - ["resnet34_wide", (3, 32, 32), (126,)], - ["resnet50", (3, 32, 32), (126,)], - ["magc_resnet31", (3, 32, 32), (126,)], - ["mobilenet_v3_small", (3, 32, 32), (126,)], - ["mobilenet_v3_large", (3, 32, 32), (126,)], - ["textnet_tiny", (3, 32, 32), (126,)], - ["textnet_small", (3, 32, 32), (126,)], - ["textnet_base", (3, 32, 32), (126,)], - ["vit_s", (3, 32, 32), (126,)], - ["vit_b", (3, 32, 32), (126,)], - # Check that the interpolation of positional embeddings for vit models works correctly - ["vit_s", (3, 64, 64), (126,)], - ], -) -def test_classification_architectures(arch_name, input_shape, output_size): - # Model - model = classification.__dict__[arch_name](pretrained=True).eval() - _test_classification(model, input_shape, output_size) - # Check that you can pretrained everything up until the last layer - classification.__dict__[arch_name](pretrained=True, num_classes=10) - - -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["mobilenet_v3_small_crop_orientation", (3, 256, 256)], - ["mobilenet_v3_small_page_orientation", (3, 512, 512)], - ], -) -def test_classification_models(arch_name, input_shape): - batch_size = 8 - model = classification.__dict__[arch_name](pretrained=False, input_shape=input_shape).eval() - assert isinstance(model, torch.nn.Module) - input_tensor = torch.rand((batch_size, *input_shape)) - - if torch.cuda.is_available(): - model.cuda() - input_tensor = input_tensor.cuda() - out = model(input_tensor) - assert isinstance(out, torch.Tensor) - assert out.shape == (8, 4) - - 
-@pytest.mark.parametrize( - "arch_name", - [ - "mobilenet_v3_small_crop_orientation", - "mobilenet_v3_small_page_orientation", - ], -) -def test_classification_zoo(arch_name): - if "crop" in arch_name: - batch_size = 16 - input_tensor = torch.rand((batch_size, 3, 256, 256)) - # Model - predictor = classification.zoo.crop_orientation_predictor(arch_name, pretrained=False) - predictor.model.eval() - - with pytest.raises(ValueError): - predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model", pretrained=False) - else: - batch_size = 2 - input_tensor = torch.rand((batch_size, 3, 512, 512)) - # Model - predictor = classification.zoo.page_orientation_predictor(arch_name, pretrained=False) - predictor.model.eval() - - with pytest.raises(ValueError): - predictor = classification.zoo.page_orientation_predictor(arch="wrong_model", pretrained=False) - # object check - assert isinstance(predictor, OrientationPredictor) - if torch.cuda.is_available(): - predictor.model.cuda() - input_tensor = input_tensor.cuda() - - with torch.no_grad(): - out = predictor(input_tensor) - out = predictor(input_tensor) - class_idxs, classes, confs = out[0], out[1], out[2] - assert isinstance(class_idxs, list) and len(class_idxs) == batch_size - assert isinstance(classes, list) and len(classes) == batch_size - assert isinstance(confs, list) and len(confs) == batch_size - assert all(isinstance(pred, int) for pred in class_idxs) - assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes) - assert all(isinstance(pred, float) for pred in confs) - - -def test_crop_orientation_model(mock_text_box): - text_box_0 = cv2.imread(mock_text_box) - # rotates counter-clockwise - text_box_270 = np.rot90(text_box_0, 1) - text_box_180 = np.rot90(text_box_0, 2) - text_box_90 = np.rot90(text_box_0, 3) - classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation", pretrained=True) - assert classifier([text_box_0, 
text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] - # 270 degrees is equivalent to -90 degrees - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] - assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) - - -def test_page_orientation_model(mock_payslip): - text_box_0 = cv2.imread(mock_payslip) - # rotates counter-clockwise - text_box_270 = np.rot90(text_box_0, 1) - text_box_180 = np.rot90(text_box_0, 2) - text_box_90 = np.rot90(text_box_0, 3) - classifier = classification.crop_orientation_predictor("mobilenet_v3_small_page_orientation", pretrained=True) - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] - # 270 degrees is equivalent to -90 degrees - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] - assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) - - -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["vgg16_bn_r", (3, 32, 32), (126,)], - ["resnet18", (3, 32, 32), (126,)], - ["resnet31", (3, 32, 32), (126,)], - ["resnet34", (3, 32, 32), (126,)], - ["resnet34_wide", (3, 32, 32), (126,)], - ["resnet50", (3, 32, 32), (126,)], - ["magc_resnet31", (3, 32, 32), (126,)], - ["mobilenet_v3_small", (3, 32, 32), (126,)], - ["mobilenet_v3_large", (3, 32, 32), (126,)], - ["mobilenet_v3_small_crop_orientation", (3, 256, 256), (4,)], - ["mobilenet_v3_small_page_orientation", (3, 512, 512), (4,)], - ["vit_s", (3, 32, 32), (126,)], - ["vit_b", (3, 32, 32), (126,)], - ["textnet_tiny", (3, 32, 32), (126,)], - ["textnet_small", (3, 32, 32), (126,)], - ["textnet_base", (3, 32, 32), (126,)], - ], -) -def test_models_onnx_export(arch_name, input_shape, output_size): - # Model - batch_size = 2 - model = classification.__dict__[arch_name](pretrained=True).eval() - dummy_input = 
torch.rand((batch_size, *input_shape), dtype=torch.float32) - pt_logits = model(dummy_input).detach().cpu().numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path = export_model_to_onnx(model, model_name=os.path.join(tmpdir, "model"), dummy_input=dummy_input) - - assert os.path.exists(model_path) - # Inference - ort_session = onnxruntime.InferenceSession( - os.path.join(tmpdir, "model.onnx"), providers=["CPUExecutionProvider"] - ) - ort_outs = ort_session.run(["logits"], {"input": dummy_input.numpy()}) - - assert isinstance(ort_outs, list) and len(ort_outs) == 1 - assert ort_outs[0].shape == (batch_size, *output_size) - # Check that the output is close to the PyTorch output - only warn if not close - try: - assert np.allclose(pt_logits, ort_outs[0], atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(pt_logits - ort_outs[0]))}") diff --git a/tests/pytorch/test_models_detection_pt.py b/tests/pytorch/test_models_detection_pt.py deleted file mode 100644 index 26c7a63..0000000 --- a/tests/pytorch/test_models_detection_pt.py +++ /dev/null @@ -1,187 +0,0 @@ -import math -import os -import tempfile - -import numpy as np -import onnxruntime -import pytest -import torch -from doctr.file_utils import CLASS_NAME -from doctr.models import detection -from doctr.models.detection._utils import dilate, erode -from doctr.models.detection.fast.pytorch import reparameterize -from doctr.models.detection.predictor import DetectionPredictor -from doctr.models.utils import export_model_to_onnx - - -@pytest.mark.parametrize("train_mode", [True, False]) -@pytest.mark.parametrize( - "arch_name, input_shape, output_size, out_prob", - [ - ["db_resnet34", (3, 512, 512), (1, 512, 512), True], - ["db_resnet50", (3, 512, 512), (1, 512, 512), True], - ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512), True], - ["linknet_resnet18", (3, 512, 512), (1, 512, 512), True], - ["linknet_resnet34", (3, 512, 
512), (1, 512, 512), True], - ["linknet_resnet50", (3, 512, 512), (1, 512, 512), True], - ["fast_tiny", (3, 512, 512), (1, 512, 512), True], - ["fast_tiny_rep", (3, 512, 512), (1, 512, 512), True], # Reparameterized model - ["fast_small", (3, 512, 512), (1, 512, 512), True], - ["fast_base", (3, 512, 512), (1, 512, 512), True], - ], -) -def test_detection_models(arch_name, input_shape, output_size, out_prob, train_mode): - batch_size = 2 - if arch_name == "fast_tiny_rep": - model = reparameterize(detection.fast_tiny(pretrained=True).eval()) - train_mode = False # Reparameterized model is not trainable - else: - model = detection.__dict__[arch_name](pretrained=True) - model = model.train() if train_mode else model.eval() - assert isinstance(model, torch.nn.Module) - input_tensor = torch.rand((batch_size, *input_shape)) - target = [ - {CLASS_NAME: np.array([[0.5, 0.5, 1, 1], [0.5, 0.5, 0.8, 0.8]], dtype=np.float32)}, - {CLASS_NAME: np.array([[0.5, 0.5, 1, 1], [0.5, 0.5, 0.8, 0.9]], dtype=np.float32)}, - ] - if torch.cuda.is_available(): - model.cuda() - input_tensor = input_tensor.cuda() - out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode) - assert isinstance(out, dict) - assert len(out) == 3 if not train_mode else len(out) == 2 - # Check proba map - assert out["out_map"].shape == (batch_size, *output_size) - assert out["out_map"].dtype == torch.float32 - if out_prob: - assert torch.all((out["out_map"] >= 0) & (out["out_map"] <= 1)) - # Check boxes - if not train_mode: - for boxes_dict in out["preds"]: - for boxes in boxes_dict.values(): - assert boxes.shape[1] == 5 - assert np.all(boxes[:, :2] < boxes[:, 2:4]) - assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1) - # Check loss - assert isinstance(out["loss"], torch.Tensor) - # Check the rotated case (same targets) - target = [ - { - CLASS_NAME: np.array( - [[[0.5, 0.5], [1, 0.5], [1, 1], [0.5, 1]], [[0.5, 0.5], [0.8, 0.5], [0.8, 0.8], [0.5, 0.8]]], - dtype=np.float32, 
- ) - }, - { - CLASS_NAME: np.array( - [[[0.5, 0.5], [1, 0.5], [1, 1], [0.5, 1]], [[0.5, 0.5], [0.8, 0.5], [0.8, 0.9], [0.5, 0.9]]], - dtype=np.float32, - ) - }, - ] - loss = model(input_tensor, target)["loss"] - assert isinstance(loss, torch.Tensor) and ((loss - out["loss"]).abs() / loss).item() < 1 - - -@pytest.mark.parametrize( - "arch_name", - [ - "db_resnet34", - "db_resnet50", - "db_mobilenet_v3_large", - "linknet_resnet18", - "fast_tiny", - ], -) -def test_detection_zoo(arch_name): - # Model - predictor = detection.zoo.detection_predictor(arch_name, pretrained=False) - predictor.model.eval() - # object check - assert isinstance(predictor, DetectionPredictor) - input_tensor = torch.rand((2, 3, 1024, 1024)) - if torch.cuda.is_available(): - predictor.model.cuda() - input_tensor = input_tensor.cuda() - - with torch.no_grad(): - out, seq_maps = predictor(input_tensor, return_maps=True) - assert all(isinstance(boxes, dict) for boxes in out) - assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out) - assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps) - assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps) - # check that all values in the seq_maps are between 0 and 1 - assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps) - - -def test_fast_reparameterization(): - dummy_input = torch.rand((1, 3, 1024, 1024), dtype=torch.float32) - base_model = detection.fast_tiny(pretrained=True, exportable=True).eval() - base_model_params = sum(p.numel() for p in base_model.parameters()) - assert math.isclose(base_model_params, 13535296) # base model params - base_out = base_model(dummy_input)["logits"] - rep_model = reparameterize(base_model) - rep_model_params = sum(p.numel() for p in rep_model.parameters()) - assert math.isclose(rep_model_params, 8521920) # reparameterized model params - rep_out = rep_model(dummy_input)["logits"] - diff = base_out - rep_out - assert 
diff.mean() < 5e-2 - - -def test_erode(): - x = torch.zeros((1, 1, 3, 3)) - x[..., 1, 1] = 1 - expected = torch.zeros((1, 1, 3, 3)) - out = erode(x, 3) - assert torch.equal(out, expected) - - -def test_dilate(): - x = torch.zeros((1, 1, 3, 3)) - x[..., 1, 1] = 1 - expected = torch.ones((1, 1, 3, 3)) - out = dilate(x, 3) - assert torch.equal(out, expected) - - -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["db_resnet34", (3, 512, 512), (1, 512, 512)], - ["db_resnet50", (3, 512, 512), (1, 512, 512)], - ["db_mobilenet_v3_large", (3, 512, 512), (1, 512, 512)], - ["linknet_resnet18", (3, 512, 512), (1, 512, 512)], - ["linknet_resnet34", (3, 512, 512), (1, 512, 512)], - ["linknet_resnet50", (3, 512, 512), (1, 512, 512)], - ["fast_tiny", (3, 512, 512), (1, 512, 512)], - ["fast_small", (3, 512, 512), (1, 512, 512)], - ["fast_base", (3, 512, 512), (1, 512, 512)], - ["fast_tiny_rep", (3, 512, 512), (1, 512, 512)], # Reparameterized model - ], -) -def test_models_onnx_export(arch_name, input_shape, output_size): - # Model - batch_size = 2 - if arch_name == "fast_tiny_rep": - model = reparameterize(detection.fast_tiny(pretrained=True, exportable=True).eval()) - else: - model = detection.__dict__[arch_name](pretrained=True, exportable=True).eval() - dummy_input = torch.rand((batch_size, *input_shape), dtype=torch.float32) - pt_logits = model(dummy_input)["logits"].detach().cpu().numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path = export_model_to_onnx(model, model_name=os.path.join(tmpdir, "model"), dummy_input=dummy_input) - assert os.path.exists(model_path) - # Inference - ort_session = onnxruntime.InferenceSession( - os.path.join(tmpdir, "model.onnx"), providers=["CPUExecutionProvider"] - ) - ort_outs = ort_session.run(["logits"], {"input": dummy_input.numpy()}) - - assert isinstance(ort_outs, list) and len(ort_outs) == 1 - assert ort_outs[0].shape == (batch_size, *output_size) - # Check that the output is close to the 
PyTorch output - only warn if not close - try: - assert np.allclose(pt_logits, ort_outs[0], atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(pt_logits - ort_outs[0]))}") diff --git a/tests/pytorch/test_models_factory.py b/tests/pytorch/test_models_factory.py deleted file mode 100644 index 5c75582..0000000 --- a/tests/pytorch/test_models_factory.py +++ /dev/null @@ -1,69 +0,0 @@ -import json -import os -import tempfile - -import pytest -from doctr import models -from doctr.models.factory import _save_model_and_config_for_hf_hub, from_hub, push_to_hf_hub - - -def test_push_to_hf_hub(): - model = models.classification.resnet18(pretrained=False) - with pytest.raises(ValueError): - # run_config and/or arch must be specified - push_to_hf_hub(model, model_name="test", task="classification") - with pytest.raises(ValueError): - # task must be one of classification, detection, recognition, obj_detection - push_to_hf_hub(model, model_name="test", task="invalid_task", arch="mobilenet_v3_small") - with pytest.raises(ValueError): - # arch not in available architectures for task - push_to_hf_hub(model, model_name="test", task="detection", arch="crnn_mobilenet_v3_large") - - -@pytest.mark.parametrize( - "arch_name, task_name, dummy_model_id", - [ - ["vgg16_bn_r", "classification", "Felix92/doctr-dummy-torch-vgg16-bn-r"], - ["resnet18", "classification", "Felix92/doctr-dummy-torch-resnet18"], - ["resnet31", "classification", "Felix92/doctr-dummy-torch-resnet31"], - ["resnet34", "classification", "Felix92/doctr-dummy-torch-resnet34"], - ["resnet34_wide", "classification", "Felix92/doctr-dummy-torch-resnet34-wide"], - ["resnet50", "classification", "Felix92/doctr-dummy-torch-resnet50"], - ["magc_resnet31", "classification", "Felix92/doctr-dummy-torch-magc-resnet31"], - ["mobilenet_v3_small", "classification", "Felix92/doctr-dummy-torch-mobilenet-v3-small"], - ["mobilenet_v3_large", "classification", 
"Felix92/doctr-dummy-torch-mobilenet-v3-large"], - ["vit_s", "classification", "Felix92/doctr-dummy-torch-vit-s"], - ["textnet_tiny", "classification", "Felix92/doctr-dummy-torch-textnet-tiny"], - ["db_resnet34", "detection", "Felix92/doctr-dummy-torch-db-resnet34"], - ["db_resnet50", "detection", "Felix92/doctr-dummy-torch-db-resnet50"], - ["db_mobilenet_v3_large", "detection", "Felix92/doctr-dummy-torch-db-mobilenet-v3-large"], - ["linknet_resnet18", "detection", "Felix92/doctr-dummy-torch-linknet-resnet18"], - ["linknet_resnet34", "detection", "Felix92/doctr-dummy-torch-linknet-resnet34"], - ["linknet_resnet50", "detection", "Felix92/doctr-dummy-torch-linknet-resnet50"], - ["crnn_vgg16_bn", "recognition", "Felix92/doctr-dummy-torch-crnn-vgg16-bn"], - ["crnn_mobilenet_v3_small", "recognition", "Felix92/doctr-dummy-torch-crnn-mobilenet-v3-small"], - ["crnn_mobilenet_v3_large", "recognition", "Felix92/doctr-dummy-torch-crnn-mobilenet-v3-large"], - ["sar_resnet31", "recognition", "Felix92/doctr-dummy-torch-sar-resnet31"], - ["master", "recognition", "Felix92/doctr-dummy-torch-master"], - ["vitstr_small", "recognition", "Felix92/doctr-dummy-torch-vitstr-small"], - ["parseq", "recognition", "Felix92/doctr-dummy-torch-parseq"], - ], -) -def test_models_huggingface_hub(arch_name, task_name, dummy_model_id, tmpdir): - with tempfile.TemporaryDirectory() as tmp_dir: - model = models.__dict__[task_name].__dict__[arch_name](pretrained=True).eval() - - _save_model_and_config_for_hf_hub(model, arch=arch_name, task=task_name, save_dir=tmp_dir) - - assert hasattr(model, "cfg") - assert len(os.listdir(tmp_dir)) == 2 - assert os.path.exists(tmp_dir + "/pytorch_model.bin") - assert os.path.exists(tmp_dir + "/config.json") - tmp_config = json.load(open(tmp_dir + "/config.json")) - assert arch_name == tmp_config["arch"] - assert task_name == tmp_config["task"] - assert all(key in model.cfg.keys() for key in tmp_config.keys()) - - # test from hub - hub_model = 
from_hub(repo_id=dummy_model_id) - assert isinstance(hub_model, type(model)) diff --git a/tests/pytorch/test_models_preprocessor_pt.py b/tests/pytorch/test_models_preprocessor_pt.py deleted file mode 100644 index e3e2983..0000000 --- a/tests/pytorch/test_models_preprocessor_pt.py +++ /dev/null @@ -1,46 +0,0 @@ -import numpy as np -import pytest -import torch -from doctr.models.preprocessor import PreProcessor - - -@pytest.mark.parametrize( - "batch_size, output_size, input_tensor, expected_batches, expected_value", - [ - [2, (128, 128), np.full((3, 256, 128, 3), 255, dtype=np.uint8), 1, 0.5], # numpy uint8 - [2, (128, 128), np.ones((3, 256, 128, 3), dtype=np.float32), 1, 0.5], # numpy fp32 - [2, (128, 128), torch.full((3, 3, 256, 128), 255, dtype=torch.uint8), 1, 0.5], # torch uint8 - [2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float32), 1, 0.5], # torch fp32 - [2, (128, 128), torch.ones((3, 3, 256, 128), dtype=torch.float16), 1, 0.5], # torch fp16 - [2, (128, 128), [np.full((256, 128, 3), 255, dtype=np.uint8)] * 3, 2, 0.5], # list of numpy uint8 - [2, (128, 128), [np.ones((256, 128, 3), dtype=np.float32)] * 3, 2, 0.5], # list of numpy fp32 - [2, (128, 128), [torch.full((3, 256, 128), 255, dtype=torch.uint8)] * 3, 2, 0.5], # list of torch uint8 - [2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float32)] * 3, 2, 0.5], # list of torch fp32 - [2, (128, 128), [torch.ones((3, 256, 128), dtype=torch.float16)] * 3, 2, 0.5], # list of torch fp32 - ], -) -def test_preprocessor(batch_size, output_size, input_tensor, expected_batches, expected_value): - processor = PreProcessor(output_size, batch_size) - - # Invalid input type - with pytest.raises(TypeError): - processor(42) - # 4D check - with pytest.raises(AssertionError): - processor(np.full((256, 128, 3), 255, dtype=np.uint8)) - with pytest.raises(TypeError): - processor(np.full((1, 256, 128, 3), 255, dtype=np.int32)) - # 3D check - with pytest.raises(AssertionError): - processor([np.full((3, 256, 128, 
3), 255, dtype=np.uint8)]) - with pytest.raises(TypeError): - processor([np.full((256, 128, 3), 255, dtype=np.int32)]) - - with torch.no_grad(): - out = processor(input_tensor) - assert isinstance(out, list) and len(out) == expected_batches - assert all(isinstance(b, torch.Tensor) for b in out) - assert all(b.dtype == torch.float32 for b in out) - assert all(b.shape[-2:] == output_size for b in out) - assert all(torch.all(b == expected_value) for b in out) - assert len(repr(processor).split("\n")) == 4 diff --git a/tests/pytorch/test_models_recognition_pt.py b/tests/pytorch/test_models_recognition_pt.py deleted file mode 100644 index 64a0d70..0000000 --- a/tests/pytorch/test_models_recognition_pt.py +++ /dev/null @@ -1,155 +0,0 @@ -import os -import tempfile - -import numpy as np -import onnxruntime -import psutil -import pytest -import torch -from doctr.models import recognition -from doctr.models.recognition.crnn.pytorch import CTCPostProcessor -from doctr.models.recognition.master.pytorch import MASTERPostProcessor -from doctr.models.recognition.parseq.pytorch import PARSeqPostProcessor -from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models.recognition.sar.pytorch import SARPostProcessor -from doctr.models.recognition.vitstr.pytorch import ViTSTRPostProcessor -from doctr.models.utils import export_model_to_onnx - -system_available_memory = int(psutil.virtual_memory().available / 1024**3) - - -@pytest.mark.parametrize("train_mode", [True, False]) -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["crnn_vgg16_bn", (3, 32, 128)], - ["crnn_mobilenet_v3_small", (3, 32, 128)], - ["crnn_mobilenet_v3_large", (3, 32, 128)], - ["sar_resnet31", (3, 32, 128)], - ["master", (3, 32, 128)], - ["vitstr_small", (3, 32, 128)], - ["vitstr_base", (3, 32, 128)], - ["parseq", (3, 32, 128)], - ], -) -def test_recognition_models(arch_name, input_shape, train_mode, mock_vocab): - batch_size = 4 - model = 
recognition.__dict__[arch_name](vocab=mock_vocab, pretrained=True, input_shape=input_shape) - model = model.train() if train_mode else model.eval() - assert isinstance(model, torch.nn.Module) - input_tensor = torch.rand((batch_size, *input_shape)) - target = ["i", "am", "a", "jedi"] - - if torch.cuda.is_available(): - model.cuda() - input_tensor = input_tensor.cuda() - out = model(input_tensor, target, return_model_output=True, return_preds=not train_mode) - assert isinstance(out, dict) - assert len(out) == 3 if not train_mode else len(out) == 2 - if not train_mode: - assert isinstance(out["preds"], list) - assert len(out["preds"]) == batch_size - assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"]) - assert isinstance(out["out_map"], torch.Tensor) - assert out["out_map"].dtype == torch.float32 - assert isinstance(out["loss"], torch.Tensor) - # test model in train mode needs targets - with pytest.raises(ValueError): - model.train() - model(input_tensor, None) - - -@pytest.mark.parametrize( - "post_processor, input_shape", - [ - [CTCPostProcessor, [2, 119, 30]], - [SARPostProcessor, [2, 119, 30]], - [ViTSTRPostProcessor, [2, 119, 30]], - [MASTERPostProcessor, [2, 119, 30]], - [PARSeqPostProcessor, [2, 119, 30]], - ], -) -def test_reco_postprocessors(post_processor, input_shape, mock_vocab): - processor = post_processor(mock_vocab) - decoded = processor(torch.rand(*input_shape)) - assert isinstance(decoded, list) - assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in decoded) - assert len(decoded) == input_shape[0] - assert all(char in mock_vocab for word, _ in decoded for char in word) - # Repr - assert repr(processor) == f"{post_processor.__name__}(vocab_size={len(mock_vocab)})" - - -@pytest.mark.parametrize( - "arch_name", - [ - "crnn_vgg16_bn", - "crnn_mobilenet_v3_small", - "crnn_mobilenet_v3_large", - "sar_resnet31", - "master", - "vitstr_small", - 
"vitstr_base", - "parseq", - ], -) -def test_recognition_zoo(arch_name): - batch_size = 2 - # Model - predictor = recognition.zoo.recognition_predictor(arch_name, pretrained=False) - predictor.model.eval() - # object check - assert isinstance(predictor, RecognitionPredictor) - input_tensor = torch.rand((batch_size, 3, 128, 128)) - if torch.cuda.is_available(): - predictor.model.cuda() - input_tensor = input_tensor.cuda() - - with torch.no_grad(): - out = predictor(input_tensor) - out = predictor(input_tensor) - assert isinstance(out, list) and len(out) == batch_size - assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out) - - -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["crnn_vgg16_bn", (3, 32, 128)], - ["crnn_mobilenet_v3_small", (3, 32, 128)], - ["crnn_mobilenet_v3_large", (3, 32, 128)], - pytest.param( - "sar_resnet31", - (3, 32, 128), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "master", (3, 32, 128), marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory") - ), - ["vitstr_small", (3, 32, 128)], # testing one vitstr version is enough - ["parseq", (3, 32, 128)], - ], -) -def test_models_onnx_export(arch_name, input_shape): - # Model - batch_size = 2 - model = recognition.__dict__[arch_name](pretrained=True, exportable=True).eval() - dummy_input = torch.rand((batch_size, *input_shape), dtype=torch.float32) - pt_logits = model(dummy_input)["logits"].detach().cpu().numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path = export_model_to_onnx(model, model_name=os.path.join(tmpdir, "model"), dummy_input=dummy_input) - assert os.path.exists(model_path) - # Inference - ort_session = onnxruntime.InferenceSession( - os.path.join(tmpdir, "model.onnx"), providers=["CPUExecutionProvider"] - ) - ort_outs = ort_session.run(["logits"], {"input": dummy_input.numpy()}) - - assert isinstance(ort_outs, list) and len(ort_outs) 
== 1 - assert ort_outs[0].shape == pt_logits.shape - # Check that the output is close to the PyTorch output - only warn if not close - try: - assert np.allclose(pt_logits, ort_outs[0], atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(pt_logits - ort_outs[0]))}") diff --git a/tests/pytorch/test_models_utils_pt.py b/tests/pytorch/test_models_utils_pt.py deleted file mode 100644 index 122978a..0000000 --- a/tests/pytorch/test_models_utils_pt.py +++ /dev/null @@ -1,65 +0,0 @@ -import os - -import pytest -import torch -from doctr.models.utils import ( - _bf16_to_float32, - _copy_tensor, - conv_sequence_pt, - load_pretrained_params, - set_device_and_dtype, -) -from torch import nn - - -def test_copy_tensor(): - x = torch.rand(8) - m = _copy_tensor(x) - assert m.device == x.device and m.dtype == x.dtype and m.shape == x.shape and torch.allclose(m, x) - - -def test_bf16_to_float32(): - x = torch.randn([2, 2], dtype=torch.bfloat16) - converted_x = _bf16_to_float32(x) - assert x.dtype == torch.bfloat16 and converted_x.dtype == torch.float32 and torch.equal(converted_x, x.float()) - - -def test_load_pretrained_params(tmpdir_factory): - model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4)) - # Retrieve this URL - url = "https://github.com/mindee/doctr/releases/download/v0.2.1/tmp_checkpoint-6f0ce0e6.pt" - # Temp cache dir - cache_dir = tmpdir_factory.mktemp("cache") - # Pass an incorrect hash - with pytest.raises(ValueError): - load_pretrained_params(model, url, "mywronghash", cache_dir=str(cache_dir)) - # Let it resolve the hash from the file name - load_pretrained_params(model, url, cache_dir=str(cache_dir)) - # Check that the file was downloaded & the archive extracted - assert os.path.exists(cache_dir.join("models").join(url.rpartition("/")[-1].split("&")[0])) - # Check ignore keys - load_pretrained_params(model, url, cache_dir=str(cache_dir), ignore_keys=["2.weight"]) - # non matching keys 
- model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4), nn.ReLU(), nn.Linear(4, 1)) - with pytest.raises(ValueError): - load_pretrained_params(model, url, cache_dir=str(cache_dir), ignore_keys=["2.weight"]) - - -def test_conv_sequence(): - assert len(conv_sequence_pt(3, 8, kernel_size=3)) == 1 - assert len(conv_sequence_pt(3, 8, True, kernel_size=3)) == 2 - assert len(conv_sequence_pt(3, 8, False, True, kernel_size=3)) == 2 - assert len(conv_sequence_pt(3, 8, True, True, kernel_size=3)) == 3 - - -def test_set_device_and_dtype(): - model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4)) - batches = [torch.rand(8) for _ in range(2)] - model, batches = set_device_and_dtype(model, batches, device="cpu", dtype=torch.float32) - assert model[0].weight.device == torch.device("cpu") - assert model[0].weight.dtype == torch.float32 - assert batches[0].device == torch.device("cpu") - assert batches[0].dtype == torch.float32 - model, batches = set_device_and_dtype(model, batches, device="cpu", dtype=torch.float16) - assert model[0].weight.dtype == torch.float16 - assert batches[0].dtype == torch.float16 diff --git a/tests/pytorch/test_models_zoo_pt.py b/tests/pytorch/test_models_zoo_pt.py deleted file mode 100644 index 2ff3cf4..0000000 --- a/tests/pytorch/test_models_zoo_pt.py +++ /dev/null @@ -1,327 +0,0 @@ -import numpy as np -import pytest -from doctr import models -from doctr.file_utils import CLASS_NAME -from doctr.io import Document, DocumentFile -from doctr.io.elements import KIEDocument -from doctr.models import detection, recognition -from doctr.models.detection.predictor import DetectionPredictor -from doctr.models.detection.zoo import detection_predictor -from doctr.models.kie_predictor import KIEPredictor -from doctr.models.predictor import OCRPredictor -from doctr.models.preprocessor import PreProcessor -from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models.recognition.zoo import recognition_predictor -from 
torch import nn - - -# Create a dummy callback -class _DummyCallback: - def __call__(self, loc_preds): - return loc_preds - - -@pytest.mark.parametrize( - "assume_straight_pages, straighten_pages", - [ - [True, False], - [False, False], - [True, True], - ], -) -def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages): - det_bsize = 4 - det_predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=det_bsize), - detection.db_mobilenet_v3_large( - pretrained=False, - pretrained_backbone=False, - assume_straight_pages=assume_straight_pages, - ), - ) - - assert not det_predictor.model.training - - reco_bsize = 32 - reco_predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(pretrained=False, pretrained_backbone=False, vocab=mock_vocab), - ) - - assert not reco_predictor.model.training - - doc = DocumentFile.from_pdf(mock_pdf) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=assume_straight_pages, - straighten_pages=straighten_pages, - detect_orientation=True, - detect_language=True, - ) - - if assume_straight_pages: - assert predictor.crop_orientation_predictor is None - else: - assert isinstance(predictor.crop_orientation_predictor, nn.Module) - - out = predictor(doc) - assert isinstance(out, Document) - assert len(out.pages) == 2 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - orientation = 0 - assert out.pages[0].orientation["value"] == orientation - - -def test_trained_ocr_predictor(mock_payslip): - doc = DocumentFile.from_images(mock_payslip) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - symmetric_pad=True, - preserve_aspect_ratio=False, - ) - reco_predictor = 
recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=False, - ) - - out = predictor(doc) - - assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." - geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) - assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05) - - assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised" - geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) - assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - # test hooks - predictor.add_hook(_DummyCallback()) - - out = predictor(doc) - - assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." 
- - -@pytest.mark.parametrize( - "assume_straight_pages, straighten_pages", - [ - [True, False], - [False, False], - [True, True], - ], -) -def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages): - det_bsize = 4 - det_predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=det_bsize), - detection.db_mobilenet_v3_large( - pretrained=False, - pretrained_backbone=False, - assume_straight_pages=assume_straight_pages, - ), - ) - - assert not det_predictor.model.training - - reco_bsize = 32 - reco_predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(pretrained=False, pretrained_backbone=False, vocab=mock_vocab), - ) - - assert not reco_predictor.model.training - - doc = DocumentFile.from_pdf(mock_pdf) - - predictor = KIEPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=assume_straight_pages, - straighten_pages=straighten_pages, - detect_orientation=True, - detect_language=True, - ) - - if assume_straight_pages: - assert predictor.crop_orientation_predictor is None - else: - assert isinstance(predictor.crop_orientation_predictor, nn.Module) - - out = predictor(doc) - assert isinstance(out, Document) - assert len(out.pages) == 2 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - orientation = 0 - assert out.pages[0].orientation["value"] == orientation - - -def test_trained_kie_predictor(mock_payslip): - doc = DocumentFile.from_images(mock_payslip) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - symmetric_pad=True, - preserve_aspect_ratio=False, - ) - reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128) - - predictor = KIEPredictor( - det_predictor, - reco_predictor, - 
assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=False, - ) - # test hooks - predictor.add_hook(_DummyCallback()) - - out = predictor(doc) - - assert isinstance(out, KIEDocument) - assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr." - geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) - assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05) - - assert out.pages[0].predictions[CLASS_NAME][4].value == "revised" - geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) - assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][4].geometry), geometry_revised, rtol=0.05) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - predictor = KIEPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - out = predictor(doc) - - assert isinstance(out, KIEDocument) - assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr." 
- - -def _test_predictor(predictor): - # Output checks - assert isinstance(predictor, OCRPredictor) - - doc = [np.zeros((512, 512, 3), dtype=np.uint8)] - out = predictor(doc) - # Document - assert isinstance(out, Document) - - # The input doc has 1 page - assert len(out.pages) == 1 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - -def _test_kiepredictor(predictor): - # Output checks - assert isinstance(predictor, KIEPredictor) - - doc = [np.zeros((512, 512, 3), dtype=np.uint8)] - out = predictor(doc) - # Document - assert isinstance(out, KIEDocument) - - # The input doc has 1 page - assert len(out.pages) == 1 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - -@pytest.mark.parametrize( - "det_arch, reco_arch", - [ - ["db_mobilenet_v3_large", "crnn_mobilenet_v3_large"], - ], -) -def test_zoo_models(det_arch, reco_arch): - # Model - predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True) - _test_predictor(predictor) - - # passing model instance directly - det_model = detection.__dict__[det_arch](pretrained=True) - reco_model = recognition.__dict__[reco_arch](pretrained=True) - predictor = models.ocr_predictor(det_model, reco_model) - _test_predictor(predictor) - - # passing recognition model as detection model - with pytest.raises(ValueError): - models.ocr_predictor(det_arch=reco_model, pretrained=True) - - # passing detection model as recognition model - with pytest.raises(ValueError): - models.ocr_predictor(reco_arch=det_model, pretrained=True) - - # KIE predictor - predictor = models.kie_predictor(det_arch, reco_arch, pretrained=True) - _test_kiepredictor(predictor) - - # passing model instance directly - det_model = detection.__dict__[det_arch](pretrained=True) - reco_model = recognition.__dict__[reco_arch](pretrained=True) - 
predictor = models.kie_predictor(det_model, reco_model) - _test_kiepredictor(predictor) - - # passing recognition model as detection model - with pytest.raises(ValueError): - models.kie_predictor(det_arch=reco_model, pretrained=True) - - # passing detection model as recognition model - with pytest.raises(ValueError): - models.kie_predictor(reco_arch=det_model, pretrained=True) diff --git a/tests/pytorch/test_transforms_pt.py b/tests/pytorch/test_transforms_pt.py deleted file mode 100644 index 76bc84e..0000000 --- a/tests/pytorch/test_transforms_pt.py +++ /dev/null @@ -1,351 +0,0 @@ -import math - -import numpy as np -import pytest -import torch -from doctr.transforms import ( - ChannelShuffle, - ColorInversion, - GaussianNoise, - RandomCrop, - RandomHorizontalFlip, - RandomResize, - RandomRotate, - RandomShadow, - Resize, -) -from doctr.transforms.functional import crop_detection, rotate_sample - - -def test_resize(): - output_size = (32, 32) - transfo = Resize(output_size) - input_t = torch.ones((3, 64, 64), dtype=torch.float32) - out = transfo(input_t) - - assert torch.all(out == 1) - assert out.shape[-2:] == output_size - assert repr(transfo) == f"Resize(output_size={output_size}, interpolation='bilinear')" - - transfo = Resize(output_size, preserve_aspect_ratio=True) - input_t = torch.ones((3, 32, 64), dtype=torch.float32) - out = transfo(input_t) - - assert out.shape[-2:] == output_size - assert not torch.all(out == 1) - # Asymetric padding - assert torch.all(out[:, -1] == 0) and torch.all(out[:, 0] == 1) - - # Symetric padding - transfo = Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) - assert repr(transfo) == ( - f"Resize(output_size={output_size}, interpolation='bilinear', " - f"preserve_aspect_ratio=True, symmetric_pad=True)" - ) - out = transfo(input_t) - assert out.shape[-2:] == output_size - # symetric padding - assert torch.all(out[:, -1] == 0) and torch.all(out[:, 0] == 0) - - # Inverse aspect ratio - input_t = torch.ones((3, 64, 
32), dtype=torch.float32) - out = transfo(input_t) - - assert not torch.all(out == 1) - assert out.shape[-2:] == output_size - - # Same aspect ratio - output_size = (32, 128) - transfo = Resize(output_size, preserve_aspect_ratio=True) - out = transfo(torch.ones((3, 16, 64), dtype=torch.float32)) - assert out.shape[-2:] == output_size - - # FP16 - input_t = torch.ones((3, 64, 64), dtype=torch.float16) - out = transfo(input_t) - assert out.dtype == torch.float16 - - -@pytest.mark.parametrize( - "rgb_min", - [ - 0.2, - 0.4, - 0.6, - ], -) -def test_invert_colorize(rgb_min): - transfo = ColorInversion(min_val=rgb_min) - input_t = torch.ones((8, 3, 32, 32), dtype=torch.float32) - out = transfo(input_t) - assert torch.all(out <= 1 - rgb_min + 1e-4) - assert torch.all(out >= 0) - - input_t = torch.full((8, 3, 32, 32), 255, dtype=torch.uint8) - out = transfo(input_t) - assert torch.all(out <= int(math.ceil(255 * (1 - rgb_min + 1e-4)))) - assert torch.all(out >= 0) - - # FP16 - input_t = torch.ones((8, 3, 32, 32), dtype=torch.float16) - out = transfo(input_t) - assert out.dtype == torch.float16 - - -def test_rotate_sample(): - img = torch.ones((3, 200, 100), dtype=torch.float32) - boxes = np.array([0, 0, 100, 200])[None, ...] - polys = np.stack((boxes[..., [0, 1]], boxes[..., [2, 1]], boxes[..., [2, 3]], boxes[..., [0, 3]]), axis=1) - rel_boxes = np.array([0, 0, 1, 1], dtype=np.float32)[None, ...] 
- rel_polys = np.stack( - (rel_boxes[..., [0, 1]], rel_boxes[..., [2, 1]], rel_boxes[..., [2, 3]], rel_boxes[..., [0, 3]]), axis=1 - ) - - # No angle - rotated_img, rotated_geoms = rotate_sample(img, boxes, 0, False) - assert torch.all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, boxes, 0, True) - assert torch.all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 0, False) - assert torch.all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 0, True) - assert torch.all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - - # No expansion - expected_img = torch.zeros((3, 200, 100), dtype=torch.float32) - expected_img[:, 50:150] = 1 - expected_polys = np.array([[0, 0.75], [0, 0.25], [1, 0.25], [1, 0.75]])[None, ...] - rotated_img, rotated_geoms = rotate_sample(img, boxes, 90, False) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 90, False) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_boxes, 90, False) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_polys, 90, False) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - - # Expansion - expected_img = torch.ones((3, 100, 200), dtype=torch.float32) - expected_polys = np.array([[0, 1], [0, 0], [1, 0], [1, 1]], dtype=np.float32)[None, ...] 
- rotated_img, rotated_geoms = rotate_sample(img, boxes, 90, True) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 90, True) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_boxes, 90, True) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_polys, 90, True) - assert torch.all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - - with pytest.raises(AssertionError): - rotate_sample(img, boxes[None, ...], 90, False) - - -def test_random_rotate(): - rotator = RandomRotate(max_angle=10.0, expand=False) - input_t = torch.ones((3, 50, 50), dtype=torch.float32) - boxes = np.array([[15, 20, 35, 30]]) - r_img, _r_boxes = rotator(input_t, boxes) - assert r_img.shape == input_t.shape - - rotator = RandomRotate(max_angle=10.0, expand=True) - r_img, _r_boxes = rotator(input_t, boxes) - assert r_img.shape != input_t.shape - - # FP16 (only on GPU) - if torch.cuda.is_available(): - input_t = torch.ones((3, 50, 50), dtype=torch.float16).cuda() - r_img, _ = rotator(input_t, boxes) - assert r_img.dtype == torch.float16 - - -def test_crop_detection(): - img = torch.ones((3, 50, 50), dtype=torch.float32) - abs_boxes = np.array([ - [15, 20, 35, 30], - [5, 10, 10, 20], - ]) - crop_box = (12 / 50, 23 / 50, 50 / 50, 50 / 50) - c_img, c_boxes = crop_detection(img, abs_boxes, crop_box) - assert c_img.shape == (3, 26, 37) - assert c_boxes.shape == (1, 4) - assert np.all(c_boxes == np.array([15 - 12, 0, 35 - 12, 30 - 23])[None, ...]) - - rel_boxes = np.array([ - [0.3, 0.4, 0.7, 0.6], - [0.1, 0.2, 0.2, 0.4], - ]) - crop_box = (0.24, 0.46, 1.0, 1.0) - c_img, c_boxes = crop_detection(img, rel_boxes, crop_box) - assert c_img.shape == (3, 26, 37) - assert c_boxes.shape == (1, 4) - assert 
np.abs(c_boxes - np.array([0.06 / 0.76, 0.0, 0.46 / 0.76, 0.14 / 0.54])[None, ...]).mean() < 1e-7 - - # FP16 - img = torch.ones((3, 50, 50), dtype=torch.float16) - c_img, _ = crop_detection(img, abs_boxes, crop_box) - assert c_img.dtype == torch.float16 - - with pytest.raises(AssertionError): - crop_detection(img, abs_boxes, (2, 6, 24, 56)) - - -@pytest.mark.parametrize( - "target", - [ - np.array([[15, 20, 35, 30]]), # box - np.array([[[15, 20], [35, 20], [35, 30], [15, 30]]]), # polygon - ], -) -def test_random_crop(target): - cropper = RandomCrop(scale=(0.5, 1.0), ratio=(0.75, 1.33)) - input_t = torch.ones((3, 50, 50), dtype=torch.float32) - img, target = cropper(input_t, target) - # Check the scale - assert img.shape[-1] * img.shape[-2] >= 0.4 * input_t.shape[-1] * input_t.shape[-2] - # Check aspect ratio - assert 0.65 <= img.shape[-2] / img.shape[-1] <= 1.5 - # Check the target - assert np.all(target >= 0) - if target.ndim == 2: - assert np.all(target[:, [0, 2]] <= img.shape[-1]) and np.all(target[:, [1, 3]] <= img.shape[-2]) - else: - assert np.all(target[..., 0] <= img.shape[-1]) and np.all(target[..., 1] <= img.shape[-2]) - - -@pytest.mark.parametrize( - "input_dtype, input_size", - [ - [torch.float32, (3, 32, 32)], - [torch.uint8, (3, 32, 32)], - ], -) -def test_channel_shuffle(input_dtype, input_size): - transfo = ChannelShuffle() - input_t = torch.rand(input_size, dtype=torch.float32) - if input_dtype == torch.uint8: - input_t = (255 * input_t).round() - input_t = input_t.to(dtype=input_dtype) - out = transfo(input_t) - assert isinstance(out, torch.Tensor) - assert out.shape == input_size - assert out.dtype == input_dtype - # Ensure that nothing has changed apart from channel order - if input_dtype == torch.uint8: - assert torch.all(input_t.sum(0) == out.sum(0)) - else: - # Float approximation - assert (input_t.sum(0) - out.sum(0)).abs().mean() < 1e-7 - - -@pytest.mark.parametrize( - "input_dtype,input_shape", - [ - [torch.float32, (3, 32, 32)], - 
[torch.uint8, (3, 32, 32)], - ], -) -def test_gaussian_noise(input_dtype, input_shape): - transform = GaussianNoise(0.0, 1.0) - input_t = torch.rand(input_shape, dtype=torch.float32) - if input_dtype == torch.uint8: - input_t = (255 * input_t).round() - input_t = input_t.to(dtype=input_dtype) - transformed = transform(input_t) - assert isinstance(transformed, torch.Tensor) - assert transformed.shape == input_shape - assert transformed.dtype == input_dtype - assert torch.any(transformed != input_t) - assert torch.all(transformed >= 0) - if input_dtype == torch.uint8: - assert torch.all(transformed <= 255) - else: - assert torch.all(transformed <= 1.0) - - -@pytest.mark.parametrize( - "p,target", - [ - [1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [1, np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)], - [0, np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)], - ], -) -def test_randomhorizontalflip(p, target): - # testing for 2 cases, with flip probability 1 and 0. 
- transform = RandomHorizontalFlip(p) - input_t = torch.ones((3, 32, 32), dtype=torch.float32) - input_t[..., :16] = 0 - - transformed, _target = transform(input_t, target) - assert isinstance(transformed, torch.Tensor) - assert transformed.shape == input_t.shape - assert transformed.dtype == input_t.dtype - # integrity check of targets - assert isinstance(_target, np.ndarray) - assert _target.dtype == np.float32 - if _target.ndim == 2: - if p == 1: - assert np.all(_target == np.array([[0.7, 0.1, 0.9, 0.4]], dtype=np.float32)) - assert torch.all(transformed.mean((0, 1)) == torch.tensor([1] * 16 + [0] * 16, dtype=torch.float32)) - elif p == 0: - assert np.all(_target == np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)) - assert torch.all(transformed.mean((0, 1)) == torch.tensor([0] * 16 + [1] * 16, dtype=torch.float32)) - else: - if p == 1: - assert np.all(_target == np.array([[[0.9, 0.1], [0.7, 0.1], [0.7, 0.4], [0.9, 0.4]]], dtype=np.float32)) - assert torch.all(transformed.mean((0, 1)) == torch.tensor([1] * 16 + [0] * 16, dtype=torch.float32)) - elif p == 0: - assert np.all(_target == np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)) - assert torch.all(transformed.mean((0, 1)) == torch.tensor([0] * 16 + [1] * 16, dtype=torch.float32)) - - -@pytest.mark.parametrize( - "input_dtype,input_shape", - [ - [torch.float32, (3, 32, 32)], - [torch.uint8, (3, 32, 32)], - [torch.float32, (3, 64, 32)], - [torch.uint8, (3, 64, 32)], - ], -) -def test_random_shadow(input_dtype, input_shape): - transform = RandomShadow((0.2, 0.8)) - input_t = torch.ones(input_shape, dtype=torch.float32) - if input_dtype == torch.uint8: - input_t = (255 * input_t).round() - input_t = input_t.to(dtype=input_dtype) - transformed = transform(input_t) - assert isinstance(transformed, torch.Tensor) - assert transformed.shape == input_shape - assert transformed.dtype == input_dtype - # The shadow will darken the picture - assert input_t.float().mean() >= 
transformed.float().mean() - assert torch.all(transformed >= 0) - if input_dtype == torch.uint8: - assert torch.all(transformed <= 255) - else: - assert torch.all(transformed <= 1.0) - - -@pytest.mark.parametrize( - "p,target", - [ - [1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [1, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)], - [0, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)], - ], -) -def test_random_resize(p, target): - transfo = RandomResize(scale_range=(0.3, 1.3), p=p) - assert repr(transfo) == f"RandomResize(scale_range=(0.3, 1.3), p={p})" - - img = torch.rand((3, 64, 64)) - # Apply the transformation - out_img, out_target = transfo(img, target) - assert isinstance(out_img, torch.Tensor) - assert isinstance(out_target, np.ndarray) - # Resize is already well tested - assert torch.all(out_img == img) if p == 0 else out_img.shape != img.shape - assert out_target.shape == target.shape diff --git a/tests/tensorflow/test_datasets_loader_tf.py b/tests/tensorflow/test_datasets_loader_tf.py deleted file mode 100644 index 26d24ae..0000000 --- a/tests/tensorflow/test_datasets_loader_tf.py +++ /dev/null @@ -1,75 +0,0 @@ -from typing import List, Tuple - -import tensorflow as tf -from doctr.datasets import DataLoader - - -class MockDataset: - def __init__(self, input_size): - self.data: List[Tuple[float, bool]] = [ - (1, True), - (0, False), - (0.5, True), - ] - self.input_size = input_size - - def __len__(self): - return len(self.data) - - def __getitem__(self, index): - val, label = self.data[index] - return tf.cast(tf.fill(self.input_size, val), dtype=tf.float32), tf.constant(label, dtype=tf.bool) - - -class MockDatasetBis(MockDataset): - @staticmethod - def collate_fn(samples): - x, y = zip(*samples) - return tf.stack(x, axis=0), list(y) - - -def test_dataloader(): - loader = DataLoader( - MockDataset((32, 32)), - shuffle=True, 
- batch_size=2, - drop_last=True, - ) - - ds_iter = iter(loader) - num_batches = 0 - for x, y in ds_iter: - num_batches += 1 - assert len(loader) == 1 - assert num_batches == 1 - assert isinstance(x, tf.Tensor) and isinstance(y, tf.Tensor) - assert x.shape == (2, 32, 32) - assert y.shape == (2,) - - # Drop last - loader = DataLoader( - MockDataset((32, 32)), - shuffle=True, - batch_size=2, - drop_last=False, - ) - ds_iter = iter(loader) - num_batches = 0 - for x, y in ds_iter: - num_batches += 1 - assert loader.num_batches == 2 - assert num_batches == 2 - - # Custom collate - loader = DataLoader( - MockDatasetBis((32, 32)), - shuffle=True, - batch_size=2, - drop_last=False, - ) - - ds_iter = iter(loader) - x, y = next(ds_iter) - assert isinstance(x, tf.Tensor) and isinstance(y, list) - assert x.shape == (2, 32, 32) - assert len(y) == 2 diff --git a/tests/tensorflow/test_datasets_tf.py b/tests/tensorflow/test_datasets_tf.py deleted file mode 100644 index 4bbf946..0000000 --- a/tests/tensorflow/test_datasets_tf.py +++ /dev/null @@ -1,605 +0,0 @@ -import os -from shutil import move - -import numpy as np -import pytest -import tensorflow as tf -from doctr import datasets -from doctr.datasets import DataLoader -from doctr.file_utils import CLASS_NAME -from doctr.transforms import Resize - - -def _validate_dataset(ds, input_size, batch_size=2, class_indices=False, is_polygons=False): - # Fetch one sample - img, target = ds[0] - assert isinstance(img, tf.Tensor) - assert img.shape == (*input_size, 3) - assert img.dtype == tf.float32 - assert isinstance(target, dict) - assert isinstance(target["boxes"], np.ndarray) and target["boxes"].dtype == np.float32 - if is_polygons: - assert target["boxes"].ndim == 3 and target["boxes"].shape[1:] == (4, 2) - else: - assert target["boxes"].ndim == 2 and target["boxes"].shape[1:] == (4,) - assert np.all(np.logical_and(target["boxes"] <= 1, target["boxes"] >= 0)) - if class_indices: - assert isinstance(target["labels"], np.ndarray) and 
target["labels"].dtype == np.int64 - else: - assert isinstance(target["labels"], list) and all(isinstance(s, str) for s in target["labels"]) - assert len(target["labels"]) == len(target["boxes"]) - - # Check batching - loader = DataLoader(ds, batch_size=batch_size) - - images, targets = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (batch_size, *input_size, 3) - assert isinstance(targets, list) and all(isinstance(elt, dict) for elt in targets) - - -def _validate_dataset_recognition_part(ds, input_size, batch_size=2): - # Fetch one sample - img, label = ds[0] - assert isinstance(img, tf.Tensor) - assert img.shape == (*input_size, 3) - assert img.dtype == tf.float32 - assert isinstance(label, str) - - # Check batching - loader = DataLoader(ds, batch_size=batch_size) - - images, labels = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (batch_size, *input_size, 3) - assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels) - - -def test_visiondataset(): - url = "https://github.com/mindee/doctr/releases/download/v0.6.0/mnist.zip" - with pytest.raises(ValueError): - datasets.datasets.VisionDataset(url, download=False) - - dataset = datasets.datasets.VisionDataset(url, download=True, extract_archive=True) - assert len(dataset) == 0 - assert repr(dataset) == "VisionDataset()" - - -def test_rotation_dataset(mock_image_folder): - input_size = (1024, 1024) - - ds = datasets.OrientationDataset(img_folder=mock_image_folder, img_transforms=Resize(input_size)) - assert len(ds) == 5 - img, target = ds[0] - assert isinstance(img, tf.Tensor) - assert img.dtype == tf.float32 - assert img.shape[:2] == input_size - # Prefilled rotation targets - assert isinstance(target, np.ndarray) and target.dtype == np.int64 - # check that all prefilled targets are 0 (degrees) - assert np.all(target == 0) - - loader = DataLoader(ds, batch_size=2) - images, targets = next(iter(loader)) - assert isinstance(images, 
tf.Tensor) and images.shape == (2, *input_size, 3) - assert isinstance(targets, list) and all(isinstance(elt, np.ndarray) for elt in targets) - - -def test_detection_dataset(mock_image_folder, mock_detection_label): - input_size = (1024, 1024) - - ds = datasets.DetectionDataset( - img_folder=mock_image_folder, - label_path=mock_detection_label, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 5 - img, target_dict = ds[0] - target = target_dict[CLASS_NAME] - assert isinstance(img, tf.Tensor) - assert img.shape[:2] == input_size - assert img.dtype == tf.float32 - # Bounding boxes - assert isinstance(target_dict, dict) - assert isinstance(target, np.ndarray) and target.dtype == np.float32 - assert np.all(np.logical_and(target[:, :4] >= 0, target[:, :4] <= 1)) - assert target.shape[1] == 4 - - loader = DataLoader(ds, batch_size=2) - images, targets = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3) - assert isinstance(targets, list) and all( - isinstance(elt, np.ndarray) for target in targets for elt in target.values() - ) - - # Rotated DS - rotated_ds = datasets.DetectionDataset( - img_folder=mock_image_folder, - label_path=mock_detection_label, - img_transforms=Resize(input_size), - use_polygons=True, - ) - _, r_target = rotated_ds[0] - assert r_target[CLASS_NAME].shape[1:] == (4, 2) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.DetectionDataset(mock_image_folder, mock_detection_label) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -def test_recognition_dataset(mock_image_folder, mock_recognition_label): - input_size = (32, 128) - ds = datasets.RecognitionDataset( - img_folder=mock_image_folder, - labels_path=mock_recognition_label, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - assert len(ds) == 5 - image, label = 
ds[0] - assert isinstance(image, tf.Tensor) - assert image.shape[:2] == input_size - assert image.dtype == tf.float32 - assert isinstance(label, str) - - loader = DataLoader(ds, batch_size=2) - images, labels = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3) - assert isinstance(labels, list) and all(isinstance(elt, str) for elt in labels) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.RecognitionDataset(mock_image_folder, mock_recognition_label) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -@pytest.mark.parametrize( - "use_polygons", - [False, True], -) -def test_ocrdataset(mock_ocrdataset, use_polygons): - input_size = (512, 512) - - ds = datasets.OCRDataset( - *mock_ocrdataset, - img_transforms=Resize(input_size), - use_polygons=use_polygons, - ) - assert len(ds) == 3 - _validate_dataset(ds, input_size, is_polygons=use_polygons) - - # File existence check - img_name, _ = ds.data[0] - move(os.path.join(ds.root, img_name), os.path.join(ds.root, "tmp_file")) - with pytest.raises(FileNotFoundError): - datasets.OCRDataset(*mock_ocrdataset) - move(os.path.join(ds.root, "tmp_file"), os.path.join(ds.root, img_name)) - - -def test_charactergenerator(): - input_size = (32, 32) - vocab = "abcdef" - - ds = datasets.CharacterGenerator( - vocab=vocab, - num_samples=10, - cache_samples=True, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 10 - image, label = ds[0] - assert isinstance(image, tf.Tensor) - assert image.shape[:2] == input_size - assert image.dtype == tf.float32 - assert isinstance(label, int) and label < len(vocab) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3) - assert isinstance(targets, tf.Tensor) 
and targets.shape == (2,) - assert targets.dtype == tf.int32 - - -def test_wordgenerator(): - input_size = (32, 128) - wordlen_range = (1, 10) - vocab = "abcdef" - - ds = datasets.WordGenerator( - vocab=vocab, - min_chars=wordlen_range[0], - max_chars=wordlen_range[1], - num_samples=10, - cache_samples=True, - img_transforms=Resize(input_size), - ) - - assert len(ds) == 10 - image, target = ds[0] - assert isinstance(image, tf.Tensor) - assert image.shape[:2] == input_size - assert image.dtype == tf.float32 - assert isinstance(target, str) and len(target) >= wordlen_range[0] and len(target) <= wordlen_range[1] - assert all(char in vocab for char in target) - - loader = DataLoader(ds, batch_size=2, collate_fn=ds.collate_fn) - images, targets = next(iter(loader)) - assert isinstance(images, tf.Tensor) and images.shape == (2, *input_size, 3) - assert isinstance(targets, list) and len(targets) == 2 and all(isinstance(t, str) for t in targets) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples", - [ - [[512, 512], 3], # Actual set has 2700 training samples and 300 test samples - ], -) -def test_artefact_detection(input_size, num_samples, rotate, mock_doc_artefacts): - # monkeypatch the path to temporary dataset - datasets.DocArtefacts.URL = mock_doc_artefacts - datasets.DocArtefacts.SHA256 = None - - ds = datasets.DocArtefacts( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - cache_dir="/".join(mock_doc_artefacts.split("/")[:-2]), - cache_subdir=mock_doc_artefacts.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"DocArtefacts(train={True})" - _validate_dataset(ds, input_size, class_indices=True, is_polygons=rotate) - - -# NOTE: following datasets support also recognition task - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 
626 training samples and 360 test samples - [[32, 128], 15, True], # recognition - ], -) -def test_sroie(input_size, num_samples, rotate, recognition, mock_sroie_dataset): - # monkeypatch the path to temporary dataset - datasets.SROIE.TRAIN = (mock_sroie_dataset, None, "sroie2019_train_task1.zip") - - ds = datasets.SROIE( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_sroie_dataset.split("/")[:-2]), - cache_subdir=mock_sroie_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SROIE(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 5, False], # Actual set has 229 train and 233 test samples - [[32, 128], 25, True], # recognition - ], -) -def test_ic13_dataset(input_size, num_samples, rotate, recognition, mock_ic13): - ds = datasets.IC13( - *mock_ic13, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - - assert len(ds) == num_samples - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 7149 train and 796 test samples - [[32, 128], 5, True], # recognition - ], -) -def test_imgur5k_dataset(input_size, num_samples, rotate, recognition, mock_imgur5k): - ds = datasets.IMGUR5K( - *mock_imgur5k, - train=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - - assert len(ds) == num_samples - 1 # -1 because of the test set 90 / 10 split - assert repr(ds) == 
f"IMGUR5K(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[32, 128], 3, False], # Actual set has 33402 training samples and 13068 test samples - [[32, 128], 12, True], # recognition - ], -) -def test_svhn(input_size, num_samples, rotate, recognition, mock_svhn_dataset): - # monkeypatch the path to temporary dataset - datasets.SVHN.TRAIN = (mock_svhn_dataset, None, "svhn_train.tar") - - ds = datasets.SVHN( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_svhn_dataset.split("/")[:-2]), - cache_subdir=mock_svhn_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SVHN(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 149 training samples and 50 test samples - [[32, 128], 9, True], # recognition - ], -) -def test_funsd(input_size, num_samples, rotate, recognition, mock_funsd_dataset): - # monkeypatch the path to temporary dataset - datasets.FUNSD.URL = mock_funsd_dataset - datasets.FUNSD.SHA256 = None - datasets.FUNSD.FILE_NAME = "funsd.zip" - - ds = datasets.FUNSD( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_funsd_dataset.split("/")[:-2]), - cache_subdir=mock_funsd_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"FUNSD(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - 
else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 800 training samples and 100 test samples - [[32, 128], 9, True], # recognition - ], -) -def test_cord(input_size, num_samples, rotate, recognition, mock_cord_dataset): - # monkeypatch the path to temporary dataset - datasets.CORD.TRAIN = (mock_cord_dataset, None, "cord_train.zip") - - ds = datasets.CORD( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_cord_dataset.split("/")[:-2]), - cache_subdir=mock_cord_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"CORD(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 2, False], # Actual set has 772875 training samples and 85875 test samples - [[32, 128], 10, True], # recognition - ], -) -def test_synthtext(input_size, num_samples, rotate, recognition, mock_synthtext_dataset): - # monkeypatch the path to temporary dataset - datasets.SynthText.URL = mock_synthtext_dataset - datasets.SynthText.SHA256 = None - - ds = datasets.SynthText( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_synthtext_dataset.split("/")[:-2]), - cache_subdir=mock_synthtext_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SynthText(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - 
-@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[32, 128], 1, False], # Actual set has 2000 training samples and 3000 test samples - [[32, 128], 1, True], # recognition - ], -) -def test_iiit5k(input_size, num_samples, rotate, recognition, mock_iiit5k_dataset): - # monkeypatch the path to temporary dataset - datasets.IIIT5K.URL = mock_iiit5k_dataset - datasets.IIIT5K.SHA256 = None - - ds = datasets.IIIT5K( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_iiit5k_dataset.split("/")[:-2]), - cache_subdir=mock_iiit5k_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"IIIT5K(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size, batch_size=1) - else: - _validate_dataset(ds, input_size, batch_size=1, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 3, False], # Actual set has 100 training samples and 249 test samples - [[32, 128], 3, True], # recognition - ], -) -def test_svt(input_size, num_samples, rotate, recognition, mock_svt_dataset): - # monkeypatch the path to temporary dataset - datasets.SVT.URL = mock_svt_dataset - datasets.SVT.SHA256 = None - - ds = datasets.SVT( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_svt_dataset.split("/")[:-2]), - cache_subdir=mock_svt_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"SVT(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, 
recognition", - [ - [[512, 512], 3, False], # Actual set has 246 training samples and 249 test samples - [[32, 128], 3, True], # recognition - ], -) -def test_ic03(input_size, num_samples, rotate, recognition, mock_ic03_dataset): - # monkeypatch the path to temporary dataset - datasets.IC03.TRAIN = (mock_ic03_dataset, None, "ic03_train.zip") - - ds = datasets.IC03( - train=True, - download=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - cache_dir="/".join(mock_ic03_dataset.split("/")[:-2]), - cache_subdir=mock_ic03_dataset.split("/")[-2], - ) - - assert len(ds) == num_samples - assert repr(ds) == f"IC03(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -@pytest.mark.parametrize("rotate", [True, False]) -@pytest.mark.parametrize( - "input_size, num_samples, recognition", - [ - [[512, 512], 2, False], - [[32, 128], 5, True], - ], -) -def test_wildreceipt_dataset(input_size, num_samples, rotate, recognition, mock_wildreceipt_dataset): - ds = datasets.WILDRECEIPT( - *mock_wildreceipt_dataset, - train=True, - img_transforms=Resize(input_size), - use_polygons=rotate, - recognition_task=recognition, - ) - assert len(ds) == num_samples - assert repr(ds) == f"WILDRECEIPT(train={True})" - if recognition: - _validate_dataset_recognition_part(ds, input_size) - else: - _validate_dataset(ds, input_size, is_polygons=rotate) - - -# NOTE: following datasets are only for recognition task - - -def test_mjsynth_dataset(mock_mjsynth_dataset): - input_size = (32, 128) - ds = datasets.MJSynth( - *mock_mjsynth_dataset, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - - assert len(ds) == 4 # Actual set has 7581382 train and 1337891 test samples - assert repr(ds) == f"MJSynth(train={True})" - _validate_dataset_recognition_part(ds, input_size) - - -def test_iiithws_dataset(mock_iiithws_dataset): - input_size = (32, 
128) - ds = datasets.IIITHWS( - *mock_iiithws_dataset, - img_transforms=Resize(input_size, preserve_aspect_ratio=True), - ) - - assert len(ds) == 4 # Actual set has 7141797 train and 793533 test samples - assert repr(ds) == f"IIITHWS(train={True})" - _validate_dataset_recognition_part(ds, input_size) diff --git a/tests/tensorflow/test_file_utils_tf.py b/tests/tensorflow/test_file_utils_tf.py deleted file mode 100644 index a28709d..0000000 --- a/tests/tensorflow/test_file_utils_tf.py +++ /dev/null @@ -1,5 +0,0 @@ -from doctr.file_utils import is_tf_available - - -def test_file_utils(): - assert is_tf_available() diff --git a/tests/tensorflow/test_io_image_tf.py b/tests/tensorflow/test_io_image_tf.py deleted file mode 100644 index 1680fd2..0000000 --- a/tests/tensorflow/test_io_image_tf.py +++ /dev/null @@ -1,52 +0,0 @@ -import numpy as np -import pytest -import tensorflow as tf -from doctr.io import decode_img_as_tensor, read_img_as_tensor, tensor_from_numpy - - -def test_read_img_as_tensor(mock_image_path): - img = read_img_as_tensor(mock_image_path) - - assert isinstance(img, tf.Tensor) - assert img.dtype == tf.float32 - assert img.shape == (900, 1200, 3) - - img = read_img_as_tensor(mock_image_path, dtype=tf.float16) - assert img.dtype == tf.float16 - img = read_img_as_tensor(mock_image_path, dtype=tf.uint8) - assert img.dtype == tf.uint8 - - with pytest.raises(ValueError): - read_img_as_tensor(mock_image_path, dtype=tf.float64) - - -def test_decode_img_as_tensor(mock_image_stream): - img = decode_img_as_tensor(mock_image_stream) - - assert isinstance(img, tf.Tensor) - assert img.dtype == tf.float32 - assert img.shape == (900, 1200, 3) - - img = decode_img_as_tensor(mock_image_stream, dtype=tf.float16) - assert img.dtype == tf.float16 - img = decode_img_as_tensor(mock_image_stream, dtype=tf.uint8) - assert img.dtype == tf.uint8 - - with pytest.raises(ValueError): - decode_img_as_tensor(mock_image_stream, dtype=tf.float64) - - -def 
test_tensor_from_numpy(mock_image_stream): - with pytest.raises(ValueError): - tensor_from_numpy(np.zeros((256, 256, 3)), tf.int64) - - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8)) - - assert isinstance(out, tf.Tensor) - assert out.dtype == tf.float32 - assert out.shape == (256, 256, 3) - - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8), dtype=tf.float16) - assert out.dtype == tf.float16 - out = tensor_from_numpy(np.zeros((256, 256, 3), dtype=np.uint8), dtype=tf.uint8) - assert out.dtype == tf.uint8 diff --git a/tests/tensorflow/test_models_classification_tf.py b/tests/tensorflow/test_models_classification_tf.py deleted file mode 100644 index f6fc767..0000000 --- a/tests/tensorflow/test_models_classification_tf.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import tempfile - -import cv2 -import numpy as np -import onnxruntime -import psutil -import pytest -import tensorflow as tf -from doctr.models import classification -from doctr.models.classification.predictor import OrientationPredictor -from doctr.models.utils import export_model_to_onnx - -system_available_memory = int(psutil.virtual_memory().available / 1024**3) - - -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["vgg16_bn_r", (32, 32, 3), (126,)], - ["resnet18", (32, 32, 3), (126,)], - ["resnet31", (32, 32, 3), (126,)], - ["resnet34", (32, 32, 3), (126,)], - ["resnet34_wide", (32, 32, 3), (126,)], - ["resnet50", (32, 32, 3), (126,)], - ["magc_resnet31", (32, 32, 3), (126,)], - ["mobilenet_v3_small", (32, 32, 3), (126,)], - ["mobilenet_v3_large", (32, 32, 3), (126,)], - ["vit_s", (32, 32, 3), (126,)], - ["vit_b", (32, 32, 3), (126,)], - ["textnet_tiny", (32, 32, 3), (126,)], - ["textnet_small", (32, 32, 3), (126,)], - ["textnet_base", (32, 32, 3), (126,)], - ], -) -def test_classification_architectures(arch_name, input_shape, output_size): - # Model - batch_size = 2 - tf.keras.backend.clear_session() - model = 
classification.__dict__[arch_name](pretrained=True, include_top=True, input_shape=input_shape) - # Forward - out = model(tf.random.uniform(shape=[batch_size, *input_shape], maxval=1, dtype=tf.float32)) - # Output checks - assert isinstance(out, tf.Tensor) - assert out.dtype == tf.float32 - assert out.numpy().shape == (batch_size, *output_size) - - -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["mobilenet_v3_small_crop_orientation", (256, 256, 3)], - ["mobilenet_v3_small_page_orientation", (512, 512, 3)], - ], -) -def test_classification_models(arch_name, input_shape): - batch_size = 8 - reco_model = classification.__dict__[arch_name](pretrained=True, input_shape=input_shape) - assert isinstance(reco_model, tf.keras.Model) - input_tensor = tf.random.uniform(shape=[batch_size, *input_shape], minval=0, maxval=1) - - out = reco_model(input_tensor) - assert isinstance(out, tf.Tensor) - assert out.shape.as_list() == [8, 4] - - -@pytest.mark.parametrize( - "arch_name", - [ - "mobilenet_v3_small_crop_orientation", - "mobilenet_v3_small_page_orientation", - ], -) -def test_classification_zoo(arch_name): - if "crop" in arch_name: - batch_size = 16 - input_tensor = tf.random.uniform(shape=[batch_size, 256, 256, 3], minval=0, maxval=1) - # Model - predictor = classification.zoo.crop_orientation_predictor(arch_name, pretrained=False) - - with pytest.raises(ValueError): - predictor = classification.zoo.crop_orientation_predictor(arch="wrong_model", pretrained=False) - else: - batch_size = 2 - input_tensor = tf.random.uniform(shape=[batch_size, 512, 512, 3], minval=0, maxval=1) - # Model - predictor = classification.zoo.page_orientation_predictor(arch_name, pretrained=False) - - with pytest.raises(ValueError): - predictor = classification.zoo.page_orientation_predictor(arch="wrong_model", pretrained=False) - # object check - assert isinstance(predictor, OrientationPredictor) - out = predictor(input_tensor) - class_idxs, classes, confs = out[0], out[1], out[2] - 
assert isinstance(class_idxs, list) and len(class_idxs) == batch_size - assert isinstance(classes, list) and len(classes) == batch_size - assert isinstance(confs, list) and len(confs) == batch_size - assert all(isinstance(pred, int) for pred in class_idxs) - assert all(isinstance(pred, int) for pred in classes) and all(pred in [0, 90, 180, -90] for pred in classes) - assert all(isinstance(pred, float) for pred in confs) - - -def test_crop_orientation_model(mock_text_box): - text_box_0 = cv2.imread(mock_text_box) - # rotates counter-clockwise - text_box_270 = np.rot90(text_box_0, 1) - text_box_180 = np.rot90(text_box_0, 2) - text_box_90 = np.rot90(text_box_0, 3) - classifier = classification.crop_orientation_predictor("mobilenet_v3_small_crop_orientation", pretrained=True) - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] - # 270 degrees is equivalent to -90 degrees - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] - assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) - - -# TODO: uncomment when model is available -""" -def test_page_orientation_model(mock_payslip): - text_box_0 = cv2.imread(mock_payslip) - # rotates counter-clockwise - text_box_270 = np.rot90(text_box_0, 1) - text_box_180 = np.rot90(text_box_0, 2) - text_box_90 = np.rot90(text_box_0, 3) - classifier = classification.crop_orientation_predictor("mobilenet_v3_small_page_orientation", pretrained=True) - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[0] == [0, 1, 2, 3] - # 270 degrees is equivalent to -90 degrees - assert classifier([text_box_0, text_box_270, text_box_180, text_box_90])[1] == [0, -90, 180, 90] - assert all(isinstance(pred, float) for pred in classifier([text_box_0, text_box_270, text_box_180, text_box_90])[2]) -""" - - -# temporarily fix to avoid killing the CI (tf2onnx v1.14 memory leak issue) -# ref.: 
https://github.com/mindee/doctr/pull/1201 -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["vgg16_bn_r", (32, 32, 3), (126,)], - ["mobilenet_v3_small", (512, 512, 3), (126,)], - ["mobilenet_v3_large", (512, 512, 3), (126,)], - ["mobilenet_v3_small_crop_orientation", (256, 256, 3), (4,)], - ["mobilenet_v3_small_page_orientation", (512, 512, 3), (4,)], - ["resnet18", (32, 32, 3), (126,)], - ["vit_s", (32, 32, 3), (126,)], - ["textnet_tiny", (32, 32, 3), (126,)], - ["textnet_small", (32, 32, 3), (126,)], - pytest.param( - "resnet31", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "resnet34", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "resnet34_wide", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "resnet50", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "magc_resnet31", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "vit_b", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "textnet_base", - (32, 32, 3), - (126,), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - ], -) -def test_models_onnx_export(arch_name, input_shape, output_size): - # Model - batch_size = 2 - tf.keras.backend.clear_session() - if "orientation" in arch_name: - model = classification.__dict__[arch_name](pretrained=True, input_shape=input_shape) - else: - model = classification.__dict__[arch_name](pretrained=True, include_top=True, input_shape=input_shape) - - if arch_name == "vit_b" or arch_name == "vit_s": - # vit model needs a 
fixed batch size - dummy_input = [tf.TensorSpec([2, *input_shape], tf.float32, name="input")] - else: - # batch_size = None for dynamic batch size - dummy_input = [tf.TensorSpec([None, *input_shape], tf.float32, name="input")] - - np_dummy_input = np.random.rand(batch_size, *input_shape).astype(np.float32) - tf_logits = model(np_dummy_input, training=False).numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path, output = export_model_to_onnx( - model, model_name=os.path.join(tmpdir, "model"), dummy_input=dummy_input - ) - - assert os.path.exists(model_path) - # Inference - ort_session = onnxruntime.InferenceSession( - os.path.join(tmpdir, "model.onnx"), providers=["CPUExecutionProvider"] - ) - ort_outs = ort_session.run(output, {"input": np_dummy_input}) - - assert isinstance(ort_outs, list) and len(ort_outs) == 1 - assert ort_outs[0].shape == (batch_size, *output_size) - # Check that the output is close to the TensorFlow output - only warn if not close - try: - assert np.allclose(tf_logits, ort_outs[0], atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(tf_logits - ort_outs[0]))}") diff --git a/tests/tensorflow/test_models_detection_tf.py b/tests/tensorflow/test_models_detection_tf.py deleted file mode 100644 index 24fe231..0000000 --- a/tests/tensorflow/test_models_detection_tf.py +++ /dev/null @@ -1,270 +0,0 @@ -import math -import os -import tempfile - -import numpy as np -import onnxruntime -import psutil -import pytest -import tensorflow as tf -from doctr.file_utils import CLASS_NAME -from doctr.io import DocumentFile -from doctr.models import detection -from doctr.models.detection._utils import dilate, erode -from doctr.models.detection.fast.tensorflow import reparameterize -from doctr.models.detection.predictor import DetectionPredictor -from doctr.models.preprocessor import PreProcessor -from doctr.models.utils import export_model_to_onnx - 
-system_available_memory = int(psutil.virtual_memory().available / 1024**3) - - -@pytest.mark.parametrize("train_mode", [True, False]) -@pytest.mark.parametrize( - "arch_name, input_shape, output_size, out_prob", - [ - ["db_resnet50", (512, 512, 3), (512, 512, 1), True], - ["db_mobilenet_v3_large", (512, 512, 3), (512, 512, 1), True], - ["linknet_resnet18", (512, 512, 3), (512, 512, 1), True], - ["linknet_resnet34", (512, 512, 3), (512, 512, 1), True], - ["linknet_resnet50", (512, 512, 3), (512, 512, 1), True], - ["fast_tiny", (512, 512, 3), (512, 512, 1), True], - ["fast_tiny_rep", (512, 512, 3), (512, 512, 1), True], # Reparameterized model - ["fast_small", (512, 512, 3), (512, 512, 1), True], - ["fast_base", (512, 512, 3), (512, 512, 1), True], - ], -) -def test_detection_models(arch_name, input_shape, output_size, out_prob, train_mode): - batch_size = 2 - tf.keras.backend.clear_session() - if arch_name == "fast_tiny_rep": - model = reparameterize(detection.fast_tiny(pretrained=True, input_shape=input_shape)) - train_mode = False # Reparameterized model is not trainable - else: - model = detection.__dict__[arch_name](pretrained=True, input_shape=input_shape) - assert isinstance(model, tf.keras.Model) - input_tensor = tf.random.uniform(shape=[batch_size, *input_shape], minval=0, maxval=1) - target = [ - {CLASS_NAME: np.array([[0.5, 0.5, 1, 1], [0.5, 0.5, 0.8, 0.8]], dtype=np.float32)}, - {CLASS_NAME: np.array([[0.5, 0.5, 1, 1], [0.5, 0.5, 0.8, 0.9]], dtype=np.float32)}, - ] - # test training model - out = model( - input_tensor, - target, - return_model_output=True, - return_preds=not train_mode, - training=train_mode, - ) - assert isinstance(out, dict) - assert len(out) == 3 if not train_mode else len(out) == 2 - # Check proba map - assert isinstance(out["out_map"], tf.Tensor) - assert out["out_map"].dtype == tf.float32 - seg_map = out["out_map"].numpy() - assert seg_map.shape == (batch_size, *output_size) - if out_prob: - assert np.all(np.logical_and(seg_map >= 
0, seg_map <= 1)) - # Check boxes - if not train_mode: - for boxes_dict in out["preds"]: - for boxes in boxes_dict.values(): - assert boxes.shape[1] == 5 - assert np.all(boxes[:, :2] < boxes[:, 2:4]) - assert np.all(boxes[:, :4] >= 0) and np.all(boxes[:, :4] <= 1) - # Check loss - assert isinstance(out["loss"], tf.Tensor) - # Target checks - target = [ - {CLASS_NAME: np.array([[0, 0, 1, 1]], dtype=np.uint8)}, - {CLASS_NAME: np.array([[0, 0, 1, 1]], dtype=np.uint8)}, - ] - with pytest.raises(AssertionError): - out = model(input_tensor, target, training=True) - - target = [ - {CLASS_NAME: np.array([[0, 0, 1.5, 1.5]], dtype=np.float32)}, - {CLASS_NAME: np.array([[-0.2, -0.3, 1, 1]], dtype=np.float32)}, - ] - with pytest.raises(ValueError): - out = model(input_tensor, target, training=True) - - # Check the rotated case - target = [ - {CLASS_NAME: np.array([[0.75, 0.75, 0.5, 0.5, 0], [0.65, 0.65, 0.3, 0.3, 0]], dtype=np.float32)}, - {CLASS_NAME: np.array([[0.75, 0.75, 0.5, 0.5, 0], [0.65, 0.7, 0.3, 0.4, 0]], dtype=np.float32)}, - ] - loss = model(input_tensor, target, training=True)["loss"] - assert isinstance(loss, tf.Tensor) and ((loss - out["loss"]) / loss).numpy() < 1 - - -@pytest.fixture(scope="session") -def test_detectionpredictor(mock_pdf): - batch_size = 4 - predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=batch_size), detection.db_resnet50(input_shape=(512, 512, 3)) - ) - - pages = DocumentFile.from_pdf(mock_pdf).as_images() - out = predictor(pages) - # The input PDF has 2 pages - assert len(out) == 2 - - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - return predictor - - -@pytest.fixture(scope="session") -def test_rotated_detectionpredictor(mock_pdf): - batch_size = 4 - predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=batch_size), - detection.db_resnet50(assume_straight_pages=False, 
input_shape=(512, 512, 3)), - ) - - pages = DocumentFile.from_pdf(mock_pdf).as_images() - out = predictor(pages) - - # The input PDF has 2 pages - assert len(out) == 2 - - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - return predictor - - -@pytest.mark.parametrize( - "arch_name", - [ - "db_resnet50", - "db_mobilenet_v3_large", - "linknet_resnet18", - "fast_tiny", - ], -) -def test_detection_zoo(arch_name): - # Model - tf.keras.backend.clear_session() - predictor = detection.zoo.detection_predictor(arch_name, pretrained=False) - # object check - assert isinstance(predictor, DetectionPredictor) - input_tensor = tf.random.uniform(shape=[2, 1024, 1024, 3], minval=0, maxval=1) - out, seq_maps = predictor(input_tensor, return_maps=True) - assert all(isinstance(boxes, dict) for boxes in out) - assert all(isinstance(boxes[CLASS_NAME], np.ndarray) and boxes[CLASS_NAME].shape[1] == 5 for boxes in out) - assert all(isinstance(seq_map, np.ndarray) for seq_map in seq_maps) - assert all(seq_map.shape[:2] == (1024, 1024) for seq_map in seq_maps) - # check that all values in the seq_maps are between 0 and 1 - assert all((seq_map >= 0).all() and (seq_map <= 1).all() for seq_map in seq_maps) - - -def test_detection_zoo_error(): - with pytest.raises(ValueError): - _ = detection.zoo.detection_predictor("my_fancy_model", pretrained=False) - - -def test_fast_reparameterization(): - dummy_input = tf.random.uniform(shape=[1, 1024, 1024, 3], minval=0, maxval=1) - base_model = detection.fast_tiny(pretrained=True, exportable=True) - base_model_params = np.sum([np.prod(v.shape) for v in base_model.trainable_variables]) - assert math.isclose(base_model_params, 13535296) # base model params - base_out = base_model(dummy_input, training=False)["logits"] - tf.keras.backend.clear_session() - rep_model = reparameterize(base_model) - rep_model_params = np.sum([np.prod(v.shape) for v in 
base_model.trainable_variables]) - assert math.isclose(rep_model_params, 8520256) # reparameterized model params - rep_out = rep_model(dummy_input, training=False)["logits"] - diff = base_out - rep_out - assert np.mean(diff) < 5e-2 - - -def test_erode(): - x = np.zeros((1, 3, 3, 1), dtype=np.float32) - x[:, 1, 1] = 1 - x = tf.convert_to_tensor(x) - expected = tf.zeros((1, 3, 3, 1)) - out = erode(x, 3) - assert tf.math.reduce_all(out == expected) - - -def test_dilate(): - x = np.zeros((1, 3, 3, 1), dtype=np.float32) - x[:, 1, 1] = 1 - x = tf.convert_to_tensor(x) - expected = tf.ones((1, 3, 3, 1)) - out = dilate(x, 3) - assert tf.math.reduce_all(out == expected) - - -@pytest.mark.parametrize( - "arch_name, input_shape, output_size", - [ - ["db_mobilenet_v3_large", (512, 512, 3), (512, 512, 1)], - ["linknet_resnet18", (1024, 1024, 3), (1024, 1024, 1)], - ["fast_tiny", (1024, 1024, 3), (1024, 1024, 1)], - ["fast_tiny_rep", (1024, 1024, 3), (1024, 1024, 1)], # Reparameterized model - ["fast_small", (1024, 1024, 3), (1024, 1024, 1)], - pytest.param( - "db_resnet50", - (512, 512, 3), - (512, 512, 1), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "linknet_resnet34", - (1024, 1024, 3), - (1024, 1024, 1), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "linknet_resnet50", - (512, 512, 3), - (512, 512, 1), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "fast_base", - (512, 512, 3), - (512, 512, 1), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - ], -) -def test_models_onnx_export(arch_name, input_shape, output_size): - # Model - batch_size = 2 - tf.keras.backend.clear_session() - if arch_name == "fast_tiny_rep": - model = reparameterize(detection.fast_tiny(pretrained=True, exportable=True, input_shape=input_shape)) - else: - model = 
detection.__dict__[arch_name](pretrained=True, exportable=True, input_shape=input_shape) - # batch_size = None for dynamic batch size - dummy_input = [tf.TensorSpec([None, *input_shape], tf.float32, name="input")] - np_dummy_input = np.random.rand(batch_size, *input_shape).astype(np.float32) - tf_logits = model(np_dummy_input, training=False)["logits"].numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path, output = export_model_to_onnx( - model, model_name=os.path.join(tmpdir, "model"), dummy_input=dummy_input - ) - assert os.path.exists(model_path) - # Inference - ort_session = onnxruntime.InferenceSession( - os.path.join(tmpdir, "model.onnx"), providers=["CPUExecutionProvider"] - ) - ort_outs = ort_session.run(output, {"input": np_dummy_input}) - - assert isinstance(ort_outs, list) and len(ort_outs) == 1 - assert ort_outs[0].shape == (batch_size, *output_size) - # Check that the output is close to the TensorFlow output - only warn if not close - try: - assert np.allclose(ort_outs[0], tf_logits, atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(tf_logits - ort_outs[0]))}") diff --git a/tests/tensorflow/test_models_factory.py b/tests/tensorflow/test_models_factory.py deleted file mode 100644 index e470a4d..0000000 --- a/tests/tensorflow/test_models_factory.py +++ /dev/null @@ -1,70 +0,0 @@ -import json -import os -import tempfile - -import pytest -import tensorflow as tf -from doctr import models -from doctr.models.factory import _save_model_and_config_for_hf_hub, from_hub, push_to_hf_hub - - -def test_push_to_hf_hub(): - model = models.classification.resnet18(pretrained=False) - with pytest.raises(ValueError): - # run_config and/or arch must be specified - push_to_hf_hub(model, model_name="test", task="classification") - with pytest.raises(ValueError): - # task must be one of classification, detection, recognition, obj_detection - push_to_hf_hub(model, 
model_name="test", task="invalid_task", arch="mobilenet_v3_small") - with pytest.raises(ValueError): - # arch not in available architectures for task - push_to_hf_hub(model, model_name="test", task="detection", arch="crnn_mobilenet_v3_large") - - -@pytest.mark.parametrize( - "arch_name, task_name, dummy_model_id", - [ - ["vgg16_bn_r", "classification", "Felix92/doctr-dummy-tf-vgg16-bn-r"], - ["resnet18", "classification", "Felix92/doctr-dummy-tf-resnet18"], - ["resnet31", "classification", "Felix92/doctr-dummy-tf-resnet31"], - ["resnet34", "classification", "Felix92/doctr-dummy-tf-resnet34"], - ["resnet34_wide", "classification", "Felix92/doctr-dummy-tf-resnet34-wide"], - ["resnet50", "classification", "Felix92/doctr-dummy-tf-resnet50"], - ["magc_resnet31", "classification", "Felix92/doctr-dummy-tf-magc-resnet31"], - ["mobilenet_v3_large", "classification", "Felix92/doctr-dummy-tf-mobilenet-v3-large"], - ["vit_b", "classification", "Felix92/doctr-dummy-tf-vit-b"], - ["textnet_tiny", "classification", "Felix92/doctr-dummy-tf-textnet-tiny"], - ["db_resnet50", "detection", "Felix92/doctr-dummy-tf-db-resnet50"], - ["db_mobilenet_v3_large", "detection", "Felix92/doctr-dummy-tf-db-mobilenet-v3-large"], - ["linknet_resnet18", "detection", "Felix92/doctr-dummy-tf-linknet-resnet18"], - ["linknet_resnet34", "detection", "Felix92/doctr-dummy-tf-linknet-resnet34"], - ["linknet_resnet50", "detection", "Felix92/doctr-dummy-tf-linknet-resnet50"], - ["crnn_vgg16_bn", "recognition", "Felix92/doctr-dummy-tf-crnn-vgg16-bn"], - ["crnn_mobilenet_v3_large", "recognition", "Felix92/doctr-dummy-tf-crnn-mobilenet-v3-large"], - ["sar_resnet31", "recognition", "Felix92/doctr-dummy-tf-sar-resnet31"], - ["master", "recognition", "Felix92/doctr-dummy-tf-master"], - ["vitstr_small", "recognition", "Felix92/doctr-dummy-tf-vitstr-small"], - ["parseq", "recognition", "Felix92/doctr-dummy-tf-parseq"], - ], -) -def test_models_for_hub(arch_name, task_name, dummy_model_id, tmpdir): - with 
tempfile.TemporaryDirectory() as tmp_dir: - tf.keras.backend.clear_session() - model = models.__dict__[task_name].__dict__[arch_name](pretrained=True) - - _save_model_and_config_for_hf_hub(model, arch=arch_name, task=task_name, save_dir=tmp_dir) - - assert hasattr(model, "cfg") - assert len(os.listdir(tmp_dir)) == 2 - assert os.path.exists(tmp_dir + "/tf_model") - assert len(os.listdir(tmp_dir + "/tf_model")) == 3 - assert os.path.exists(tmp_dir + "/config.json") - tmp_config = json.load(open(tmp_dir + "/config.json")) - assert arch_name == tmp_config["arch"] - assert task_name == tmp_config["task"] - assert all(key in model.cfg.keys() for key in tmp_config.keys()) - - # test from hub - tf.keras.backend.clear_session() - hub_model = from_hub(repo_id=dummy_model_id) - assert isinstance(hub_model, type(model)) diff --git a/tests/tensorflow/test_models_recognition_tf.py b/tests/tensorflow/test_models_recognition_tf.py deleted file mode 100644 index d562abf..0000000 --- a/tests/tensorflow/test_models_recognition_tf.py +++ /dev/null @@ -1,233 +0,0 @@ -import os -import shutil -import tempfile - -import numpy as np -import onnxruntime -import psutil -import pytest -import tensorflow as tf -from doctr.io import DocumentFile -from doctr.models import recognition -from doctr.models.preprocessor import PreProcessor -from doctr.models.recognition.crnn.tensorflow import CTCPostProcessor -from doctr.models.recognition.master.tensorflow import MASTERPostProcessor -from doctr.models.recognition.parseq.tensorflow import PARSeqPostProcessor -from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models.recognition.sar.tensorflow import SARPostProcessor -from doctr.models.recognition.vitstr.tensorflow import ViTSTRPostProcessor -from doctr.models.utils import export_model_to_onnx -from doctr.utils.geometry import extract_crops - -system_available_memory = int(psutil.virtual_memory().available / 1024**3) - - -@pytest.mark.parametrize("train_mode", [True, 
False]) -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["crnn_vgg16_bn", (32, 128, 3)], - ["crnn_mobilenet_v3_small", (32, 128, 3)], - ["crnn_mobilenet_v3_large", (32, 128, 3)], - ["sar_resnet31", (32, 128, 3)], - ["master", (32, 128, 3)], - ["vitstr_small", (32, 128, 3)], - ["vitstr_base", (32, 128, 3)], - ["parseq", (32, 128, 3)], - ], -) -def test_recognition_models(arch_name, input_shape, train_mode): - batch_size = 4 - reco_model = recognition.__dict__[arch_name](pretrained=True, input_shape=input_shape) - assert isinstance(reco_model, tf.keras.Model) - input_tensor = tf.random.uniform(shape=[batch_size, *input_shape], minval=0, maxval=1) - target = ["i", "am", "a", "jedi"] - - out = reco_model( - input_tensor, - target, - return_model_output=True, - return_preds=not train_mode, - training=train_mode, - ) - assert isinstance(out, dict) - assert len(out) == 3 if not train_mode else len(out) == 2 - assert isinstance(out["out_map"], tf.Tensor) - assert out["out_map"].dtype == tf.float32 - if not train_mode: - assert isinstance(out["preds"], list) - assert len(out["preds"]) == batch_size - assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for word, conf in out["preds"]) - assert isinstance(out["loss"], tf.Tensor) - # test model in train mode needs targets - with pytest.raises(ValueError): - reco_model(input_tensor, None, training=True) - - -@pytest.mark.parametrize( - "post_processor, input_shape", - [ - [SARPostProcessor, [2, 30, 119]], - [CTCPostProcessor, [2, 30, 119]], - [MASTERPostProcessor, [2, 30, 119]], - [ViTSTRPostProcessor, [2, 30, 119]], - [PARSeqPostProcessor, [2, 30, 119]], - ], -) -def test_reco_postprocessors(post_processor, input_shape, mock_vocab): - processor = post_processor(mock_vocab) - decoded = processor(tf.random.uniform(shape=input_shape, minval=0, maxval=1, dtype=tf.float32)) - assert isinstance(decoded, list) - assert all(isinstance(word, str) and isinstance(conf, float) and 0 <= conf <= 1 for 
word, conf in decoded) - assert len(decoded) == input_shape[0] - assert all(char in mock_vocab for word, _ in decoded for char in word) - # Repr - assert repr(processor) == f"{post_processor.__name__}(vocab_size={len(mock_vocab)})" - - -@pytest.fixture(scope="session") -def test_recognitionpredictor(mock_pdf, mock_vocab): - batch_size = 4 - predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=batch_size, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3)), - ) - - pages = DocumentFile.from_pdf(mock_pdf) - # Create bounding boxes - boxes = np.array([[0.5, 0.5, 0.75, 0.75], [0.5, 0.5, 1.0, 1.0]], dtype=np.float32) - crops = extract_crops(pages[0], boxes) - - out = predictor(crops) - - # One prediction per crop - assert len(out) == boxes.shape[0] - assert all(isinstance(val, str) and isinstance(conf, float) for val, conf in out) - - # Dimension check - with pytest.raises(ValueError): - input_crop = (255 * np.random.rand(1, 128, 64, 3)).astype(np.uint8) - _ = predictor([input_crop]) - - return predictor - - -@pytest.mark.parametrize( - "arch_name", - [ - "crnn_vgg16_bn", - "crnn_mobilenet_v3_small", - "crnn_mobilenet_v3_large", - "sar_resnet31", - "master", - "vitstr_small", - "vitstr_base", - "parseq", - ], -) -def test_recognition_zoo(arch_name): - batch_size = 2 - # Model - predictor = recognition.zoo.recognition_predictor(arch_name, pretrained=False) - # object check - assert isinstance(predictor, RecognitionPredictor) - input_tensor = tf.random.uniform(shape=[batch_size, 128, 128, 3], minval=0, maxval=1) - out = predictor(input_tensor) - assert isinstance(out, list) and len(out) == batch_size - assert all(isinstance(word, str) and isinstance(conf, float) for word, conf in out) - - -@pytest.mark.parametrize( - "arch_name", - [ - "crnn_vgg16_bn", - "crnn_mobilenet_v3_small", - "crnn_mobilenet_v3_large", - ], -) -def test_crnn_beam_search(arch_name): - batch_size = 2 - # Model - predictor 
= recognition.zoo.recognition_predictor(arch_name, pretrained=False) - # object check - assert isinstance(predictor, RecognitionPredictor) - input_tensor = tf.random.uniform(shape=[batch_size, 128, 128, 3], minval=0, maxval=1) - out = predictor(input_tensor, beam_width=10, top_paths=10) - assert isinstance(out, list) and len(out) == batch_size - assert all( - isinstance(words, list) - and isinstance(confs, list) - and all(isinstance(word, str) for word in words) - and all(isinstance(conf, float) for conf in confs) - for words, confs in out - ) - - -def test_recognition_zoo_error(): - with pytest.raises(ValueError): - _ = recognition.zoo.recognition_predictor("my_fancy_model", pretrained=False) - - -@pytest.mark.parametrize( - "arch_name, input_shape", - [ - ["crnn_vgg16_bn", (32, 128, 3)], - ["crnn_mobilenet_v3_small", (32, 128, 3)], - ["crnn_mobilenet_v3_large", (32, 128, 3)], - ["vitstr_small", (32, 128, 3)], # testing one vitstr version is enough - pytest.param( - "sar_resnet31", - (32, 128, 3), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - pytest.param( - "master", (32, 128, 3), marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory") - ), - pytest.param( - "parseq", - (32, 128, 3), - marks=pytest.mark.skipif(system_available_memory < 16, reason="too less memory"), - ), - ], -) -def test_models_onnx_export(arch_name, input_shape): - # Model - batch_size = 2 - tf.keras.backend.clear_session() - model = recognition.__dict__[arch_name](pretrained=True, exportable=True, input_shape=input_shape) - # SAR, MASTER, ViTSTR export currently only available with constant batch size - if arch_name in ["sar_resnet31", "master", "vitstr_small", "parseq"]: - dummy_input = [tf.TensorSpec([batch_size, *input_shape], tf.float32, name="input")] - else: - # batch_size = None for dynamic batch size - dummy_input = [tf.TensorSpec([None, *input_shape], tf.float32, name="input")] - np_dummy_input = 
np.random.rand(batch_size, *input_shape).astype(np.float32) - tf_logits = model(np_dummy_input, training=False)["logits"].numpy() - with tempfile.TemporaryDirectory() as tmpdir: - # Export - model_path, output = export_model_to_onnx( - model, - model_name=os.path.join(tmpdir, "model"), - dummy_input=dummy_input, - large_model=True if arch_name == "master" else False, - ) - assert os.path.exists(model_path) - - if arch_name == "master": - # large models are exported as zip archive - shutil.unpack_archive(model_path, tmpdir, "zip") - model_path = os.path.join(tmpdir, "__MODEL_PROTO.onnx") - else: - model_path = os.path.join(tmpdir, "model.onnx") - - # Inference - ort_session = onnxruntime.InferenceSession(model_path, providers=["CPUExecutionProvider"]) - ort_outs = ort_session.run(output, {"input": np_dummy_input}) - - assert isinstance(ort_outs, list) and len(ort_outs) == 1 - assert ort_outs[0].shape == tf_logits.shape - # Check that the output is close to the TensorFlow output - only warn if not close - try: - assert np.allclose(tf_logits, ort_outs[0], atol=1e-4) - except AssertionError: - pytest.skip(f"Output of {arch_name}:\nMax element-wise difference: {np.max(np.abs(tf_logits - ort_outs[0]))}") diff --git a/tests/tensorflow/test_models_utils_tf.py b/tests/tensorflow/test_models_utils_tf.py deleted file mode 100644 index 3d35069..0000000 --- a/tests/tensorflow/test_models_utils_tf.py +++ /dev/null @@ -1,60 +0,0 @@ -import os - -import pytest -import tensorflow as tf -from doctr.models.utils import ( - IntermediateLayerGetter, - _bf16_to_float32, - _copy_tensor, - conv_sequence, - load_pretrained_params, -) -from tensorflow.keras import Sequential, layers -from tensorflow.keras.applications import ResNet50 - - -def test_copy_tensor(): - x = tf.random.uniform(shape=[8], minval=0, maxval=1) - m = _copy_tensor(x) - assert m.device == x.device and m.dtype == x.dtype and m.shape == x.shape and tf.reduce_all(tf.equal(m, x)) - - -def test_bf16_to_float32(): - x = 
tf.random.uniform(shape=[8], minval=0, maxval=1, dtype=tf.bfloat16) - m = _bf16_to_float32(x) - assert x.dtype == tf.bfloat16 and m.dtype == tf.float32 and tf.reduce_all(tf.equal(m, tf.cast(x, tf.float32))) - - -def test_load_pretrained_params(tmpdir_factory): - model = Sequential([layers.Dense(8, activation="relu", input_shape=(4,)), layers.Dense(4)]) - # Retrieve this URL - url = "https://doctr-static.mindee.com/models?id=v0.1-models/tmp_checkpoint-4a98e492.zip&src=0" - # Temp cache dir - cache_dir = tmpdir_factory.mktemp("cache") - # Pass an incorrect hash - with pytest.raises(ValueError): - load_pretrained_params(model, url, "mywronghash", cache_dir=str(cache_dir), internal_name="") - # Let tit resolve the hash from the file name - load_pretrained_params(model, url, cache_dir=str(cache_dir), internal_name="") - # Check that the file was downloaded & the archive extracted - assert os.path.exists(cache_dir.join("models").join("tmp_checkpoint-4a98e492")) - # Check that archive was deleted - assert os.path.exists(cache_dir.join("models").join("tmp_checkpoint-4a98e492.zip")) - - -def test_conv_sequence(): - assert len(conv_sequence(8, kernel_size=3)) == 1 - assert len(conv_sequence(8, "relu", kernel_size=3)) == 1 - assert len(conv_sequence(8, None, True, kernel_size=3)) == 2 - assert len(conv_sequence(8, "relu", True, kernel_size=3)) == 3 - - -def test_intermediate_layer_getter(): - backbone = ResNet50(include_top=False, weights=None, pooling=None) - feat_extractor = IntermediateLayerGetter(backbone, ["conv2_block3_out", "conv3_block4_out"]) - # Check num of output features - input_tensor = tf.random.uniform(shape=[1, 224, 224, 3], minval=0, maxval=1) - assert len(feat_extractor(input_tensor)) == 2 - - # Repr - assert repr(feat_extractor) == "IntermediateLayerGetter()" diff --git a/tests/tensorflow/test_models_zoo_tf.py b/tests/tensorflow/test_models_zoo_tf.py deleted file mode 100644 index 50b5d37..0000000 --- a/tests/tensorflow/test_models_zoo_tf.py +++ /dev/null 
@@ -1,325 +0,0 @@ -import numpy as np -import pytest -from doctr import models -from doctr.file_utils import CLASS_NAME -from doctr.io import Document, DocumentFile -from doctr.io.elements import KIEDocument -from doctr.models import detection, recognition -from doctr.models.detection.predictor import DetectionPredictor -from doctr.models.detection.zoo import detection_predictor -from doctr.models.kie_predictor import KIEPredictor -from doctr.models.predictor import OCRPredictor -from doctr.models.preprocessor import PreProcessor -from doctr.models.recognition.predictor import RecognitionPredictor -from doctr.models.recognition.zoo import recognition_predictor -from doctr.utils.repr import NestedObject - - -# Create a dummy callback -class _DummyCallback: - def __call__(self, loc_preds): - return loc_preds - - -@pytest.mark.parametrize( - "assume_straight_pages, straighten_pages", - [ - [True, False], - [False, False], - [True, True], - ], -) -def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages): - det_bsize = 4 - det_predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=det_bsize), - detection.db_mobilenet_v3_large( - pretrained=True, - pretrained_backbone=False, - input_shape=(512, 512, 3), - assume_straight_pages=assume_straight_pages, - ), - ) - - reco_bsize = 16 - reco_predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(pretrained=False, pretrained_backbone=False, vocab=mock_vocab), - ) - - doc = DocumentFile.from_pdf(mock_pdf) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=assume_straight_pages, - straighten_pages=straighten_pages, - detect_orientation=True, - detect_language=True, - ) - - if assume_straight_pages: - assert predictor.crop_orientation_predictor is None - else: - assert isinstance(predictor.crop_orientation_predictor, NestedObject) - - out = 
predictor(doc) - assert isinstance(out, Document) - assert len(out.pages) == 2 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - orientation = 0 - assert out.pages[0].orientation["value"] == orientation - language = "unknown" - assert out.pages[0].language["value"] == language - - -def test_trained_ocr_predictor(mock_payslip): - doc = DocumentFile.from_images(mock_payslip) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - symmetric_pad=True, - preserve_aspect_ratio=False, - ) - reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=False, - ) - # test hooks - predictor.add_hook(_DummyCallback()) - - out = predictor(doc) - - assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." - geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) - assert np.allclose(np.array(out.pages[0].blocks[0].lines[0].words[0].geometry), geometry_mr, rtol=0.05) - - assert out.pages[0].blocks[1].lines[0].words[-1].value == "revised" - geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) - assert np.allclose(np.array(out.pages[0].blocks[1].lines[0].words[-1].geometry), geometry_revised, rtol=0.05) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - predictor = OCRPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - out = predictor(doc) - - assert out.pages[0].blocks[0].lines[0].words[0].value == "Mr." 
- - -@pytest.mark.parametrize( - "assume_straight_pages, straighten_pages", - [ - [True, False], - [False, False], - [True, True], - ], -) -def test_kiepredictor(mock_pdf, mock_vocab, assume_straight_pages, straighten_pages): - det_bsize = 4 - det_predictor = DetectionPredictor( - PreProcessor(output_size=(512, 512), batch_size=det_bsize), - detection.db_mobilenet_v3_large( - pretrained=True, - pretrained_backbone=False, - input_shape=(512, 512, 3), - assume_straight_pages=assume_straight_pages, - ), - ) - - reco_bsize = 16 - reco_predictor = RecognitionPredictor( - PreProcessor(output_size=(32, 128), batch_size=reco_bsize, preserve_aspect_ratio=True), - recognition.crnn_vgg16_bn(pretrained=False, pretrained_backbone=False, vocab=mock_vocab), - ) - - doc = DocumentFile.from_pdf(mock_pdf) - - predictor = KIEPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=assume_straight_pages, - straighten_pages=straighten_pages, - detect_orientation=True, - detect_language=True, - ) - - if assume_straight_pages: - assert predictor.crop_orientation_predictor is None - else: - assert isinstance(predictor.crop_orientation_predictor, NestedObject) - - out = predictor(doc) - assert isinstance(out, KIEDocument) - assert len(out.pages) == 2 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - orientation = 0 - assert out.pages[0].orientation["value"] == orientation - language = "unknown" - assert out.pages[0].language["value"] == language - - -def test_trained_kie_predictor(mock_payslip): - doc = DocumentFile.from_images(mock_payslip) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - symmetric_pad=True, - preserve_aspect_ratio=False, - ) - reco_predictor = recognition_predictor("crnn_vgg16_bn", pretrained=True, batch_size=128) - - predictor = KIEPredictor( - det_predictor, - 
reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=False, - ) - # test hooks - predictor.add_hook(_DummyCallback()) - - out = predictor(doc) - - assert isinstance(out, KIEDocument) - assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr." - geometry_mr = np.array([[0.1083984375, 0.0634765625], [0.1494140625, 0.0859375]]) - assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][0].geometry), geometry_mr, rtol=0.05) - - assert out.pages[0].predictions[CLASS_NAME][3].value == "revised" - geometry_revised = np.array([[0.7548828125, 0.126953125], [0.8388671875, 0.1484375]]) - assert np.allclose(np.array(out.pages[0].predictions[CLASS_NAME][3].geometry), geometry_revised, rtol=0.05) - - det_predictor = detection_predictor( - "db_resnet50", - pretrained=True, - batch_size=2, - assume_straight_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - predictor = KIEPredictor( - det_predictor, - reco_predictor, - assume_straight_pages=True, - straighten_pages=True, - preserve_aspect_ratio=True, - symmetric_pad=True, - ) - - out = predictor(doc) - - assert isinstance(out, KIEDocument) - assert out.pages[0].predictions[CLASS_NAME][0].value == "Mr." 
- - -def _test_predictor(predictor): - # Output checks - assert isinstance(predictor, OCRPredictor) - - doc = [np.zeros((512, 512, 3), dtype=np.uint8)] - out = predictor(doc) - # Document - assert isinstance(out, Document) - - # The input doc has 1 page - assert len(out.pages) == 1 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - -def _test_kiepredictor(predictor): - # Output checks - assert isinstance(predictor, KIEPredictor) - - doc = [np.zeros((512, 512, 3), dtype=np.uint8)] - out = predictor(doc) - # Document - assert isinstance(out, KIEDocument) - - # The input doc has 1 page - assert len(out.pages) == 1 - # Dimension check - with pytest.raises(ValueError): - input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8) - _ = predictor([input_page]) - - -@pytest.mark.parametrize( - "det_arch, reco_arch", - [ - ["db_mobilenet_v3_large", "crnn_vgg16_bn"], - ], -) -def test_zoo_models(det_arch, reco_arch): - # Model - predictor = models.ocr_predictor(det_arch, reco_arch, pretrained=True) - _test_predictor(predictor) - - # passing model instance directly - det_model = detection.__dict__[det_arch](pretrained=True) - reco_model = recognition.__dict__[reco_arch](pretrained=True) - predictor = models.ocr_predictor(det_model, reco_model) - _test_predictor(predictor) - - # passing recognition model as detection model - with pytest.raises(ValueError): - models.ocr_predictor(det_arch=reco_model, pretrained=True) - - # passing detection model as recognition model - with pytest.raises(ValueError): - models.ocr_predictor(reco_arch=det_model, pretrained=True) - - # KIE predictor - predictor = models.kie_predictor(det_arch, reco_arch, pretrained=True) - _test_kiepredictor(predictor) - - # passing model instance directly - det_model = detection.__dict__[det_arch](pretrained=True) - reco_model = recognition.__dict__[reco_arch](pretrained=True) - predictor = 
models.kie_predictor(det_model, reco_model) - _test_kiepredictor(predictor) - - # passing recognition model as detection model - with pytest.raises(ValueError): - models.kie_predictor(det_arch=reco_model, pretrained=True) - - # passing detection model as recognition model - with pytest.raises(ValueError): - models.kie_predictor(reco_arch=det_model, pretrained=True) diff --git a/tests/tensorflow/test_transforms_tf.py b/tests/tensorflow/test_transforms_tf.py deleted file mode 100644 index d1db73d..0000000 --- a/tests/tensorflow/test_transforms_tf.py +++ /dev/null @@ -1,492 +0,0 @@ -import math - -import numpy as np -import pytest -import tensorflow as tf -from doctr import transforms as T -from doctr.transforms.functional import crop_detection, rotate_sample - - -def test_resize(): - output_size = (32, 32) - transfo = T.Resize(output_size) - input_t = tf.cast(tf.fill([64, 64, 3], 1), dtype=tf.float32) - out = transfo(input_t) - - assert tf.math.reduce_all(tf.math.abs(out - 1) < 1e-6) - assert out.shape[:2] == output_size - assert repr(transfo) == f"Resize(output_size={output_size}, method='bilinear')" - - transfo = T.Resize(output_size, preserve_aspect_ratio=True) - input_t = tf.cast(tf.fill([32, 64, 3], 1), dtype=tf.float32) - out = transfo(input_t) - - assert not tf.reduce_all(out == 1) - # Asymetric padding - assert tf.reduce_all(out[-1] == 0) and tf.math.reduce_all(tf.math.abs(out[0] - 1) < 1e-6) - assert out.shape[:2] == output_size - - # Symetric padding - transfo = T.Resize(output_size, preserve_aspect_ratio=True, symmetric_pad=True) - assert repr(transfo) == ( - f"Resize(output_size={output_size}, method='bilinear', " f"preserve_aspect_ratio=True, symmetric_pad=True)" - ) - out = transfo(input_t) - # Asymetric padding - assert tf.reduce_all(out[-1] == 0) and tf.reduce_all(out[0] == 0) - - # Inverse aspect ratio - input_t = tf.cast(tf.fill([64, 32, 3], 1), dtype=tf.float32) - out = transfo(input_t) - - assert not tf.reduce_all(out == 1) - assert out.shape[:2] 
== output_size - - # FP16 - input_t = tf.cast(tf.fill([64, 64, 3], 1), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_compose(): - output_size = (16, 16) - transfo = T.Compose([T.Resize((32, 32)), T.Resize(output_size)]) - input_t = tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1) - out = transfo(input_t) - - assert out.shape[:2] == output_size - assert len(repr(transfo).split("\n")) == 6 - - -@pytest.mark.parametrize( - "input_shape", - [ - [8, 32, 32, 3], - [32, 32, 3], - [32, 3], - ], -) -def test_normalize(input_shape): - mean, std = [0.5, 0.5, 0.5], [0.5, 0.5, 0.5] - transfo = T.Normalize(mean, std) - input_t = tf.cast(tf.fill(input_shape, 1), dtype=tf.float32) - - out = transfo(input_t) - - assert tf.reduce_all(out == 1) - assert repr(transfo) == f"Normalize(mean={mean}, std={std})" - - # FP16 - input_t = tf.cast(tf.fill(input_shape, 1), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_lambatransformation(): - transfo = T.LambdaTransformation(lambda x: x / 2) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 1), dtype=tf.float32) - out = transfo(input_t) - - assert tf.reduce_all(out == 0.5) - - -def test_togray(): - transfo = T.ToGray() - r = tf.fill([8, 32, 32, 1], 0.2) - g = tf.fill([8, 32, 32, 1], 0.6) - b = tf.fill([8, 32, 32, 1], 0.7) - input_t = tf.cast(tf.concat([r, g, b], axis=-1), dtype=tf.float32) - out = transfo(input_t) - - assert tf.reduce_all(out <= 0.51) - assert tf.reduce_all(out >= 0.49) - - # FP16 - input_t = tf.cast(tf.concat([r, g, b], axis=-1), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -@pytest.mark.parametrize( - "rgb_min", - [ - 0.2, - 0.4, - 0.6, - ], -) -def test_invert_colorize(rgb_min): - transfo = T.ColorInversion(min_val=rgb_min) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 1), dtype=tf.float32) - out = transfo(input_t) - assert tf.reduce_all(out <= 1 - rgb_min + 1e-4) - assert tf.reduce_all(out >= 0) - - 
input_t = tf.cast(tf.fill([8, 32, 32, 3], 255), dtype=tf.uint8) - out = transfo(input_t) - assert tf.reduce_all(out <= int(math.ceil(255 * (1 - rgb_min)))) - assert tf.reduce_all(out >= 0) - - # FP16 - input_t = tf.cast(tf.fill([8, 32, 32, 3], 1), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_brightness(): - transfo = T.RandomBrightness(max_delta=0.1) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float32) - out = transfo(input_t) - - assert tf.reduce_all(out >= 0.4) - assert tf.reduce_all(out <= 0.6) - - # FP16 - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_contrast(): - transfo = T.RandomContrast(delta=0.2) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float32) - out = transfo(input_t) - - assert tf.reduce_all(out == 0.5) - - # FP16 - if any(tf.config.list_physical_devices("GPU")): - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_saturation(): - transfo = T.RandomSaturation(delta=0.2) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float32) - input_t = tf.image.hsv_to_rgb(input_t) - out = transfo(input_t) - hsv = tf.image.rgb_to_hsv(out) - - assert tf.reduce_all(hsv[:, :, :, 1] >= 0.4) - assert tf.reduce_all(hsv[:, :, :, 1] <= 0.6) - - # FP16 - if any(tf.config.list_physical_devices("GPU")): - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_hue(): - transfo = T.RandomHue(max_delta=0.2) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float32) - input_t = tf.image.hsv_to_rgb(input_t) - out = transfo(input_t) - hsv = tf.image.rgb_to_hsv(out) - - assert tf.reduce_all(hsv[:, :, :, 0] <= 0.7) - assert tf.reduce_all(hsv[:, :, :, 0] >= 0.3) - - # FP16 - if any(tf.config.list_physical_devices("GPU")): - input_t = 
tf.cast(tf.fill([8, 32, 32, 3], 0.5), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_gamma(): - transfo = T.RandomGamma(min_gamma=1.0, max_gamma=2.0, min_gain=0.8, max_gain=1.0) - input_t = tf.cast(tf.fill([8, 32, 32, 3], 2.0), dtype=tf.float32) - out = transfo(input_t) - - assert tf.reduce_all(out >= 1.6) - assert tf.reduce_all(out <= 4.0) - - # FP16 - input_t = tf.cast(tf.fill([8, 32, 32, 3], 2.0), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_jpegquality(): - transfo = T.RandomJpegQuality(min_quality=50) - input_t = tf.cast(tf.fill([32, 32, 3], 1), dtype=tf.float32) - out = transfo(input_t) - assert out.shape == input_t.shape - - # FP16 - input_t = tf.cast(tf.fill([32, 32, 3], 1), dtype=tf.float16) - out = transfo(input_t) - assert out.dtype == tf.float16 - - -def test_rotate_sample(): - img = tf.ones((200, 100, 3), dtype=tf.float32) - boxes = np.array([0, 0, 100, 200])[None, ...] - polys = np.stack((boxes[..., [0, 1]], boxes[..., [2, 1]], boxes[..., [2, 3]], boxes[..., [0, 3]]), axis=1) - rel_boxes = np.array([0, 0, 1, 1], dtype=np.float32)[None, ...] 
- rel_polys = np.stack( - (rel_boxes[..., [0, 1]], rel_boxes[..., [2, 1]], rel_boxes[..., [2, 3]], rel_boxes[..., [0, 3]]), axis=1 - ) - - # No angle - rotated_img, rotated_geoms = rotate_sample(img, boxes, 0, False) - assert tf.math.reduce_all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, boxes, 0, True) - assert tf.math.reduce_all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 0, False) - assert tf.math.reduce_all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 0, True) - assert tf.math.reduce_all(rotated_img == img) and np.all(rotated_geoms == rel_polys) - - # No expansion - expected_img = np.zeros((200, 100, 3), dtype=np.float32) - expected_img[50:150] = 1 - expected_img = tf.convert_to_tensor(expected_img) - expected_polys = np.array([[0, 0.75], [0, 0.25], [1, 0.25], [1, 0.75]])[None, ...] - rotated_img, rotated_geoms = rotate_sample(img, boxes, 90, False) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 90, False) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_boxes, 90, False) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_polys, 90, False) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - - # Expansion - expected_img = tf.ones((100, 200, 3), dtype=tf.float32) - expected_polys = np.array([[0, 1], [0, 0], [1, 0], [1, 1]], dtype=np.float32)[None, ...] 
- rotated_img, rotated_geoms = rotate_sample(img, boxes, 90, True) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, polys, 90, True) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_boxes, 90, True) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - rotated_img, rotated_geoms = rotate_sample(img, rel_polys, 90, True) - assert tf.math.reduce_all(rotated_img == expected_img) and np.all(rotated_geoms == expected_polys) - - with pytest.raises(AssertionError): - rotate_sample(img, boxes[None, ...], 90, False) - - -def test_random_rotate(): - rotator = T.RandomRotate(max_angle=10.0, expand=False) - input_t = tf.ones((50, 50, 3), dtype=tf.float32) - boxes = np.array([[15, 20, 35, 30]]) - r_img, _r_boxes = rotator(input_t, boxes) - assert r_img.shape == input_t.shape - - rotator = T.RandomRotate(max_angle=10.0, expand=True) - r_img, _r_boxes = rotator(input_t, boxes) - assert r_img.shape != input_t.shape - - # FP16 - input_t = tf.ones((50, 50, 3), dtype=tf.float16) - r_img, _ = rotator(input_t, boxes) - assert r_img.dtype == tf.float16 - - -def test_crop_detection(): - img = tf.ones((50, 50, 3), dtype=tf.float32) - abs_boxes = np.array([ - [15, 20, 35, 30], - [5, 10, 10, 20], - ]) - crop_box = (12 / 50, 23 / 50, 1.0, 1.0) - c_img, c_boxes = crop_detection(img, abs_boxes, crop_box) - assert c_img.shape == (26, 37, 3) - assert c_boxes.shape == (1, 4) - assert np.all(c_boxes == np.array([15 - 12, 0, 35 - 12, 30 - 23])[None, ...]) - - rel_boxes = np.array([ - [0.3, 0.4, 0.7, 0.6], - [0.1, 0.2, 0.2, 0.4], - ]) - c_img, c_boxes = crop_detection(img, rel_boxes, crop_box) - assert c_img.shape == (26, 37, 3) - assert c_boxes.shape == (1, 4) - assert np.abs(c_boxes - np.array([0.06 / 0.76, 0.0, 0.46 / 0.76, 0.14 / 0.54])[None, 
...]).mean() < 1e-7 - - # FP16 - img = tf.ones((50, 50, 3), dtype=tf.float16) - c_img, _ = crop_detection(img, rel_boxes, crop_box) - assert c_img.dtype == tf.float16 - - with pytest.raises(AssertionError): - crop_detection(img, abs_boxes, (2, 6, 24, 56)) - - -@pytest.mark.parametrize( - "target", - [ - np.array([[15, 20, 35, 30]]), # box - np.array([[[15, 20], [35, 20], [35, 30], [15, 30]]]), # polygon - ], -) -def test_random_crop(target): - transfo = T.RandomCrop(scale=(0.5, 1.0), ratio=(0.75, 1.33)) - input_t = tf.ones((50, 50, 3), dtype=tf.float32) - img, target = transfo(input_t, target) - # Check the scale (take a margin) - assert img.shape[0] * img.shape[1] >= 0.4 * input_t.shape[0] * input_t.shape[1] - # Check aspect ratio (take a margin) - assert 0.65 <= img.shape[0] / img.shape[1] <= 1.5 - # Check the target - assert np.all(target >= 0) - if target.ndim == 2: - assert np.all(target[:, [0, 2]] <= img.shape[-1]) and np.all(target[:, [1, 3]] <= img.shape[-2]) - else: - assert np.all(target[..., 0] <= img.shape[-1]) and np.all(target[..., 1] <= img.shape[-2]) - - -def test_gaussian_blur(): - blur = T.GaussianBlur(3, (0.1, 3)) - input_t = np.ones((31, 31, 3), dtype=np.float32) - input_t[15, 15] = 0 - blur_img = blur(tf.convert_to_tensor(input_t)).numpy() - assert blur_img.shape == input_t.shape - assert np.all(blur_img[15, 15] > 0) - - -@pytest.mark.parametrize( - "input_dtype, input_size", - [ - [tf.float32, (32, 32, 3)], - [tf.uint8, (32, 32, 3)], - ], -) -def test_channel_shuffle(input_dtype, input_size): - transfo = T.ChannelShuffle() - input_t = tf.random.uniform(input_size, dtype=tf.float32) - if input_dtype == tf.uint8: - input_t = tf.math.round(255 * input_t) - input_t = tf.cast(input_t, dtype=input_dtype) - out = transfo(input_t) - assert isinstance(out, tf.Tensor) - assert out.shape == input_size - assert out.dtype == input_dtype - # Ensure that nothing has changed apart from channel order - assert tf.math.reduce_all(tf.math.reduce_sum(input_t, -1) 
== tf.math.reduce_sum(out, -1)) - - -@pytest.mark.parametrize( - "input_dtype,input_shape", - [ - [tf.float32, (32, 32, 3)], - [tf.uint8, (32, 32, 3)], - ], -) -def test_gaussian_noise(input_dtype, input_shape): - transform = T.GaussianNoise(0.0, 1.0) - input_t = tf.random.uniform(input_shape, dtype=tf.float32) - if input_dtype == tf.uint8: - input_t = tf.math.round((255 * input_t)) - input_t = tf.cast(input_t, dtype=input_dtype) - transformed = transform(input_t) - assert isinstance(transformed, tf.Tensor) - assert transformed.shape == input_shape - assert transformed.dtype == input_dtype - assert tf.math.reduce_any(transformed != input_t) - assert tf.math.reduce_all(transformed >= 0) - if input_dtype == tf.uint8: - assert tf.reduce_all(transformed <= 255) - else: - assert tf.reduce_all(transformed <= 1.0) - - -@pytest.mark.parametrize( - "p,target", - [ - [1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [1, np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)], - [0, np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)], - ], -) -def test_randomhorizontalflip(p, target): - # testing for 2 cases, with flip probability 1 and 0. 
- transform = T.RandomHorizontalFlip(p) - input_t = np.ones((32, 32, 3)) - input_t[:, :16, :] = 0 - input_t = tf.convert_to_tensor(input_t) - transformed, _target = transform(input_t, target) - assert isinstance(transformed, tf.Tensor) - assert transformed.shape == input_t.shape - assert transformed.dtype == input_t.dtype - # integrity check of targets - assert isinstance(_target, np.ndarray) - assert _target.dtype == np.float32 - if _target.ndim == 2: - if p == 1: - assert np.all(_target == np.array([[0.7, 0.1, 0.9, 0.4]], dtype=np.float32)) - assert tf.reduce_all( - tf.math.reduce_mean(transformed, (0, 2)) == tf.constant([1] * 16 + [0] * 16, dtype=tf.float64) - ) - elif p == 0: - assert np.all(_target == np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)) - assert tf.reduce_all( - tf.math.reduce_mean(transformed, (0, 2)) == tf.constant([0] * 16 + [1] * 16, dtype=tf.float64) - ) - else: - if p == 1: - assert np.all(_target == np.array([[[0.9, 0.1], [0.7, 0.1], [0.7, 0.4], [0.9, 0.4]]], dtype=np.float32)) - assert tf.reduce_all( - tf.math.reduce_mean(transformed, (0, 2)) == tf.constant([1] * 16 + [0] * 16, dtype=tf.float64) - ) - elif p == 0: - assert np.all(_target == np.array([[[0.1, 0.1], [0.3, 0.1], [0.3, 0.4], [0.1, 0.4]]], dtype=np.float32)) - assert tf.reduce_all( - tf.math.reduce_mean(transformed, (0, 2)) == tf.constant([0] * 16 + [1] * 16, dtype=tf.float64) - ) - - -@pytest.mark.parametrize( - "input_dtype,input_shape", - [ - [tf.float32, (32, 32, 3)], - [tf.uint8, (32, 32, 3)], - [tf.float32, (64, 32, 3)], - [tf.uint8, (64, 32, 3)], - ], -) -def test_random_shadow(input_dtype, input_shape): - transform = T.RandomShadow((0.2, 0.8)) - input_t = tf.random.uniform(input_shape, dtype=tf.float32) - if input_dtype == tf.uint8: - input_t = tf.math.round((255 * input_t)) - input_t = tf.cast(input_t, dtype=input_dtype) - transformed = transform(input_t) - assert isinstance(transformed, tf.Tensor) - assert transformed.shape == input_shape - assert transformed.dtype 
== input_dtype - # The shadow will darken the picture - assert tf.math.reduce_mean(input_t) >= tf.math.reduce_mean(transformed) - assert tf.math.reduce_all(transformed >= 0) - if input_dtype == tf.uint8: - assert tf.reduce_all(transformed <= 255) - else: - assert tf.reduce_all(transformed <= 1.0) - - -@pytest.mark.parametrize( - "p,target", - [ - [1, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [0, np.array([[0.1, 0.1, 0.3, 0.4]], dtype=np.float32)], - [1, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)], - [0, np.array([[[0.1, 0.8], [0.3, 0.1], [0.3, 0.4], [0.8, 0.4]]], dtype=np.float32)], - ], -) -def test_random_resize(p, target): - transfo = T.RandomResize(scale_range=(0.3, 1.3), p=p) - assert repr(transfo) == f"RandomResize(scale_range=(0.3, 1.3), p={p})" - - img = tf.random.uniform((64, 64, 3)) - # Apply the transformation - out_img, out_target = transfo(img, target) - assert isinstance(out_img, tf.Tensor) - assert isinstance(out_target, np.ndarray) - # Resize is already well-tested - assert tf.reduce_all(tf.equal(out_img, img)) if p == 0 else out_img.shape != img.shape - assert out_target.shape == target.shape