From e8782d24f38e0c32697823ed6a63c82c5d0cc604 Mon Sep 17 00:00:00 2001 From: Setu Shah Date: Tue, 17 Dec 2024 10:37:14 -0800 Subject: [PATCH] chore(deps): Create separate group for CV dependencies (#882) --- dataquality/__init__.py | 2 +- .../data_logger/image_classification.py | 4 +- poetry.lock | 9 ++-- pyproject.toml | 54 ++++++++++--------- tasks.py | 3 +- 5 files changed, 39 insertions(+), 33 deletions(-) diff --git a/dataquality/__init__.py b/dataquality/__init__.py index 0dda4b4fc..b6d5ce384 100644 --- a/dataquality/__init__.py +++ b/dataquality/__init__.py @@ -30,7 +30,7 @@ dataquality.get_insights() """ -__version__ = "2.2.1" +__version__ = "2.3.0" import sys from typing import Any, List, Optional diff --git a/dataquality/loggers/data_logger/image_classification.py b/dataquality/loggers/data_logger/image_classification.py index 7588bbf62..bfaa30e0d 100644 --- a/dataquality/loggers/data_logger/image_classification.py +++ b/dataquality/loggers/data_logger/image_classification.py @@ -25,7 +25,6 @@ from dataquality.schemas.cv import GAL_LOCAL_IMAGES_PATHS from dataquality.schemas.dataframe import BaseLoggerDataFrames from dataquality.schemas.split import Split -from dataquality.utils.cv_smart_features import generate_smart_features from dataquality.utils.upload import chunk_load_then_upload_df # smaller than ITER_CHUNK_SIZE from base_data_logger because very large chunks @@ -421,6 +420,9 @@ def add_cv_smart_features(cls, in_frame: DataFrame, split: str) -> DataFrame: if GAL_LOCAL_IMAGES_PATHS not in in_frame.get_column_names(): return in_frame + # Import here because `imagededup` is not a required dependency. + from dataquality.utils.cv_smart_features import generate_smart_features + print( f"🔲 Calculating Smart Features for split {split} (can take a few minutes " "depending on the size of your dataset)" diff --git a/poetry.lock b/poetry.lock index 2c0733056..68d3e5109 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand. [[package]] name = "absl-py" @@ -1975,7 +1975,7 @@ files = [ name = "imagededup" version = "0.3.1" description = "Package for image deduplication" -optional = false +optional = true python-versions = "*" files = [ {file = "imagededup-0.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5200577e44536d659039e6cc491024cca23b80237d8fd1c2091f436cd21ba62b"}, @@ -4533,7 +4533,7 @@ files = [ name = "pywavelets" version = "1.6.0" description = "PyWavelets, wavelet transform module" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "pywavelets-1.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ddc1ff5ad706313d930f857f9656f565dfb81b85bbe58a9db16ad8fa7d1537c5"}, @@ -7411,10 +7411,11 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it cuda = ["cudf-cu12", "cuml-cu12", "dask-cudf-cu12", "pylibraft-cu12", "raft-dask-cu12", "rmm-cu12", "ucx-py-cu12"] cuda11 = [] cuda12 = ["cudf-cu12", "cuml-cu12", "dask-cudf-cu12", "pylibraft-cu12", "raft-dask-cu12", "rmm-cu12", "ucx-py-cu12"] +cv = ["Pillow", "imagededup", "opencv-python"] minio = ["minio"] setfit = ["setfit"] [metadata] lock-version = "2.0" python-versions = "^3.9,<3.11" -content-hash = "3eb4814e38ec5f561d112aba3659a9fe36081b4ebdd5c78a7a7d428d0376327c" +content-hash = "e176c523d121ed27bc444c7b30abbdc5ffb21e6ab58245fe11c0f5a570126830" diff --git a/pyproject.toml b/pyproject.toml index aff2332bf..d52b6c3fd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "dataquality" -version = "2.2.1" +version = "2.3.0" description = "" authors = ["Galileo Technologies, Inc. "] readme = "README.md" @@ -17,7 +17,6 @@ scripts = { dqyolo = "dataquality.dqyolo:main" } python = "^3.9,<3.11" pydantic = ">=2.0.0" requests = ">=2.25.1" -types-requests = ">=2.25.2" pandas = ">=0.20.0" pyarrow = ">=5.0.0" vaex-core = "4.17.1" @@ -34,43 +33,40 @@ datasets = ">=2.14.6" transformers = ">=4.17.0" seqeval = "*" sentence-transformers = ">=2.2" -Pillow = "*" h5py = ">=3.1.0" numpy = "<1.24.0" tenacity = ">=8.1.0" evaluate = "*" accelerate = "*" ipywidgets = ">=8.1.0" -imagededup = ">=0.3.1,<0.3.2" pyjwt = ">=2.8.0" peft = "*" -# Pin opencv for linting incompatibility -opencv-python = "<=4.8.1.78" pydantic-settings = ">=2.0.0" -minio = {version = ">=7.1.0,<7.2.0", optional=true} -setfit = {version ="==0.7.0", optional = true} sentencepiece = "^0.2.0" -# cuda dependencies -ucx-py-cu12 = { version ="==0.36", source = "nvidia", optional = true } -rmm-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -raft-dask-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -pylibraft-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -dask-cudf-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -cudf-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -cuml-cu12 = {version = "==24.2.0", source = "nvidia", optional=true} -tensorflow = ">=2.9.1,<2.15.0" - +minio = { version = ">=7.1.0,<7.2.0", optional = true } +setfit = { version = "==0.7.0", optional = true } +# * CV dependencies. +imagededup = { version = ">=0.3.1,<0.3.2", optional = true } +Pillow = { version = "*", optional = true } +# Pin opencv for linting incompatibility +opencv-python = { version = "<=4.8.1.78", optional = true } -[tool.poetry.group.dev.dependencies] -mypy = "^1.8.0" -invoke = ">=1.6.0" -pre-commit = "^3.6.1" +# * Cuda dependencies. +ucx-py-cu12 = { version = "==0.36", source = "nvidia", optional = true } +rmm-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +raft-dask-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +pylibraft-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +dask-cudf-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +cudf-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +cuml-cu12 = { version = "==24.2.0", source = "nvidia", optional = true } +tensorflow = ">=2.9.1,<2.15.0" [tool.poetry.extras] +cv = ["imagededup", "opencv-python", "Pillow"] cuda = [ "ucx-py-cu12", "rmm-cu12", @@ -78,7 +74,7 @@ cuda = [ "pylibraft-cu12", "dask-cudf-cu12", "cudf-cu12", - "cuml-cu12" + "cuml-cu12", ] cuda11 = [ "ucx-py-cu11", @@ -87,7 +83,7 @@ cuda11 = [ "pylibraft-cu11", "dask-cudf-cu11", "cudf-cu11", - "cuml-cu11" + "cuml-cu11", ] cuda12 = [ "ucx-py-cu12", @@ -96,7 +92,7 @@ cuda12 = [ "pylibraft-cu12", "dask-cudf-cu12", "cudf-cu12", - "cuml-cu12" + "cuml-cu12", ] minio = ["minio"] setfit = ["setfit"] @@ -123,6 +119,7 @@ pytest-env = ">=0.8.1" pytest-xdist = ">=2.4.0" types-setuptools = ">=67.3.0.1" types-cachetools = ">=4.2.4" +types-requests = ">=2.25.2" torchvision = ">=0.13.1" torch = ">=1.12.1" torchtext = ">=0.13.1" @@ -136,10 +133,15 @@ setfit = "==0.7.0" accelerate = ">=0.19.0" typing-inspect = "==0.8.0" typing-extensions = ">=4.9.0" -lightning = "^2.3.1" # Assuming you want the latest version as no version was specified +lightning = "^2.3.1" # Assuming you want the latest version as no version was specified pytest-mock = "^3.14.0" +[tool.poetry.group.dev.dependencies] +mypy = "^1.8.0" +invoke = ">=1.6.0" +pre-commit = "^3.6.1" + [[tool.poetry.source]] name = "nvidia" diff --git a/tasks.py b/tasks.py index 9bf4514e6..bf4431276 100644 --- a/tasks.py +++ b/tasks.py @@ -5,7 +5,8 @@ @task def install(ctx: Context) -> None: ctx.run( - "poetry install --extras minio --extras setfit --with test,dev --no-root", + "poetry install --extras minio --extras setfit --extras cv" + " --with test,dev --no-root", echo=True, )