Skip to content

Commit

Permalink
Merge branch 'release/0.2.1'
Browse files Browse the repository at this point in the history
  • Loading branch information
cpvannier committed Nov 13, 2023
2 parents 8ddec6d + 1887bc4 commit aaf6761
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 5 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,22 @@ All notable changes to Pixano will be documented in this file.

## [Unreleased]



## [0.2.1] - 2023-11-13

### Added
- Add CLIP model for **semantic search** on images



## [0.2.0] - 2023-10-26

### Changed
- **Breaking:** Update models to the new **PixanoTypes** and **lancedb storage format** of Pixano 0.4.0



## [0.1.6] - 2023-07-10

### Added
Expand Down Expand Up @@ -81,7 +91,8 @@ All notable changes to Pixano will be documented in this file.



[Unreleased]: https://github.com/pixano/pixano-inference/compare/v0.2.0...develop
[Unreleased]: https://github.com/pixano/pixano-inference/compare/main...develop
[0.2.1]: https://github.com/pixano/pixano-inference/compare/v0.2.0...v0.2.1
[0.2.0]: https://github.com/pixano/pixano-inference/compare/v0.1.6...v0.2.0
[0.1.6]: https://github.com/pixano/pixano-inference/compare/v0.1.5...v0.1.6
[0.1.5]: https://github.com/pixano/pixano-inference/compare/v0.1.4...v0.1.5
Expand Down
2 changes: 1 addition & 1 deletion pixano_inference/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,4 @@
#
# http://www.cecill.info

__version__ = "0.2.0"
__version__ = "0.2.1"
1 change: 1 addition & 0 deletions pixano_inference/segment_anything/segment_anything.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class SAM(InferenceModel):
device (str): Model GPU or CPU device (e.g. "cuda", "cpu")
description (str): Model description
model (torch.nn.Module): SAM model
checkpoint_path (Path): Model checkpoint path
"""

def __init__(
Expand Down
18 changes: 18 additions & 0 deletions pixano_inference/transformers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# @Copyright: CEA-LIST/DIASI/SIALV/LVA (2023)
# @Author: CEA-LIST/DIASI/SIALV/LVA <[email protected]>
# @License: CECILL-C
#
# This software is a collaborative computer program whose purpose is to
# generate and explore labeled data for computer vision applications.
# This software is governed by the CeCILL-C license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL-C
# license as circulated by CEA, CNRS and INRIA at the following URL
#
# http://www.cecill.info

from .clip import CLIP

# Names exported by this package's `__init__` (currently only the CLIP model)
__all__ = [
"CLIP",
]
118 changes: 118 additions & 0 deletions pixano_inference/transformers/clip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# @Copyright: CEA-LIST/DIASI/SIALV/LVA (2023)
# @Author: CEA-LIST/DIASI/SIALV/LVA <[email protected]>
# @License: CECILL-C
#
# This software is a collaborative computer program whose purpose is to
# generate and explore labeled data for computer vision applications.
# This software is governed by the CeCILL-C license under French law and
# abiding by the rules of distribution of free software. You can use,
# modify and/ or redistribute the software under the terms of the CeCILL-C
# license as circulated by CEA, CNRS and INRIA at the following URL
#
# http://www.cecill.info


import numpy as np
import pyarrow as pa
import torch
from pixano.core import Image
from pixano.models import InferenceModel
from transformers import CLIPModel, CLIPProcessor, CLIPTokenizerFast


class CLIP(InferenceModel):
    """CLIP: Connecting text and images

    Wraps a HuggingFace Transformers CLIP checkpoint to compute image
    embeddings for dataset views and text embeddings for search queries.

    Attributes:
        name (str): Model name
        id (str): Model ID
        device (str): Model GPU or CPU device (e.g. "cuda", "cpu")
        description (str): Model description
        model (CLIPModel): CLIP model
        processor (CLIPProcessor): CLIP processor
        tokenizer (CLIPTokenizerFast): CLIP tokenizer
        pretrained_model (str): Pretrained model name or path
    """

    def __init__(
        self,
        pretrained_model: str = "openai/clip-vit-base-patch32",
        id: str = "",
    ) -> None:
        """Initialize model

        Args:
            pretrained_model (str): Pretrained model name or path
            id (str, optional): Previously used ID, generate new ID if "". Defaults to "".
        """

        super().__init__(
            name="CLIP",
            id=id,
            device="cpu",
            description=f"From HuggingFace Transformers. CLIP: Connecting text and images. {pretrained_model}.",
        )

        # Model, image processor and text tokenizer all come from the same checkpoint
        self.model = CLIPModel.from_pretrained(pretrained_model)
        self.processor = CLIPProcessor.from_pretrained(pretrained_model)
        self.tokenizer = CLIPTokenizerFast.from_pretrained(pretrained_model)

        # Model name or path
        self.pretrained_model = pretrained_model

    def precompute_embeddings(
        self,
        batch: pa.RecordBatch,
        views: list[str],
        uri_prefix: str,
    ) -> list[dict]:
        """Embedding precomputing for a batch

        Args:
            batch (pa.RecordBatch): Input batch
            views (list[str]): Dataset views
            uri_prefix (str): URI prefix for media files

        Returns:
            list[dict]: Embedding rows, one per batch item, each holding the
                item "id" and one embedding vector (np.ndarray) per view
        """

        # One output row per item of the batch, in batch order
        rows = [{"id": item_id} for item_id in batch["id"].to_pylist()]

        # Inference only: skip building the autograd graph
        with torch.no_grad():
            for view in views:
                column = batch[view]

                # Iterate manually (one image per forward pass)
                for x in range(batch.num_rows):
                    # Preprocess image
                    im = Image.from_dict(column[x].as_py())
                    im.uri_prefix = uri_prefix
                    pil_image = im.as_pillow()

                    # Inference
                    inputs = self.processor(images=pil_image, return_tensors="pt")
                    image_features = self.model.get_image_features(**inputs)

                    # Process model outputs: keep the single vector of this image
                    rows[x][view] = image_features.numpy()[0]

        return rows

    def semantic_search(self, query: str) -> np.ndarray:
        """Process semantic search query with CLIP

        Args:
            query (str): Search query text

        Returns:
            np.ndarray: Search query vector
        """

        inputs = self.tokenizer([query], padding=True, return_tensors="pt")

        # Inference only: skip building the autograd graph
        with torch.no_grad():
            text_features = self.model.get_text_features(**inputs)

        # Single query in, single vector out
        return text_features.numpy()[0]
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@ classifiers = [
"License :: CeCILL-C Free Software License Agreement (CECILL-C)",
]
dependencies = [
"pixano >= 0.2.0",
"pixano ~= 0.4.0",
"torch >= 2.0.0",
"torchaudio >= 2.0.0",
"torchvision >= 0.15.0",
"tensorflow >= 2.12.0",
"tensorflow-hub >= 0.13.0",
"segment-anything@git+https://github.com/facebookresearch/segment-anything",
"transformers >= 4.33.0",
"gitpython >= 3.1.30",
"matplotlib >= 3.3",
"psutil",
Expand All @@ -42,8 +43,8 @@ dependencies = [

[project.optional-dependencies]
documentation = [
"mkdocs-material ~= 9.3.0",
"mkdocstrings-python ~= 1.6.0",
"mkdocs-material ~= 9.4.0",
"mkdocstrings-python ~= 1.7.0",
"mkdocs-gen-files ~= 0.5.0",
"mkdocs-literate-nav ~= 0.6.0",
]
Expand Down

0 comments on commit aaf6761

Please sign in to comment.