Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: Add configurable VisualFeatures to the AzureAiServicesImageAnalysisTool #27444

Merged
merged 9 commits into from
Dec 16, 2024
1 change: 1 addition & 0 deletions libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
azure.ai.vision.imageanalysis>=1.0.0,<2
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,31 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
"""Tool that queries the Azure AI Services Image Analysis API.

In order to set this up, follow instructions at:
https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
https://learn.microsoft.com/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40

Attributes:
azure_ai_services_key (Optional[str]): The API key for Azure AI Services.
azure_ai_services_endpoint (Optional[str]): The endpoint URL for Azure AI Services.
visual_features Any: The visual features to analyze in the image, can be set as
either strings or azure.ai.vision.imageanalysis.models.VisualFeatures.
(e.g. 'TAGS', VisualFeatures.CAPTION).
image_analysis_client (Any): The client for interacting
with Azure AI Services Image Analysis.
name (str): The name of the tool.
description (str): A description of the tool,
including its purpose and expected input.
"""

azure_ai_services_key: str = "" #: :meta private:
azure_ai_services_endpoint: str = "" #: :meta private:
image_analysis_client: Any #: :meta private:
visual_features: Any #: :meta private:
azure_ai_services_key: Optional[str] = None #: :meta private:
azure_ai_services_endpoint: Optional[str] = None #: :meta private:
visual_features: Any = None
image_analysis_client: Any = None #: :meta private:

name: str = "azure_ai_services_image_analysis"
description: str = (
"A wrapper around Azure AI Services Image Analysis. "
"Useful for when you need to analyze images. "
"Input should be a url to an image."
"Input must be a url string or path string to an image."
)

@model_validator(mode="before")
Expand Down Expand Up @@ -68,13 +80,16 @@ def validate_environment(cls, values: Dict) -> Any:
f"Initialization of Azure AI Vision Image Analysis client failed: {e}"
)

values["visual_features"] = [
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
]

visual_features = values.get(
"visual_features",
[
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
],
)
values["visual_features"] = visual_features
return values

def _image_analysis(self, image_path: str) -> Dict:
Expand Down Expand Up @@ -115,6 +130,17 @@ def _image_analysis(self, image_path: str) -> Dict:
if result.read is not None and len(result.read.blocks) > 0:
res_dict["text"] = [line.text for line in result.read.blocks[0].lines]

if result.dense_captions is not None and len(result.dense_captions) > 0:
res_dict["dense_captions"] = [
str(dc) for dc in result.dense_captions.list
]

if result.smart_crops is not None and len(result.smart_crops) > 0:
res_dict["smart_crops"] = [str(sc) for sc in result.smart_crops.list]

if result.people is not None and len(result.people) > 0:
res_dict["people"] = [str(p) for p in result.people.list]

return res_dict

def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
Expand All @@ -136,6 +162,21 @@ def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))

if "dense_captions" in image_analysis_result:
formatted_result.append(
"Dense Captions: " + ", ".join(image_analysis_result["dense_captions"])
)

if "smart_crops" in image_analysis_result:
formatted_result.append(
"Smart Crops: " + ", ".join(image_analysis_result["smart_crops"])
)

if "people" in image_analysis_result:
formatted_result.append(
"People: " + ", ".join(image_analysis_result["people"])
)

return "\n".join(formatted_result)

def _run(
Expand Down
Binary file added libs/community/tests/examples/building.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Tests for the Azure AI Services Image Analysis Tool."""

from pathlib import Path
from typing import Any

import pytest

from langchain_community.tools.azure_ai_services.image_analysis import (
AzureAiServicesImageAnalysisTool,
)

# Tests root (three directories above this module); the examples/ folder
# holding fixture assets lives directly under it.
this_dir = Path(__file__).parents[3]

examples_dir = this_dir / "examples"
# Sample image fixture added by this change (libs/community/tests/examples/building.jpg).
building_path = examples_dir / "building.jpg"


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_content_safety(mocker: Any) -> None:
    """Constructing the tool stores the provided key and endpoint verbatim.

    NOTE(review): the name ``test_content_safety`` looks copy-pasted from the
    content-safety tool tests — this actually exercises the image-analysis
    tool's constructor; consider renaming.
    """
    fake_key = "key"
    fake_endpoint = "endpoint"

    # Patch the Azure SDK entry points so no real client/credential is built.
    for target in (
        "azure.ai.vision.imageanalysis.ImageAnalysisClient",
        "azure.core.credentials.AzureKeyCredential",
    ):
        mocker.patch(target, autospec=True)

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=fake_key, azure_ai_services_endpoint=fake_endpoint
    )

    assert tool.azure_ai_services_key == fake_key
    assert tool.azure_ai_services_endpoint == fake_endpoint


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_analysis(mocker: Any) -> None:
    """A local image analyzed with only CAPTION yields just the caption line.

    The Azure client is fully mocked; every non-caption result attribute is
    set to None so ``_format_image_analysis_result`` emits a single line.
    """
    key = "key"
    endpoint = "endpoint"

    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    # Force the tool to treat the input string as a local file path.
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["CAPTION"],
    )

    # Stub the analysis client: only the caption feature produces a result.
    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = mocker.Mock()
    mock_content_client.analyze.return_value.caption.text = "A building corner."

    mock_content_client.analyze.return_value.objects = None
    mock_content_client.analyze.return_value.tags = None
    mock_content_client.analyze.return_value.read = None
    mock_content_client.analyze.return_value.dense_captions = None
    mock_content_client.analyze.return_value.smart_crops = None
    mock_content_client.analyze.return_value.people = None

    tool.image_analysis_client = mock_content_client

    # Fix: the original used `input`, shadowing the builtin.
    image_path = str(building_path)
    expected = "Caption: A building corner."

    result = tool._run(image_path)
    assert result == expected


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_different_features(mocker: Any) -> None:
    """PEOPLE/CAPTION/SMARTCROPS features produce caption, smart-crop, and people lines.

    smart_crops and people are MagicMocks whose ``.list`` holds plain dicts,
    so the tool's ``str(...)`` conversion yields the dict reprs asserted below.
    """
    key = "key"
    endpoint = "endpoint"

    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    # Force the tool to treat the input string as a local file path.
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["PEOPLE", "CAPTION", "SMARTCROPS"],
    )

    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = mocker.Mock()
    mock_content_client.analyze.return_value.caption.text = "A building corner."

    # Features that were not requested return no result.
    mock_content_client.analyze.return_value.objects = None
    mock_content_client.analyze.return_value.tags = None
    mock_content_client.analyze.return_value.read = None
    mock_content_client.analyze.return_value.dense_captions = None

    # MagicMock so len(...) works; __len__ must be non-zero for the tool
    # to include the section in its output.
    mock_smart_crops = mocker.MagicMock()
    mock_smart_crops.list = [
        {"aspectRatio": 1.97, "boundingBox": {"x": 43, "y": 24, "w": 853, "h": 432}}
    ]
    mock_smart_crops.__len__.return_value = 1
    mock_content_client.analyze.return_value.smart_crops = mock_smart_crops

    mock_people = mocker.MagicMock()
    mock_people.list = [
        {
            "boundingBox": {"x": 454, "y": 44, "w": 408, "h": 531},
            "confidence": 0.9601945281028748,
        },
    ]
    mock_people.__len__.return_value = 1
    mock_content_client.analyze.return_value.people = mock_people

    tool.image_analysis_client = mock_content_client

    # Fix: the original used `input`, shadowing the builtin.
    image_path = str(building_path)
    expected = (
        "Caption: A building corner.\n"
        "Smart Crops: {'aspectRatio': 1.97,"
        " 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}\n"
        "People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531},"
        " 'confidence': 0.9601945281028748}"
    )

    result = tool._run(image_path)
    assert result == expected
Loading