Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

community: Add configurable VisualFeatures to the AzureAiServicesImageAnalysisTool #27444

Merged
merged 9 commits into from
Dec 16, 2024
1 change: 1 addition & 0 deletions libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ atlassian-python-api>=3.36.0,<4
azure-ai-documentintelligence>=1.0.0b1,<2
azure-identity>=1.15.0,<2
azure-search-documents==11.4.0
azure.ai.vision.imageanalysis>=1.0.0,<2
beautifulsoup4>=4,<5
bibtexparser>=1.4.0,<2
cassio>=0.1.6,<0.2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,31 @@ class AzureAiServicesImageAnalysisTool(BaseTool): # type: ignore[override]
"""Tool that queries the Azure AI Services Image Analysis API.

In order to set this up, follow instructions at:
https://learn.microsoft.com/en-us/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40
https://learn.microsoft.com/azure/ai-services/computer-vision/quickstarts-sdk/image-analysis-client-library-40

Attributes:
azure_ai_services_key (Optional[str]): The API key for Azure AI Services.
azure_ai_services_endpoint (Optional[str]): The endpoint URL for Azure AI Services.
visual_features Any: The visual features to analyze in the image, can be set as
either strings or azure.ai.vision.imageanalysis.models.VisualFeatures.
(e.g. 'TAGS', VisualFeatures.CAPTION).
image_analysis_client (Any): The client for interacting
with Azure AI Services Image Analysis.
name (str): The name of the tool.
description (str): A description of the tool,
including its purpose and expected input.
"""

azure_ai_services_key: str = "" #: :meta private:
azure_ai_services_endpoint: str = "" #: :meta private:
image_analysis_client: Any #: :meta private:
visual_features: Any #: :meta private:
azure_ai_services_key: Optional[str] = None #: :meta private:
azure_ai_services_endpoint: Optional[str] = None #: :meta private:
visual_features: Any = None
image_analysis_client: Any = None #: :meta private:

name: str = "azure_ai_services_image_analysis"
description: str = (
"A wrapper around Azure AI Services Image Analysis. "
"Useful for when you need to analyze images. "
"Input should be a url to an image."
"Input must be a url string or path string to an image."
)

@model_validator(mode="before")
Expand Down Expand Up @@ -68,13 +80,16 @@ def validate_environment(cls, values: Dict) -> Any:
f"Initialization of Azure AI Vision Image Analysis client failed: {e}"
)

values["visual_features"] = [
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
]

visual_features = values.get(
"visual_features",
[
VisualFeatures.TAGS,
VisualFeatures.OBJECTS,
VisualFeatures.CAPTION,
VisualFeatures.READ,
],
)
values["visual_features"] = visual_features
return values

def _image_analysis(self, image_path: str) -> Dict:
Expand Down Expand Up @@ -115,6 +130,17 @@ def _image_analysis(self, image_path: str) -> Dict:
if result.read is not None and len(result.read.blocks) > 0:
res_dict["text"] = [line.text for line in result.read.blocks[0].lines]

if result.dense_captions is not None and len(result.dense_captions) > 0:
res_dict["dense_captions"] = [
str(dc) for dc in result.dense_captions.list
]

if result.smart_crops is not None and len(result.smart_crops) > 0:
res_dict["smart_crops"] = [str(sc) for sc in result.smart_crops.list]

if result.people is not None and len(result.people) > 0:
res_dict["people"] = [str(p) for p in result.people.list]

return res_dict

def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
Expand All @@ -136,6 +162,21 @@ def _format_image_analysis_result(self, image_analysis_result: Dict) -> str:
if "text" in image_analysis_result and len(image_analysis_result["text"]) > 0:
formatted_result.append("Text: " + ", ".join(image_analysis_result["text"]))

if "dense_captions" in image_analysis_result:
formatted_result.append(
"Dense Captions: " + ", ".join(image_analysis_result["dense_captions"])
)

if "smart_crops" in image_analysis_result:
formatted_result.append(
"Smart Crops: " + ", ".join(image_analysis_result["smart_crops"])
)

if "people" in image_analysis_result:
formatted_result.append(
"People: " + ", ".join(image_analysis_result["people"])
)

return "\n".join(formatted_result)

def _run(
Expand Down
Binary file added libs/community/tests/examples/building.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""Tests for the Azure AI Services Image Analysis Tool."""

from pathlib import Path
from typing import Any

import pytest

from langchain_community.tools.azure_ai_services.image_analysis import (
AzureAiServicesImageAnalysisTool,
)

# Tests root (three directories above this module); the examples/ folder
# holding fixture assets lives directly under it.
this_dir = Path(__file__).parents[3]

examples_dir = this_dir / "examples"
# Sample image fixture added by this change (libs/community/tests/examples/building.jpg).
building_path = examples_dir / "building.jpg"


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_content_safety(mocker: Any) -> None:
    """Constructing the tool stores the provided key and endpoint verbatim.

    NOTE(review): the name ``test_content_safety`` looks copy-pasted from the
    content-safety tool tests — this actually exercises the image-analysis
    tool's constructor; consider renaming.
    """
    fake_key = "key"
    fake_endpoint = "endpoint"

    # Patch the Azure SDK entry points so no real client/credential is built.
    for target in (
        "azure.ai.vision.imageanalysis.ImageAnalysisClient",
        "azure.core.credentials.AzureKeyCredential",
    ):
        mocker.patch(target, autospec=True)

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=fake_key, azure_ai_services_endpoint=fake_endpoint
    )

    assert tool.azure_ai_services_key == fake_key
    assert tool.azure_ai_services_endpoint == fake_endpoint


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_analysis(mocker: Any) -> None:
    """A local image analyzed with only CAPTION yields just the caption line.

    The Azure client is fully mocked; every non-caption result attribute is
    set to None so ``_format_image_analysis_result`` emits a single line.
    """
    key = "key"
    endpoint = "endpoint"

    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    # Force the tool to treat the input string as a local file path.
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["CAPTION"],
    )

    # Stub the analysis client: only the caption feature produces a result.
    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = mocker.Mock()
    mock_content_client.analyze.return_value.caption.text = "A building corner."

    mock_content_client.analyze.return_value.objects = None
    mock_content_client.analyze.return_value.tags = None
    mock_content_client.analyze.return_value.read = None
    mock_content_client.analyze.return_value.dense_captions = None
    mock_content_client.analyze.return_value.smart_crops = None
    mock_content_client.analyze.return_value.people = None

    tool.image_analysis_client = mock_content_client

    # Fix: the original used `input`, shadowing the builtin.
    image_path = str(building_path)
    expected = "Caption: A building corner."

    result = tool._run(image_path)
    assert result == expected


@pytest.mark.requires("azure.ai.vision.imageanalysis")
def test_local_image_different_features(mocker: Any) -> None:
    """PEOPLE/CAPTION/SMARTCROPS features produce caption, smart-crop, and people lines.

    smart_crops and people are MagicMocks whose ``.list`` holds plain dicts,
    so the tool's ``str(...)`` conversion yields the dict reprs asserted below.
    """
    key = "key"
    endpoint = "endpoint"

    mocker.patch("azure.ai.vision.imageanalysis.ImageAnalysisClient", autospec=True)
    mocker.patch("azure.core.credentials.AzureKeyCredential", autospec=True)
    # Force the tool to treat the input string as a local file path.
    mocker.patch(
        "langchain_community.tools.azure_ai_services.utils.detect_file_src_type",
        return_value="local",
    )

    tool = AzureAiServicesImageAnalysisTool(
        azure_ai_services_key=key,
        azure_ai_services_endpoint=endpoint,
        visual_features=["PEOPLE", "CAPTION", "SMARTCROPS"],
    )

    mock_content_client = mocker.Mock()
    mock_content_client.analyze.return_value = mocker.Mock()
    mock_content_client.analyze.return_value.caption.text = "A building corner."

    # Features that were not requested return no result.
    mock_content_client.analyze.return_value.objects = None
    mock_content_client.analyze.return_value.tags = None
    mock_content_client.analyze.return_value.read = None
    mock_content_client.analyze.return_value.dense_captions = None

    # MagicMock so len(...) works; __len__ must be non-zero for the tool
    # to include the section in its output.
    mock_smart_crops = mocker.MagicMock()
    mock_smart_crops.list = [
        {"aspectRatio": 1.97, "boundingBox": {"x": 43, "y": 24, "w": 853, "h": 432}}
    ]
    mock_smart_crops.__len__.return_value = 1
    mock_content_client.analyze.return_value.smart_crops = mock_smart_crops

    mock_people = mocker.MagicMock()
    mock_people.list = [
        {
            "boundingBox": {"x": 454, "y": 44, "w": 408, "h": 531},
            "confidence": 0.9601945281028748,
        },
    ]
    mock_people.__len__.return_value = 1
    mock_content_client.analyze.return_value.people = mock_people

    tool.image_analysis_client = mock_content_client

    # Fix: the original used `input`, shadowing the builtin.
    image_path = str(building_path)
    expected = (
        "Caption: A building corner.\n"
        "Smart Crops: {'aspectRatio': 1.97,"
        " 'boundingBox': {'x': 43, 'y': 24, 'w': 853, 'h': 432}}\n"
        "People: {'boundingBox': {'x': 454, 'y': 44, 'w': 408, 'h': 531},"
        " 'confidence': 0.9601945281028748}"
    )

    result = tool._run(image_path)
    assert result == expected
Loading