From 9a15e7d8a5673d5b82f9fb5715b5c77f7ee99654 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Tue, 7 Jan 2025 15:32:29 +0100 Subject: [PATCH 1/4] Get rid of install CI check in PR --- .github/workflows/test_package_install_inference.yml | 2 -- .github/workflows/test_package_install_inference_cli.yml | 2 -- .github/workflows/test_package_install_inference_gpu.yml | 2 -- .../test_package_install_inference_gpu_with_extras.yml | 2 -- .github/workflows/test_package_install_inference_sdk.yml | 2 -- .../workflows/test_package_install_inference_with_extras.yml | 2 -- 6 files changed, 12 deletions(-) diff --git a/.github/workflows/test_package_install_inference.yml b/.github/workflows/test_package_install_inference.yml index e186180b9..23b46139f 100644 --- a/.github/workflows/test_package_install_inference.yml +++ b/.github/workflows/test_package_install_inference.yml @@ -1,8 +1,6 @@ name: Test package install - inference on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: diff --git a/.github/workflows/test_package_install_inference_cli.yml b/.github/workflows/test_package_install_inference_cli.yml index 2acfe0080..3b3c0bc9a 100644 --- a/.github/workflows/test_package_install_inference_cli.yml +++ b/.github/workflows/test_package_install_inference_cli.yml @@ -1,8 +1,6 @@ name: Test package install - inference-cli on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: diff --git a/.github/workflows/test_package_install_inference_gpu.yml b/.github/workflows/test_package_install_inference_gpu.yml index e992fd967..0f6dc753b 100644 --- a/.github/workflows/test_package_install_inference_gpu.yml +++ b/.github/workflows/test_package_install_inference_gpu.yml @@ -1,8 +1,6 @@ name: Test package install - inference-gpu on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: diff --git a/.github/workflows/test_package_install_inference_gpu_with_extras.yml b/.github/workflows/test_package_install_inference_gpu_with_extras.yml index 17ec28554..248036495 100644 --- a/.github/workflows/test_package_install_inference_gpu_with_extras.yml +++ b/.github/workflows/test_package_install_inference_gpu_with_extras.yml @@ -1,8 +1,6 @@ name: Test package install - inference-gpu[extras] on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: diff --git a/.github/workflows/test_package_install_inference_sdk.yml b/.github/workflows/test_package_install_inference_sdk.yml index 9e05e6ac3..eb357734b 100644 --- a/.github/workflows/test_package_install_inference_sdk.yml +++ b/.github/workflows/test_package_install_inference_sdk.yml @@ -1,8 +1,6 @@ name: Test package install - inference-sdk on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: diff --git a/.github/workflows/test_package_install_inference_with_extras.yml b/.github/workflows/test_package_install_inference_with_extras.yml index a490f385b..730e60a0f 100644 --- a/.github/workflows/test_package_install_inference_with_extras.yml +++ b/.github/workflows/test_package_install_inference_with_extras.yml @@ -1,8 +1,6 @@ name: Test package install - inference[extras] on: - pull_request: - branches: [main] push: branches: [main] workflow_dispatch: From 1877bf1967b5a1f5327f0ca042a504e455786c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Tue, 7 Jan 2025 16:18:42 +0100 Subject: [PATCH 2/4] Add unit tests for llama model --- .../models/foundation/llama_vision/v1.py | 20 +- 
inference/core/workflows/prototypes/block.py | 3 +- .../foundation/test_llama_3_2_vision.py | 336 ++++++++++++++++++ 3 files changed, 348 insertions(+), 11 deletions(-) create mode 100644 tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py diff --git a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py index 9def8b6fb..f522fff79 100644 --- a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py @@ -223,12 +223,12 @@ class BlockManifest(WorkflowBlockManifest): description="Maximum number of tokens the model can generate in it's response.", gt=1, ) - temperature: Optional[Union[float, Selector(kind=[FLOAT_KIND])]] = Field( + temperature: Union[float, Selector(kind=[FLOAT_KIND])] = Field( default=1, description="Temperature to sample from the model - value in range 0.0-2.0, the higher - the more " 'random / "creative" the generations are.', ) - top_p: Optional[Union[float, Selector(kind=[FLOAT_KIND])]] = Field( + top_p: Union[float, Selector(kind=[FLOAT_KIND])] = Field( default=1.0, description="Top-p to sample from the model - value in range 0.0-1.0, the higher - the more diverse and creative the generations are", ) @@ -258,8 +258,8 @@ def validate(self) -> "BlockManifest": ) return self - @classmethod @field_validator("temperature") + @classmethod def validate_temperature(cls, value: Union[str, float]) -> Union[str, float]: if isinstance(value, str): return value @@ -267,14 +267,16 @@ def validate_temperature(cls, value: Union[str, float]) -> Union[str, float]: raise ValueError( "'temperature' parameter required to be in range [0.0, 2.0]" ) + return value - @classmethod @field_validator("top_p") - def validate_temperature(cls, value: Union[str, float]) -> Union[str, float]: + @classmethod + def validate_top_p(cls, value: Union[str, float]) -> Union[str, float]: if isinstance(value, str): return value if value < 0.0 or value > 1.0: raise ValueError("'top_p' parameter required to be in range [0.0, 2.0]") + return value @classmethod def get_parameters_accepting_batches(cls) -> List[str]: @@ -325,7 +327,7 @@ def run( model_version: ModelVersion, max_tokens: int, temperature: float, - top_p: Optional[float], + top_p: float, max_concurrent_requests: Optional[int], ) -> BlockResult: inference_images = [i.to_inference_format() for i in images] @@ -357,7 +359,7 @@ def run_llama_vision_32_llm_prompting( llama_model_version: ModelVersion, max_tokens: int, temperature: float, - top_p: Optional[float], + top_p: float, max_concurrent_requests: Optional[int], ) -> List[str]: if task_type not in PROMPT_BUILDERS: @@ -397,7 +399,7 @@ def execute_llama_vision_32_requests( model_version_id: str, max_tokens: int, temperature: float, - top_p: Optional[float], + top_p: float, max_concurrent_requests: Optional[int], ) -> List[str]: client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=llama_api_key) @@ -429,7 +431,7 @@ def execute_llama_vision_32_request( llama_model_version: str, max_tokens: int, temperature: float, - top_p: Optional[float], + top_p: float, ) -> str: response = client.chat.completions.create( model=llama_model_version, diff --git a/inference/core/workflows/prototypes/block.py b/inference/core/workflows/prototypes/block.py index b3be8b4fe..cdb01e783 100644 --- a/inference/core/workflows/prototypes/block.py +++ b/inference/core/workflows/prototypes/block.py @@ -1,8 
+1,7 @@ from abc import ABC, abstractmethod from typing import Any, Dict, List, Optional, Type, Union -from openai import BaseModel -from pydantic import ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field from inference.core.workflows.errors import BlockInterfaceError from inference.core.workflows.execution_engine.entities.base import OutputDefinition diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py new file mode 100644 index 000000000..c315f70ae --- /dev/null +++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py @@ -0,0 +1,336 @@ +import time +from typing import Any +from unittest.mock import MagicMock + +import pytest +from openai.types.chat import ChatCompletion, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice +from pydantic import ValidationError + +from inference.core.workflows.core_steps.models.foundation.llama_vision.v1 import ( + BlockManifest, + execute_llama_vision_32_request, +) + + +@pytest.mark.parametrize("value", [None, 1, "a", True]) +def test_llama_3_2_vision_step_validation_when_image_is_invalid( + value: Any, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": value, + "prompt": "$inputs.prompt", + "api_key": "$inputs.open_router_api_key", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +def test_llama_3_2_vision_step_validation_when_prompt_is_given_directly() -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + } + + # when + result = BlockManifest.model_validate(specification) + + # then + assert result == BlockManifest( + type="roboflow_core/llama_3_2_vision@v1", + name="step_1", + images="$inputs.image", + prompt="This is my prompt", + api_key="$inputs.open_router_api_key", + ) + + +@pytest.mark.parametrize("value", [None, []]) +def test_llama_3_2_vision_step_validation_when_prompt_is_invalid( + value: Any, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": value, + "api_key": "$inputs.open_router_api_key", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +@pytest.mark.parametrize( + "value", + [ + "$inputs.model", + "11B (Free) - OpenRouter", + "11B (Regular) - OpenRouter", + "$inputs.model", + "90B (Free) - OpenRouter", + "90B (Regular) - OpenRouter", + ], +) +def test_llama_3_2_vision_step_validation_when_model_type_valid( + value: str, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + "model_version": value, + } + + # when + result = BlockManifest.model_validate(specification) + + assert result == BlockManifest( + type="roboflow_core/llama_3_2_vision@v1", + name="step_1", + images="$inputs.image", + prompt="This is my prompt", + api_key="$inputs.open_router_api_key", + model_version=value, + ) + + +@pytest.mark.parametrize("value", ["some", None]) +def test_llama_3_2_vision_step_validation_when_model_type_invalid( + value: Any, +) -> None: + # given + 
specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + "model_version": value, + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +@pytest.mark.parametrize("value", ["$inputs.api_key", "my-api-key", None]) +def test_llama_3_2_vision_step_validation_when_api_key_not_given( + value: Any, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +def test_llama_3_2_vision_step_validation_when_output_structure_invalid() -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + "output_structure": "INVALID", + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +@pytest.mark.parametrize("value", [-0.5, 3.0]) +def test_llama_3_2_vision_step_validation_when_temperature_is_invalid( + value: float, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + "temperature": value, + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +@pytest.mark.parametrize("value", [-0.5, 3.0]) +def test_llama_3_2_vision_step_validation_when_top_p_is_invalid(value: float) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "step_1", + "images": "$inputs.image", + "prompt": "This is my prompt", + "api_key": "$inputs.open_router_api_key", + "top_p": value, + } + + # when + with pytest.raises(ValidationError): + _ = BlockManifest.model_validate(specification) + + +@pytest.mark.parametrize("value", ["unconstrained", "visual-question-answering"]) +def test_llama_3_2_vision_when_prompt_not_delivered_when_required(value: float) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "task_type": value, + "name": "step_1", + "images": "$inputs.image", + "api_key": "$inputs.open_router_api_key", + } + + # when + with pytest.raises(ValidationError) as e: + _ = BlockManifest.model_validate(specification) + + # then + assert "`prompt`" in str(e.value) + + +@pytest.mark.parametrize( + "value", + [ + "classification", + "multi-label-classification", + ], +) +def test_llama_3_2_vision_when_classes_not_delivered_when_required( + value: float, +) -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "task_type": value, + "name": "step_1", + "images": "$inputs.image", + "api_key": "$inputs.open_router_api_key", + } + + # when + with pytest.raises(ValidationError) as e: + _ = BlockManifest.model_validate(specification) + + # then + assert "`classes`" in str(e.value) + + +def test_llama_3_2_vision_when_output_structure_not_delivered_when_required() -> None: + # given + specification = { + "type": "roboflow_core/llama_3_2_vision@v1", + "task_type": "structured-answering", + "name": "step_1", + "images": "$inputs.image", + "api_key": "$inputs.open_router_api_key", + } + + # when + with 
pytest.raises(ValidationError) as e: + _ = BlockManifest.model_validate(specification) + + # then + assert "`output_structure`" in str(e.value) + + +def test_execute_llama_vision_32_request_when_request_succeeds() -> None: + # given + client = MagicMock() + client.chat.completions.create.return_value = ChatCompletion( + id="38", + choices=[ + Choice( + finish_reason="stop", + index=0, + message=ChatCompletionMessage( + role="assistant", + content="This is content from GPT", + ), + ) + ], + created=int(time.time()), + model="gpt-4o", + object="chat.completion", + ) + + # when + result = execute_llama_vision_32_request( + client=client, + prompt=[{"content": [{"text": "prompt"}]}], + llama_model_version="meta-llama/llama-3.2-11b-vision-instruct:free", + max_tokens=300, + temperature=0.5, + top_p=0.7, + ) + + # then + assert result == "This is content from GPT" + call_kwargs = client.chat.completions.create.call_args[1] + assert call_kwargs["model"] == "meta-llama/llama-3.2-11b-vision-instruct:free" + assert call_kwargs["max_tokens"] == 300 + assert ( + len(call_kwargs["messages"]) == 1 + ), "Only single message is expected to be prompted" + assert ( + call_kwargs["messages"][0]["content"][0]["text"] == "prompt" + ), "Text prompt is expected to be injected without modification" + + +def test_execute_llama_vision_32_request_when_request_fails() -> None: + # given + client = MagicMock() + return_value = MagicMock() + return_value.choices = None + return_value.error = {"message": "Error MSG"} + client.chat.completions.create.return_value = return_value + + # when + with pytest.raises(RuntimeError) as e: + _ = execute_llama_vision_32_request( + client=client, + prompt=[{"content": [{"text": "prompt"}]}], + llama_model_version="meta-llama/llama-3.2-11b-vision-instruct:free", + max_tokens=300, + temperature=0.5, + top_p=0.7, + ) + + # then + call_kwargs = client.chat.completions.create.call_args[1] + assert call_kwargs["model"] == "meta-llama/llama-3.2-11b-vision-instruct:free" + assert call_kwargs["max_tokens"] == 300 + assert ( + len(call_kwargs["messages"]) == 1 + ), "Only single message is expected to be prompted" + assert ( + call_kwargs["messages"][0]["content"][0]["text"] == "prompt" + ), "Text prompt is expected to be injected without modification" + assert "Details: Error MSG" in str(e.value) From a359dcffa69c44b60cd04069c6270ae3185eeac8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Tue, 7 Jan 2025 16:58:19 +0100 Subject: [PATCH 3/4] Add integration tests for llama --- .../models/foundation/llama_vision/v1.py | 32 +- .../test_workflow_with_llama_vision.py | 811 ++++++++++++++++++ .../foundation/test_llama_3_2_vision.py | 19 - 3 files changed, 820 insertions(+), 42 deletions(-) create mode 100644 tests/workflows/integration_tests/execution/test_workflow_with_llama_vision.py diff --git a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py index f522fff79..2ffed31e2 100644 --- a/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py +++ b/inference/core/workflows/core_steps/models/foundation/llama_vision/v1.py @@ -73,6 +73,13 @@ {RELEVANT_TASKS_DOCS_DESCRIPTION} +!!! warning "Issues with structured prompting" + + Model tends to be quite unpredictable when structured output (in our case JSON document) is expected. + That problems may impact tasks like `structured-answering`, `classification` or `multi-label-classification`. 
+ + The cause seems to be quite sensitive "filters" of inappropriate content embedded in model. + #### 🛠️ API providers and model variants @@ -219,19 +226,15 @@ class BlockManifest(WorkflowBlockManifest): examples=["11B (Free) - OpenRouter", "$inputs.llama_model"], ) max_tokens: int = Field( - default=300, + default=500, description="Maximum number of tokens the model can generate in it's response.", gt=1, ) temperature: Union[float, Selector(kind=[FLOAT_KIND])] = Field( - default=1, + default=0.1, description="Temperature to sample from the model - value in range 0.0-2.0, the higher - the more " 'random / "creative" the generations are.', ) - top_p: Union[float, Selector(kind=[FLOAT_KIND])] = Field( - default=1.0, - description="Top-p to sample from the model - value in range 0.0-1.0, the higher - the more diverse and creative the generations are", - ) max_concurrent_requests: Optional[int] = Field( default=None, description="Number of concurrent requests that can be executed by block when batch of input images provided. " @@ -269,15 +272,6 @@ def validate_temperature(cls, value: Union[str, float]) -> Union[str, float]: ) return value - @field_validator("top_p") - @classmethod - def validate_top_p(cls, value: Union[str, float]) -> Union[str, float]: - if isinstance(value, str): - return value - if value < 0.0 or value > 1.0: - raise ValueError("'top_p' parameter required to be in range [0.0, 2.0]") - return value - @classmethod def get_parameters_accepting_batches(cls) -> List[str]: return ["images"] @@ -327,7 +321,6 @@ def run( model_version: ModelVersion, max_tokens: int, temperature: float, - top_p: float, max_concurrent_requests: Optional[int], ) -> BlockResult: inference_images = [i.to_inference_format() for i in images] @@ -341,7 +334,6 @@ def run( llama_model_version=model_version, max_tokens=max_tokens, temperature=temperature, - top_p=top_p, max_concurrent_requests=max_concurrent_requests, ) return [ @@ -359,7 +351,6 @@ def run_llama_vision_32_llm_prompting( llama_model_version: ModelVersion, max_tokens: int, temperature: float, - top_p: float, max_concurrent_requests: Optional[int], ) -> List[str]: if task_type not in PROMPT_BUILDERS: @@ -388,7 +379,6 @@ def run_llama_vision_32_llm_prompting( model_version_id=model_version_id, max_tokens=max_tokens, temperature=temperature, - top_p=top_p, max_concurrent_requests=max_concurrent_requests, ) @@ -399,7 +389,6 @@ def execute_llama_vision_32_requests( model_version_id: str, max_tokens: int, temperature: float, - top_p: float, max_concurrent_requests: Optional[int], ) -> List[str]: client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=llama_api_key) @@ -411,7 +400,6 @@ def execute_llama_vision_32_requests( llama_model_version=model_version_id, max_tokens=max_tokens, temperature=temperature, - top_p=top_p, ) for prompt in llama_prompts ] @@ -431,14 +419,12 @@ def execute_llama_vision_32_request( llama_model_version: str, max_tokens: int, temperature: float, - top_p: float, ) -> str: response = client.chat.completions.create( model=llama_model_version, messages=prompt, max_tokens=max_tokens, temperature=temperature, - top_p=top_p, ) if response.choices is None: error_detail = getattr(response, "error", {}).get("message", "N/A") diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_llama_vision.py b/tests/workflows/integration_tests/execution/test_workflow_with_llama_vision.py new file mode 100644 index 000000000..f35cbf21b --- /dev/null +++ 
b/tests/workflows/integration_tests/execution/test_workflow_with_llama_vision.py @@ -0,0 +1,811 @@ +""" +This test module requires Open Router API key passed via env variable WORKFLOWS_TEST_OPEN_ROUTER_API_KEY. +This is supposed to be used only locally, as that would be too much of a cost in CI +""" + +import os + +import numpy as np +import pytest + +from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS +from inference.core.managers.base import ModelManager +from inference.core.workflows.core_steps.common.entities import StepExecutionMode +from inference.core.workflows.execution_engine.core import ExecutionEngine +from tests.workflows.integration_tests.execution.workflows_gallery_collector.decorators import ( + add_to_workflows_gallery, +) + +OPEN_ROUTER_API_KEY = os.getenv("WORKFLOWS_TEST_OPEN_ROUTER_API_KEY") + +UNCONSTRAINED_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + {"type": "WorkflowParameter", "name": "prompt"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "unconstrained", + "prompt": "$inputs.prompt", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "result", + "selector": "$steps.llama.output", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Prompting LLama Vision 3.2 with arbitrary prompt", + use_case_description=""" +In this example, LLama Vision 3.2 model is prompted with arbitrary text from user + """, + workflow_definition=UNCONSTRAINED_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-arbitrary-prompt", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_unconstrained_prompt( + model_manager: ModelManager, + dogs_image: np.ndarray, + license_plate_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=UNCONSTRAINED_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [dogs_image, license_plate_image], + "api_key": OPEN_ROUTER_API_KEY, + "prompt": "What is the topic of the image?", + } + ) + + # then + assert len(result) == 2, "Single image given, expected single output" + assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered" + assert set(result[1].keys()) == {"result"}, "Expected all outputs to be delivered" + assert ( + isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0 + ), "Expected non-empty string generated" + assert ( + isinstance(result[1]["result"], str) and len(result[1]["result"]) > 0 + ), "Expected non-empty string generated" + + +OCR_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "ocr", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + ], + "outputs": [ + { + "type": "JsonField", + 
"name": "result", + "selector": "$steps.llama.output", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Using LLama Vision 3.2 as OCR model", + use_case_description=""" +In this example, LLama Vision 3.2 model is used as OCR system. User just points task type and do not need to provide +any prompt. + """, + workflow_definition=OCR_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-ocr", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_ocr_prompt( + model_manager: ModelManager, + license_plate_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=OCR_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [license_plate_image], + "api_key": OPEN_ROUTER_API_KEY, + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered" + assert ( + isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0 + ), "Expected non-empty string generated" + + +VQA_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + {"type": "WorkflowParameter", "name": "prompt"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "visual-question-answering", + "prompt": "$inputs.prompt", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "result", + "selector": "$steps.llama.output", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Using LLama Vision 3.2 as Visual Question Answering system", + use_case_description=""" +In this example, LLama Vision 3.2 model is used as VQA system. User provides question via prompt. 
+ """, + workflow_definition=VQA_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-vqa", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_vqa_prompt( + model_manager: ModelManager, + license_plate_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=VQA_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [license_plate_image], + "api_key": OPEN_ROUTER_API_KEY, + "prompt": "What are the brands of the cars?", + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered" + assert ( + isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0 + ), "Expected non-empty string generated" + + +CAPTION_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "caption", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "result", + "selector": "$steps.llama.output", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Using LLama Vision 3.2 as Image Captioning system", + use_case_description=""" +In this example, LLama Vision 3.2 model is used as Image Captioning system. 
+ """, + workflow_definition=CAPTION_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-captioning", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_captioning_prompt( + model_manager: ModelManager, + license_plate_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CAPTION_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [license_plate_image], + "api_key": OPEN_ROUTER_API_KEY, + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == {"result"}, "Expected all outputs to be delivered" + assert ( + isinstance(result[0]["result"], str) and len(result[0]["result"]) > 0 + ), "Expected non-empty string generated" + + +CLASSIFICATION_WORKFLOW_WITH_LEGACY_PARSER = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + {"type": "WorkflowParameter", "name": "classes"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "classification", + "classes": "$inputs.classes", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + { + "type": "roboflow_core/vlm_as_classifier@v1", + "name": "parser", + "image": "$inputs.image", + "vlm_output": "$steps.llama.output", + "classes": "$steps.llama.classes", + }, + { + "type": "roboflow_core/property_definition@v1", + "name": "top_class", + "operations": [ + {"type": "ClassificationPropertyExtract", "property_name": "top_class"} + ], + "data": "$steps.parser.predictions", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "llama_result", + "selector": "$steps.llama.output", + }, + { + "type": "JsonField", + "name": "top_class", + "selector": "$steps.top_class.output", + }, + { + "type": "JsonField", + "name": "parsed_prediction", + "selector": "$steps.parser.*", + }, + ], +} + + +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_multi_class_classifier_prompt_and_legacy_parser( + model_manager: ModelManager, + dogs_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=CLASSIFICATION_WORKFLOW_WITH_LEGACY_PARSER, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [dogs_image], + "api_key": OPEN_ROUTER_API_KEY, + "classes": ["cat", "dog"], + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == { + "llama_result", + "top_class", + "parsed_prediction", + }, "Expected all outputs to be delivered" + print(result[0]["llama_result"]) + assert ( + isinstance(result[0]["llama_result"], str) + and len(result[0]["llama_result"]) > 0 + ), "Expected non-empty string generated" + assert 
result[0]["parsed_prediction"]["error_status"] is False + + +CLASSIFICATION_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + {"type": "WorkflowParameter", "name": "classes"}, + ], + "steps": [ + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$inputs.image", + "task_type": "classification", + "classes": "$inputs.classes", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + { + "type": "roboflow_core/vlm_as_classifier@v2", + "name": "parser", + "image": "$inputs.image", + "vlm_output": "$steps.llama.output", + "classes": "$steps.llama.classes", + }, + { + "type": "roboflow_core/property_definition@v1", + "name": "top_class", + "operations": [ + {"type": "ClassificationPropertyExtract", "property_name": "top_class"} + ], + "data": "$steps.parser.predictions", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "llama_result", + "selector": "$steps.llama.output", + }, + { + "type": "JsonField", + "name": "top_class", + "selector": "$steps.top_class.output", + }, + { + "type": "JsonField", + "name": "parsed_prediction", + "selector": "$steps.parser.*", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Using LLama Vision 3.2 as multi-class classifier", + use_case_description=""" +In this example, LLama Vision 3.2 model is used as classifier. Output from the model is parsed by +special `roboflow_core/vlm_as_classifier@v2` block which turns LLama Vision 3.2 output text into +full-blown prediction, which can later be used by other blocks compatible with +classification predictions - in this case we extract top-class property. 
+    """,
+    workflow_definition=CLASSIFICATION_WORKFLOW,
+    workflow_name_in_app="llama-vision-3-2-multi-class-classifier",
+)
+@pytest.mark.skipif(
+    condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided"
+)
+def test_workflow_with_multi_class_classifier_prompt(
+    model_manager: ModelManager,
+    dogs_image: np.ndarray,
+) -> None:
+    # given
+    workflow_init_parameters = {
+        "workflows_core.model_manager": model_manager,
+        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
+    }
+    execution_engine = ExecutionEngine.init(
+        workflow_definition=CLASSIFICATION_WORKFLOW,
+        init_parameters=workflow_init_parameters,
+        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
+    )
+
+    # when
+    result = execution_engine.run(
+        runtime_parameters={
+            "image": [dogs_image],
+            "api_key": OPEN_ROUTER_API_KEY,
+            "classes": ["cat", "dog"],
+        }
+    )
+
+    # then
+    assert len(result) == 1, "Single image given, expected single output"
+    assert set(result[0].keys()) == {
+        "llama_result",
+        "top_class",
+        "parsed_prediction",
+    }, "Expected all outputs to be delivered"
+    print(result[0]["llama_result"])
+    assert (
+        isinstance(result[0]["llama_result"], str)
+        and len(result[0]["llama_result"]) > 0
+    ), "Expected non-empty string generated"
+    assert result[0]["parsed_prediction"]["error_status"] is False
+
+
+MULTI_LABEL_CLASSIFICATION_WORKFLOW = {
+    "version": "1.0",
+    "inputs": [
+        {"type": "WorkflowImage", "name": "image"},
+        {"type": "WorkflowParameter", "name": "api_key"},
+        {"type": "WorkflowParameter", "name": "classes"},
+    ],
+    "steps": [
+        {
+            "type": "roboflow_core/llama_3_2_vision@v1",
+            "name": "llama",
+            "images": "$inputs.image",
+            "task_type": "multi-label-classification",
+            "classes": "$inputs.classes",
+            "api_key": "$inputs.api_key",
+            "model_version": "11B (Regular) - OpenRouter",
+        },
+        {
+            "type": "roboflow_core/vlm_as_classifier@v2",
+            "name": "parser",
+            "image": "$inputs.image",
+            "vlm_output": "$steps.llama.output",
+            "classes": "$steps.llama.classes",
+        },
+        {
+            "type": "roboflow_core/property_definition@v1",
+            "name": "top_class",
+            "operations": [
+                {"type": "ClassificationPropertyExtract", "property_name": "top_class"}
+            ],
+            "data": "$steps.parser.predictions",
+        },
+    ],
+    "outputs": [
+        {
+            "type": "JsonField",
+            "name": "result",
+            "selector": "$steps.top_class.output",
+        },
+        {
+            "type": "JsonField",
+            "name": "parsed_prediction",
+            "selector": "$steps.parser.*",
+        },
+    ],
+}
+
+
+@add_to_workflows_gallery(
+    category="Workflows with Visual Language Models",
+    use_case_title="Using LLama Vision 3.2 as multi-label classifier",
+    use_case_description="""
+In this example, LLama Vision 3.2 model is used as a multi-label classifier. Output from the model is parsed by
+the special `roboflow_core/vlm_as_classifier@v2` block, which turns the LLama Vision 3.2 output text into
+a full-blown prediction that can later be used by other blocks compatible with
+classification predictions - in this case we extract the top-class property.
+    """,
+    workflow_definition=MULTI_LABEL_CLASSIFICATION_WORKFLOW,
+    workflow_name_in_app="llama-vision-3-2-multi-label-classifier",
+)
+@pytest.mark.skipif(
+    condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided"
+)
+def test_workflow_with_multi_label_classifier_prompt(
+    model_manager: ModelManager,
+    dogs_image: np.ndarray,
+) -> None:
+    # given
+    workflow_init_parameters = {
+        "workflows_core.model_manager": model_manager,
+        "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
+    }
+    execution_engine = ExecutionEngine.init(
+        workflow_definition=MULTI_LABEL_CLASSIFICATION_WORKFLOW,
+        init_parameters=workflow_init_parameters,
+        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
+    )
+
+    # when
+    result = execution_engine.run(
+        runtime_parameters={
+            "image": [dogs_image],
+            "api_key": OPEN_ROUTER_API_KEY,
+            "classes": ["cat", "dog"],
+        }
+    )
+
+    # then
+    assert len(result) == 1, "Single image given, expected single output"
+    assert set(result[0].keys()) == {
+        "result",
+        "parsed_prediction",
+    }, "Expected all outputs to be delivered"
+    assert result[0]["parsed_prediction"]["error_status"] is False
+
+
+STRUCTURED_PROMPTING_WORKFLOW = {
+    "version": "1.0",
+    "inputs": [
+        {"type": "WorkflowImage", "name": "image"},
+        {"type": "WorkflowParameter", "name": "api_key"},
+    ],
+    "steps": [
+        {
+            "type": "roboflow_core/llama_3_2_vision@v1",
+            "name": "llama",
+            "images": "$inputs.image",
+            "task_type": "structured-answering",
+            "output_structure": {
+                "dogs_count": "count of dogs instances in the image",
+                "cats_count": "count of cats instances in the image",
+            },
+            "api_key": "$inputs.api_key",
+            "model_version": "11B (Regular) - OpenRouter",
+        },
+        {
+            "type": "roboflow_core/json_parser@v1",
+            "name": "parser",
+            "raw_json": "$steps.llama.output",
+            "expected_fields": ["dogs_count", "cats_count"],
+        },
+        {
+            "type": "roboflow_core/property_definition@v1",
+            "name": "property_definition",
+            "operations": [{"type": "ToString"}],
+            "data": "$steps.parser.dogs_count",
+        },
+    ],
+    "outputs": [
+        {
+            "type": "JsonField",
+            "name": "llama_output",
+            "selector": "$steps.llama.output",
+        },
+        {
+            "type": "JsonField",
+            "name": "result",
+            "selector": "$steps.property_definition.output",
+        },
+    ],
+}
+
+
+@add_to_workflows_gallery(
+    category="Workflows with Visual Language Models",
+    use_case_title="Using LLama Vision 3.2 to provide structured JSON",
+    use_case_description="""
+In this example, LLama Vision 3.2 model is expected to provide structured output as JSON, which can later be
+parsed by the dedicated `roboflow_core/json_parser@v1` block, which transforms the string into a dictionary
+and exposes its keys to other blocks for further processing. In this case, the parsed output is
+transformed using the `roboflow_core/property_definition@v1` block.
+ """, + workflow_definition=STRUCTURED_PROMPTING_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-structured-prompting", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="OpenRouter API key not provided" +) +def test_workflow_with_structured_prompt( + model_manager: ModelManager, + dogs_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=STRUCTURED_PROMPTING_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [dogs_image], + "api_key": OPEN_ROUTER_API_KEY, + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == { + "result", + "llama_output", + }, "Expected all outputs to be delivered" + print(result[0]["llama_output"]) + assert isinstance(result[0]["result"], str) + + +VLM_AS_SECONDARY_CLASSIFIER_WORKFLOW = { + "version": "1.0", + "inputs": [ + {"type": "WorkflowImage", "name": "image"}, + {"type": "WorkflowParameter", "name": "api_key"}, + { + "type": "WorkflowParameter", + "name": "classes", + "default_value": [ + "russell-terrier", + "wirehaired-pointing-griffon", + "beagle", + ], + }, + ], + "steps": [ + { + "type": "ObjectDetectionModel", + "name": "general_detection", + "image": "$inputs.image", + "model_id": "yolov8n-640", + "class_filter": ["dog"], + }, + { + "type": "Crop", + "name": "cropping", + "image": "$inputs.image", + "predictions": "$steps.general_detection.predictions", + }, + { + "type": "roboflow_core/llama_3_2_vision@v1", + "name": "llama", + "images": "$steps.cropping.crops", + "task_type": "classification", + "classes": "$inputs.classes", + "api_key": "$inputs.api_key", + "model_version": "11B (Regular) - OpenRouter", + }, + { + "type": "roboflow_core/vlm_as_classifier@v2", + "name": "parser", + "image": "$steps.cropping.crops", + "vlm_output": "$steps.llama.output", + "classes": "$steps.llama.classes", + }, + { + "type": "roboflow_core/detections_classes_replacement@v1", + "name": "classes_replacement", + "object_detection_predictions": "$steps.general_detection.predictions", + "classification_predictions": "$steps.parser.predictions", + }, + ], + "outputs": [ + { + "type": "JsonField", + "name": "predictions", + "selector": "$steps.classes_replacement.predictions", + }, + ], +} + + +@add_to_workflows_gallery( + category="Workflows with Visual Language Models", + use_case_title="Using LLama Vision 3.2 as secondary classifier", + use_case_description=""" +In this example, LLama Vision 3.2 model is used as secondary classifier - first, YOLO model +detects dogs, then for each dog we run classification with VLM and at the end we replace +detections classes to have bounding boxes with dogs breeds labels. 
+ +Breeds that we classify: `russell-terrier`, `wirehaired-pointing-griffon`, `beagle` + """, + workflow_definition=VLM_AS_SECONDARY_CLASSIFIER_WORKFLOW, + workflow_name_in_app="llama-vision-3-2-secondary-classifier", +) +@pytest.mark.skipif( + condition=OPEN_ROUTER_API_KEY is None, reason="Open AI API key not provided" +) +def test_workflow_with_secondary_classifier( + model_manager: ModelManager, + dogs_image: np.ndarray, +) -> None: + # given + workflow_init_parameters = { + "workflows_core.model_manager": model_manager, + "workflows_core.step_execution_mode": StepExecutionMode.LOCAL, + } + execution_engine = ExecutionEngine.init( + workflow_definition=VLM_AS_SECONDARY_CLASSIFIER_WORKFLOW, + init_parameters=workflow_init_parameters, + max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS, + ) + + # when + result = execution_engine.run( + runtime_parameters={ + "image": [dogs_image], + "api_key": OPEN_ROUTER_API_KEY, + "classes": ["russell-terrier", "wirehaired-pointing-griffon", "beagle"], + } + ) + + # then + assert len(result) == 1, "Single image given, expected single output" + assert set(result[0].keys()) == { + "predictions", + }, "Expected all outputs to be delivered" + assert "dog" not in set( + result[0]["predictions"].data["class_name"].tolist() + ), "Expected classes to be substituted" diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py index c315f70ae..fdf3868db 100644 --- a/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py +++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_3_2_vision.py @@ -180,23 +180,6 @@ def test_llama_3_2_vision_step_validation_when_temperature_is_invalid( _ = BlockManifest.model_validate(specification) -@pytest.mark.parametrize("value", [-0.5, 3.0]) -def test_llama_3_2_vision_step_validation_when_top_p_is_invalid(value: float) -> None: - # given - specification = { - "type": "roboflow_core/llama_3_2_vision@v1", - "name": "step_1", - "images": "$inputs.image", - "prompt": "This is my prompt", - "api_key": "$inputs.open_router_api_key", - "top_p": value, - } - - # when - with pytest.raises(ValidationError): - _ = BlockManifest.model_validate(specification) - - @pytest.mark.parametrize("value", ["unconstrained", "visual-question-answering"]) def test_llama_3_2_vision_when_prompt_not_delivered_when_required(value: float) -> None: # given @@ -288,7 +271,6 @@ def test_execute_llama_vision_32_request_when_request_succeeds() -> None: llama_model_version="meta-llama/llama-3.2-11b-vision-instruct:free", max_tokens=300, temperature=0.5, - top_p=0.7, ) # then @@ -320,7 +302,6 @@ def test_execute_llama_vision_32_request_when_request_fails() -> None: llama_model_version="meta-llama/llama-3.2-11b-vision-instruct:free", max_tokens=300, temperature=0.5, - top_p=0.7, ) # then From b8581142dbf043a1f6d3ca3a4b4116ae8c13acc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?= Date: Tue, 7 Jan 2025 17:07:07 +0100 Subject: [PATCH 4/4] Fix tests --- .../unit_tests/core/test_roboflow_api.py | 18 ++++++++++++++---- .../executor/test_runtime_input_validator.py | 8 ++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/tests/inference/unit_tests/core/test_roboflow_api.py b/tests/inference/unit_tests/core/test_roboflow_api.py index 23190f17a..ff31c4fab 100644 --- a/tests/inference/unit_tests/core/test_roboflow_api.py +++ b/tests/inference/unit_tests/core/test_roboflow_api.py @@ 
-1724,7 +1724,9 @@ def test_get_workflow_specification_when_connection_error_occurs_but_file_is_cac get_mock.return_value = MagicMock( status_code=200, json=MagicMock( - return_value={"workflow": {"config": json.dumps({"specification": {"some": "some"}})}} + return_value={ + "workflow": {"config": json.dumps({"specification": {"some": "some"}})} + } ), ) _ = get_workflow_specification( @@ -1744,7 +1746,10 @@ def test_get_workflow_specification_when_connection_error_occurs_but_file_is_cac ) # then - assert result == {"some": "some", "id": None}, "Expected workflow specification to be retrieved from file" + assert result == { + "some": "some", + "id": None, + }, "Expected workflow specification to be retrieved from file" @mock.patch.object(roboflow_api.requests, "get") @@ -1760,7 +1765,9 @@ def test_get_workflow_specification_when_consecutive_request_hits_ephemeral_cach get_mock.return_value = MagicMock( status_code=200, json=MagicMock( - return_value={"workflow": {"config": json.dumps({"specification": {"some": "some"}})}} + return_value={ + "workflow": {"config": json.dumps({"specification": {"some": "some"}})} + } ), ) ephemeral_cache = MemoryCache() @@ -1780,7 +1787,10 @@ def test_get_workflow_specification_when_consecutive_request_hits_ephemeral_cach ) # then - assert result == {"some": "some", "id": None}, "Expected workflow specification to be retrieved from file" + assert result == { + "some": "some", + "id": None, + }, "Expected workflow specification to be retrieved from file" assert get_mock.call_count == 1, "Expected remote API to be only called once" diff --git a/tests/workflows/unit_tests/execution_engine/executor/test_runtime_input_validator.py b/tests/workflows/unit_tests/execution_engine/executor/test_runtime_input_validator.py index 66247d874..32dcfe354 100644 --- a/tests/workflows/unit_tests/execution_engine/executor/test_runtime_input_validator.py +++ b/tests/workflows/unit_tests/execution_engine/executor/test_runtime_input_validator.py @@ -21,9 +21,9 @@ def test_validate_runtime_input_when_input_is_valid() -> None: type="ClipComparison", name="a", image="$inputs.image", - text="$inputs.text_1", + texts="$inputs.text_1", ), - manifest_property="text", + manifest_property="texts", ), InputSubstitution( input_parameter_name="text_2", @@ -31,9 +31,9 @@ def test_validate_runtime_input_when_input_is_valid() -> None: type="ClipComparison", name="a", image="$inputs.image", - text="$inputs.text_2", + texts="$inputs.text_2", ), - manifest_property="text", + manifest_property="texts", ), ]