griptape-ai · collindutter · Jul 24, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -49,6 +49,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **BREAKING**: Renamed `drivers-vector-postgresql` extra to `drivers-vector-pgvector`.
 - **BREAKING**: Update `marqo` dependency to `^3.7.0`.
 - **BREAKING**: Removed `drivers-sql-postgresql` extra. Use `drivers-sql` extra and install necessary drivers (i.e. `psycopg2`) separately.
+- **BREAKING**: Removed `ImageQueryDriver` in favor of using `PromptDriver` with `TextArtifact` and `ImageArtifact` inputs.
+- **BREAKING**: Removed `ImageQueryEngine` in favor of using `PromptDriver` directly.
+- **BREAKING**: Removed `ImageQueryTask` in favor of `PromptTask` with `TextArtifact` and `ImageArtifact` inputs.
+- **BREAKING**: `ImageQueryClient` now takes a `PromptDriver` instead of an `ImageQueryEngine`.
 - Removed unnecessary `sqlalchemy-redshift` dependency in `drivers-sql-amazon-redshift` extra.
 - Removed unnecessary `transformers` dependency in `drivers-prompt-huggingface` extra.
 - Removed unnecessary `huggingface-hub` dependency in `drivers-prompt-huggingface-pipeline` extra.

diff --git a/docs/griptape-framework/data/index.md b/docs/griptape-framework/data/index.md
@@ -18,8 +18,6 @@ Griptape provides several abstractions for working with data.
 
 [Extraction Engines](../engines/extraction-engines.md) are used for extracting structured content.
 
-[Image Query Engines](../engines/image-query-engines.md) are used for querying images with text.
-
 [Image Generation Engines](../engines/image-generation-engines.md) are used for generating images.
 
 [Summary Engines](../engines/summary-engines.md) are used for summarizing text content.

diff --git a/docs/griptape-framework/drivers/image-query-drivers.md b/docs/griptape-framework/drivers/image-query-drivers.md
diff --git a/docs/griptape-framework/engines/image-query-engines.md b/docs/griptape-framework/engines/image-query-engines.md
diff --git a/docs/griptape-framework/structures/config.md b/docs/griptape-framework/structures/config.md
@@ -41,11 +41,7 @@ agent = Agent(
     config=AzureOpenAiStructureConfig(
         azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT_3"],
         api_key=os.environ["AZURE_OPENAI_API_KEY_3"]
-    ).merge_config({
-        "image_query_driver": {
-            "azure_deployment": "gpt-4o",
-        },
-    }),
+    )
 )
 ```
 

diff --git a/docs/griptape-framework/structures/tasks.md b/docs/griptape-framework/structures/tasks.md
@@ -655,48 +655,6 @@ pipeline.add_task(
 pipeline.run("An image of a mountain shrouded by clouds")
 ```
 
-## Image Query Task
-
-The [Image Query Task](../../reference/griptape/tasks/image_query_task.md) performs a natural language query on one or more input images. This Task uses an [Image Query Engine](../engines/image-query-engines.md) configured with an [Image Query Driver](../drivers/image-query-drivers.md) to perform the query. The functionality provided by this Task depend on the capabilities of the model provided by the Driver.
-
-This Task accepts two inputs: a query (represented by either a string or a [Text Artifact](../data/artifacts.md#textartifact)) and a list of [Image Artifacts](../data/artifacts.md#imageartifact) or a Callable returning these two values.
-
-```python
-from griptape.engines import ImageQueryEngine
-from griptape.drivers import OpenAiImageQueryDriver
-from griptape.tasks import ImageQueryTask
-from griptape.loaders import ImageLoader
-from griptape.structures import Pipeline
-
-# Create a driver configured to use OpenAI's GPT-4 Vision model.
-driver = OpenAiImageQueryDriver(
-    model="gpt-4o",
-    max_tokens=100,
-)
-
-# Create an engine configured to use the driver.
-engine = ImageQueryEngine(
-    image_query_driver=driver,
-)
-
-# Load the input image artifact.
-with open("tests/resources/mountain.png", "rb") as f:
-    image_artifact = ImageLoader().load(f.read())
-
-# Instantiate a pipeline.
-pipeline = Pipeline()
-
-# Add an ImageQueryTask to the pipeline.
-pipeline.add_task(
-    ImageQueryTask(
-        input=("{{ args[0] }}", [image_artifact]),
-        image_query_engine=engine,
-    )
-)
-
-pipeline.run("Describe the weather in the image")
-```
-
 ## Structure Run Task
 The [Structure Run Task](../../reference/griptape/tasks/structure_run_task.md) runs another Structure with a given input.
 This Task is useful for orchestrating multiple specialized Structures in a single run. Note that the input to the Task is a tuple of arguments that will be passed to the Structure.

diff --git a/docs/griptape-tools/official-tools/image-query-client.md b/docs/griptape-tools/official-tools/image-query-client.md
@@ -5,22 +5,17 @@ This tool allows Agents to execute natural language queries on the contents of i
 ```python
 from griptape.structures import Agent
 from griptape.tools import ImageQueryClient
-from griptape.drivers import OpenAiImageQueryDriver
+from griptape.drivers import OpenAiChatPromptDriver
 from griptape.engines import ImageQueryEngine
 
 # Create an Image Query Driver.
 driver = OpenAiImageQueryDriver(
     model="gpt-4o"
 )
 
-# Create an Image Query Engine configured to use the driver.
-engine = ImageQueryEngine(
-    image_query_driver=driver,
-)
-
 # Create an Image Query Client configured to use the engine.
 tool = ImageQueryClient(
-    image_query_engine=engine,
+    prompt_driver=driver,
 )
 
 # Create an agent and provide the tool to it.

diff --git a/griptape/common/prompt_stack/prompt_stack.py b/griptape/common/prompt_stack/prompt_stack.py
@@ -22,7 +22,7 @@
 
 @define
 class PromptStack(SerializableMixin):
-    messages: list[Message] = field(factory=list, kw_only=True, metadata={"serializable": True})
+    messages: list[Message] = field(factory=list, metadata={"serializable": True})
     tools: list[BaseTool] = field(factory=list, kw_only=True)
 
     @property

diff --git a/griptape/config/amazon_bedrock_structure_config.py b/griptape/config/amazon_bedrock_structure_config.py
@@ -7,14 +7,12 @@
 from griptape.config import StructureConfig
 from griptape.drivers import (
     AmazonBedrockImageGenerationDriver,
-    AmazonBedrockImageQueryDriver,
     AmazonBedrockPromptDriver,
     AmazonBedrockTitanEmbeddingDriver,
     BaseEmbeddingDriver,
     BaseImageGenerationDriver,
     BasePromptDriver,
     BaseVectorStoreDriver,
-    BedrockClaudeImageQueryModelDriver,
     BedrockTitanImageGenerationModelDriver,
     LocalVectorStoreDriver,
 )
@@ -63,18 +61,6 @@ class AmazonBedrockStructureConfig(StructureConfig):
         kw_only=True,
         metadata={"serializable": True},
     )
-    image_query_driver: BaseImageGenerationDriver = field(
-        default=Factory(
-            lambda self: AmazonBedrockImageQueryDriver(
-                session=self.session,
-                model="anthropic.claude-3-5-sonnet-20240620-v1:0",
-                image_query_model_driver=BedrockClaudeImageQueryModelDriver(),
-            ),
-            takes_self=True,
-        ),
-        kw_only=True,
-        metadata={"serializable": True},
-    )
     vector_store_driver: BaseVectorStoreDriver = field(
         default=Factory(lambda self: LocalVectorStoreDriver(embedding_driver=self.embedding_driver), takes_self=True),
         kw_only=True,

diff --git a/griptape/config/anthropic_structure_config.py b/griptape/config/anthropic_structure_config.py
@@ -2,10 +2,8 @@
 
 from griptape.config import StructureConfig
 from griptape.drivers import (
-    AnthropicImageQueryDriver,
     AnthropicPromptDriver,
     BaseEmbeddingDriver,
-    BaseImageQueryDriver,
     BasePromptDriver,
     BaseVectorStoreDriver,
     LocalVectorStoreDriver,
@@ -32,8 +30,3 @@ class AnthropicStructureConfig(StructureConfig):
         kw_only=True,
         metadata={"serializable": True},
     )
-    image_query_driver: BaseImageQueryDriver = field(
-        default=Factory(lambda: AnthropicImageQueryDriver(model="claude-3-5-sonnet-20240620")),
-        kw_only=True,
-        metadata={"serializable": True},
-    )