griptape-ai · collindutter · Nov 13, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 13, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - `TrafilaturaWebScraperDriver.no_ssl` parameter to disable SSL verification. Defaults to `False`.
 - `CsvExtractionEngine.format_header` parameter to format the header row.
+- `PromptStack.from_artifact` factory method for creating a Prompt Stack with a user message from an Artifact.
 
 ### Changed
 
@@ -20,6 +21,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - **BREAKING**: Renamed `Structure.is_executing()` to `Structure.is_running()`.
 - **BREAKING**: Removed ability to pass bytes to `BaseFileLoader.fetch`.
 - **BREAKING**: Updated `CsvExtractionEngine.format_row` to format rows as comma-separated values instead of newline-separated key-value pairs.
+- **BREAKING**: Removed all `ImageQueryDriver`s, use `PromptDriver`s instead.
+- **BREAKING**: Removed `ImageQueryTask`, use `PromptTask` instead.
+- **BREAKING**: Renamed `ImageQueryTool.image_query_driver` to `ImageQueryTool.prompt_driver`.
+- `BasePromptDriver.run` can now accept an Artifact in addition to a Prompt Stack.
 - Improved `CsvExtractionEngine` prompts.
 - Tweaked `PromptResponseRagModule` system prompt to yield answers more consistently.
 - Removed `azure-core` and `azure-storage-blob` dependencies.

diff --git a/MIGRATION.md b/MIGRATION.md
@@ -22,9 +22,9 @@ loader = TextLoader()
 data = loader.parse(b"data")
 ```
 
-### Removed `ImageQueryEngine`
+### Removed `ImageQueryEngine`, `ImageQueryDriver`
 
-`ImageQueryEngine` has been removed. Use `ImageQueryDriver` instead.
+`ImageQueryEngine` and `ImageQueryDriver` have been removed. Use `PromptDriver` instead.
 
 #### Before
 
@@ -45,15 +45,15 @@ engine.run("Describe the weather in the image", [image_artifact])`
 #### After
 
 ```python
-from griptape.drivers import OpenAiImageQueryDriver
-from griptape.engines import ImageQueryEngine
+from griptape.artifacts import ListArtifact, TextArtifact
+from griptape.drivers import OpenAiChatPromptDriver
 from griptape.loaders import ImageLoader
 
-driver = OpenAiImageQueryDriver(model="gpt-4o", max_tokens=256)
+driver = OpenAiChatPromptDriver(model="gpt-4o", max_tokens=256)
 
-image_artifact = ImageLoader().load("mountain.png")
+image_artifact = ImageLoader().load("./assets/mountain.jpg")
 
-driver.query("Describe the weather in the image", [image_artifact])`
+driver.run(ListArtifact([TextArtifact("Describe the weather in the image"), image_artifact]))
 ```
 
 ### Removed `InpaintingImageGenerationEngine`
@@ -209,6 +209,50 @@ driver.run_text_to_image(
 )
 ```
 
+### Removed `ImageQueryTask`, use `PromptTask` instead
+
+`ImageQueryTask` has been removed. Use `PromptTask` instead.
+
+#### Before
+
+```python
+from griptape.loaders import ImageLoader
+from griptape.structures import Pipeline
+from griptape.tasks import ImageQueryTask
+
+image_artifact = ImageLoader().load("mountain.png")
+
+pipeline = Pipeline(
+    tasks=[
+        ImageQueryTask(
+            input=("Describe the weather in the image", [image_artifact]),
+        )
+    ]
+)
+
+pipeline.run("Describe the weather in the image")
+```
+
+#### After
+
+```python
+from griptape.loaders import ImageLoader
+from griptape.structures import Pipeline
+from griptape.tasks import PromptTask
+
+image_artifact = ImageLoader().load("mountain.png")
+
+pipeline = Pipeline(
+    tasks=[
+        PromptTask(
+            input=("Describe the weather in the image", image_artifact),
+        )
+    ]
+)
+
+pipeline.run("Describe the weather in the image")
+```
+
 ## 0.33.X to 0.34.X
 
 ### `AnthropicDriversConfig` Embedding Driver

diff --git a/README.md b/README.md
@@ -36,11 +36,10 @@ Tools provide capabilities for LLMs to interact with data and services. Griptape
 
 Drivers facilitate interactions with external resources and services:
 
-- 🗣️ **Prompt Drivers** manage textual interactions with LLMs.
+- 🗣️ **Prompt Drivers** manage textual and image interactions with LLMs.
 - 🔢 **Embedding Drivers** generate vector embeddings from textual inputs.
 - 💾 **Vector Store Drivers** manage the storage and retrieval of embeddings.
 - 🎨 **Image Generation Drivers** create images from text descriptions.
-- 🔎 **Image Query Drivers** query images from text queries.
 - 💼 **SQL Drivers** interact with SQL databases.
 - 🌐 **Web Scraper Drivers** extract information from web pages.
 - 🧠 **Conversation Memory Drivers** manage the storage and retrieval of conversational data.

diff --git a/docs/griptape-framework/drivers/image-query-drivers.md b/docs/griptape-framework/drivers/image-query-drivers.md
diff --git a/docs/griptape-framework/drivers/prompt-drivers.md b/docs/griptape-framework/drivers/prompt-drivers.md
@@ -19,6 +19,12 @@ Or use them independently:
 --8<-- "docs/griptape-framework/drivers/src/prompt_drivers_2.py"
 ```
 
+You can pass images to the Driver if the model supports it:
+
+```python
+--8<-- "docs/griptape-framework/drivers/src/prompt_drivers_images.py"
+```
+
 ## Prompt Drivers
 
 Griptape offers the following Prompt Drivers for interacting with LLMs.

diff --git a/docs/griptape-framework/drivers/src/image_query_drivers_1.py b/docs/griptape-framework/drivers/src/image_query_drivers_1.py
diff --git a/docs/griptape-framework/drivers/src/image_query_drivers_2.py b/docs/griptape-framework/drivers/src/image_query_drivers_2.py
diff --git a/docs/griptape-framework/drivers/src/image_query_drivers_3.py b/docs/griptape-framework/drivers/src/image_query_drivers_3.py
diff --git a/docs/griptape-framework/drivers/src/image_query_drivers_4.py b/docs/griptape-framework/drivers/src/image_query_drivers_4.py
diff --git a/docs/griptape-framework/drivers/src/image_query_drivers_5.py b/docs/griptape-framework/drivers/src/image_query_drivers_5.py
diff --git a/docs/griptape-framework/drivers/src/prompt_drivers_images.py b/docs/griptape-framework/drivers/src/prompt_drivers_images.py
@@ -0,0 +1,10 @@
+from griptape.artifacts import ListArtifact, TextArtifact
+from griptape.drivers import OpenAiChatPromptDriver
+from griptape.loaders import ImageLoader
+
+driver = OpenAiChatPromptDriver(model="gpt-4o", max_tokens=256)
+
+image_artifact = ImageLoader().load("./tests/resources/mountain.jpg")
+text_artifact = TextArtifact("Describe the weather in the image")
+
+driver.run(ListArtifact([text_artifact, image_artifact]))
diff --git a/docs/griptape-framework/structures/src/tasks_15.py b/docs/griptape-framework/structures/src/tasks_15.py
diff --git a/docs/griptape-framework/structures/task-memory.md b/docs/griptape-framework/structures/task-memory.md
@@ -279,6 +279,7 @@ Today, these include:
 - [ExtractionTool](../../griptape-tools/official-tools/extraction-tool.md)
 - [RagClient](../../griptape-tools/official-tools/rag-tool.md)
 - [FileManagerTool](../../griptape-tools/official-tools/file-manager-tool.md)
+- [ImageQueryTool](../../griptape-tools/official-tools/image-query-tool.md)
 
 ## Task Memory Considerations
 

diff --git a/docs/griptape-framework/structures/tasks.md b/docs/griptape-framework/structures/tasks.md
@@ -362,16 +362,6 @@ The [Outpainting Image Generation Task](../../reference/griptape/tasks/outpainti
 --8<-- "docs/griptape-framework/structures/src/tasks_14.py"
 ```
 
-## Image Query Task
-
-The [Image Query Task](../../reference/griptape/tasks/image_query_task.md) performs a natural language query on one or more input images. This Task uses an [Image Query Driver](../drivers/image-query-drivers.md) to perform the query. The functionality provided by this Task depend on the capabilities of the model provided by the Driver.
-
-This Task accepts two inputs: a query (represented by either a string or a [Text Artifact](../data/artifacts.md#text)) and a list of [Image Artifacts](../data/artifacts.md#image) or a Callable returning these two values.
-
-```python
---8<-- "docs/griptape-framework/structures/src/tasks_15.py"
-```
-
 ## Structure Run Task
 
 The [Structure Run Task](../../reference/griptape/tasks/structure_run_task.md) runs another Structure with a given input.

diff --git a/docs/griptape-tools/official-tools/src/image_query_tool_1.py b/docs/griptape-tools/official-tools/src/image_query_tool_1.py
@@ -1,13 +1,12 @@
-from griptape.drivers import OpenAiImageQueryDriver
+from griptape.drivers import OpenAiChatPromptDriver
 from griptape.structures import Agent
 from griptape.tools import ImageQueryTool
 
-# Create an Image Query Driver.
-driver = OpenAiImageQueryDriver(model="gpt-4o")
+driver = OpenAiChatPromptDriver(model="gpt-4o")
 
 # Create an Image Query Tool configured to use the engine.
 tool = ImageQueryTool(
-    image_query_driver=driver,
+    prompt_driver=driver,
 )
 
 # Create an agent and provide the tool to it.

diff --git a/griptape/common/prompt_stack/prompt_stack.py b/griptape/common/prompt_stack/prompt_stack.py
@@ -60,6 +60,13 @@ def add_user_message(self, artifact: str | BaseArtifact) -> Message:
     def add_assistant_message(self, artifact: str | BaseArtifact) -> Message:
         return self.add_message(artifact, Message.ASSISTANT_ROLE)
 
+    @classmethod
+    def from_artifact(cls, artifact: BaseArtifact) -> PromptStack:
+        prompt_stack = cls()
+        prompt_stack.add_user_message(artifact)
+
+        return prompt_stack
+
     def __to_message_content(self, artifact: str | BaseArtifact) -> list[BaseMessageContent]:
         if isinstance(artifact, str):
             return [TextMessageContent(TextArtifact(artifact))]

diff --git a/griptape/configs/drivers/amazon_bedrock_drivers_config.py b/griptape/configs/drivers/amazon_bedrock_drivers_config.py
@@ -7,10 +7,8 @@
 from griptape.configs.drivers import DriversConfig
 from griptape.drivers import (
     AmazonBedrockImageGenerationDriver,
-    AmazonBedrockImageQueryDriver,
     AmazonBedrockPromptDriver,
     AmazonBedrockTitanEmbeddingDriver,
-    BedrockClaudeImageQueryModelDriver,
     BedrockTitanImageGenerationModelDriver,
     LocalVectorStoreDriver,
 )
@@ -45,14 +43,6 @@ def image_generation_driver(self) -> AmazonBedrockImageGenerationDriver:
             image_generation_model_driver=BedrockTitanImageGenerationModelDriver(),
         )
 
-    @lazy_property()
-    def image_query_driver(self) -> AmazonBedrockImageQueryDriver:
-        return AmazonBedrockImageQueryDriver(
-            session=self.session,
-            model="anthropic.claude-3-5-sonnet-20240620-v1:0",
-            image_query_model_driver=BedrockClaudeImageQueryModelDriver(),
-        )
-
     @lazy_property()
     def vector_store_driver(self) -> LocalVectorStoreDriver:
         return LocalVectorStoreDriver(