From 57fa0f75d5a89429b4fd67dd18eb64884a238dd3 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 5 Sep 2024 13:26:13 -0700 Subject: [PATCH] Artifacts are just data --- CHANGELOG.md | 5 +- MIGRATION.md | 19 ------- docs/griptape-framework/data/artifacts.md | 50 +++++++++---------- griptape/artifacts/__init__.py | 14 +++--- griptape/artifacts/base_system_artifact.py | 10 ---- griptape/artifacts/blob_artifact.py | 4 ++ griptape/artifacts/boolean_artifact.py | 17 +++++-- griptape/artifacts/csv_row_artifact.py | 34 ------------- griptape/artifacts/error_artifact.py | 7 ++- griptape/artifacts/image_artifact.py | 4 +- griptape/artifacts/info_artifact.py | 7 ++- griptape/artifacts/list_artifact.py | 4 +- .../contents/text_message_content.py | 4 +- griptape/common/prompt_stack/prompt_stack.py | 6 +-- griptape/mixins/__init__.py | 2 +- ...mixin.py => artifact_file_output_mixin.py} | 0 tests/unit/artifacts/test_csv_row_artifact.py | 30 ----------- 17 files changed, 67 insertions(+), 150 deletions(-) delete mode 100644 griptape/artifacts/base_system_artifact.py delete mode 100644 griptape/artifacts/csv_row_artifact.py rename griptape/mixins/{media_artifact_file_output_mixin.py => artifact_file_output_mixin.py} (100%) delete mode 100644 tests/unit/artifacts/test_csv_row_artifact.py diff --git a/CHANGELOG.md b/CHANGELOG.md index e5de54e3be..ccecb0768e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **BREAKING**: Removed `MediaArtifact`, use `ImageArtifact` or `AudioArtifact` instead. -- **BREAKING**: Removed `BooleanArtifact`, use `JsonArtifact` instead. - **BREAKING**: Removed `CsvRowArtifact`. - **BREAKING**: `CsvLoader`, `DataframeLoader`, and `SqlLoader` now return `list[TextArtifact]`. - **BREAKING**: Removed `ImageArtifact.media_type`. 
@@ -20,8 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **BREAKING**: Moved `ImageArtifact.prompt` and `ImageArtifact.model` into `ImageArtifact.meta`. - **BREAKING**: `ImageArtifact.to_text()` now returns the base64 encoded image. - Updated `JsonArtifact` value converter to properly handle more types. -- `AudioArtifact` now subclasses `BaseArtifact` instead of `MediaArtifact`. -- `ImageArtifact` now subclasses `BaseArtifact` instead of `MediaArtifact`. +- `AudioArtifact` now subclasses `BlobArtifact` instead of `MediaArtifact`. +- `ImageArtifact` now subclasses `BlobArtifact` instead of `MediaArtifact`. - Passing a dictionary as the value to `TextArtifact` will convert to a key-value formatted string. - Removed `__add__` method from `BaseArtifact`, implemented it where necessary. diff --git a/MIGRATION.md b/MIGRATION.md index 39932d4fac..ddf9b9c003 100644 --- a/MIGRATION.md +++ b/MIGRATION.md @@ -36,25 +36,6 @@ audio_artifact = AudioArtifact( ) ``` -### Removed `BooleanArtifact` - -`BooleanArtifact` has been removed. Use `JsonArtifact` instead. - -#### Before - -```python -boolean_artifact = BooleanArtifact("true") - -print(boolean_artifact.value) # Value is True -``` - -#### After -```python -json_artifact = JsonArtifact("true") - -print(json_artifact.value) # Value is True -``` - ### Removed `CsvRowArtifact` `CsvRowArtifact` has been removed. Use `TextArtifact` instead. diff --git a/docs/griptape-framework/data/artifacts.md b/docs/griptape-framework/data/artifacts.md index b906367319..2edd1ebec2 100644 --- a/docs/griptape-framework/data/artifacts.md +++ b/docs/griptape-framework/data/artifacts.md @@ -5,54 +5,50 @@ search: ## Overview - -**[Artifacts](../../reference/griptape/artifacts/base_artifact.md)** are used to store data that can be provided as input to or received as output from a Language Learning Model (LLM). 
+**[Artifacts](../../reference/griptape/artifacts/base_artifact.md)** are the core data structure in Griptape. They are used to encapsulate data and enhance it with metadata. ## Text -[TextArtifact](../../reference/griptape/artifacts/text_artifact.md)s store textual data. They can be used to count tokens using the [token_count()](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.token_count) method with a tokenizer, generate a text embedding through the [generate_embedding()](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.generate_embedding) method, and access the embedding with the [embedding](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.embedding) property. +[TextArtifact](../../reference/griptape/artifacts/text_artifact.md)s store textual data. They offer methods such as [token_count()](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.token_count) for counting tokens with a tokenizer, and [generate_embedding()](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.generate_embedding) for creating text embeddings. You can also access the embedding via the [embedding](../../reference/griptape/artifacts/text_artifact.md#griptape.artifacts.text_artifact.TextArtifact.embedding) property. -[TaskMemory](../../reference/griptape/memory/task/task_memory.md) automatically stores `TextArtifacts` returned by tool activities and provides their IDs back to the LLM. +When `TextArtifact`s are returned from Tools, they will be stored in [Task Memory](../../griptape-framework/structures/task-memory.md) if the Tool has set `off_prompt=True`. -## Image +## Blob -[ImageArtifact](../../reference/griptape/artifacts/image_artifact.md)s store image data. 
They include binary image data and metadata such as MIME type, dimensions, and prompt and model information for images returned by [image generation drivers](../drivers/image-generation-drivers.md). They inherit functionality from [BlobArtifacts](#blob). +[BlobArtifact](../../reference/griptape/artifacts/blob_artifact.md)s store binary large objects (blobs). -## Audio +When `BlobArtifact`s are returned from Tools, they will be stored in [Task Memory](../../griptape-framework/structures/task-memory.md) if the Tool has set `off_prompt=True`. -[AudioArtifact](../../reference/griptape/artifacts/audio_artifact.md)s store audio content, including binary audio data and metadata such as format, duration, and prompt and model information for audio returned by generative models. They inherit from [BlobArtifacts](#blob). +### Image -## Action +[ImageArtifact](../../reference/griptape/artifacts/image_artifact.md)s store image data. This includes binary image data along with metadata such as MIME type and dimensions. They are a subclass of [BlobArtifacts](#blob). -[ActionArtifact](../../reference/griptape/artifacts/action_artifact.md)s represent actions taken by the LLM. Currently, the only supported action is [ToolAction](../../reference/griptape/common/actions/tool_action.md), which is used to execute a [Tool](../../griptape-framework/tools/index.md). +### Audio -## JSON +[AudioArtifact](../../reference/griptape/artifacts/audio_artifact.md)s store audio content. This includes binary audio data and metadata such as format and duration. They are a subclass of [BlobArtifacts](#blob). -[JsonArtifact](../../reference/griptape/artifacts/json_artifact.md)s store JSON-serializable data. Any data assigned to the `value` property is converted using `json.dumps(json.loads(value))`. - -## Generic +## List -[GenericArtifact](../../reference/griptape/artifacts/generic_artifact.md)s act as an escape hatch for passing any type of data that does not fit into any other artifact type.
While generally not recommended, they are suitable for specific scenarios. For example, see [talking to a video](../../examples/talk-to-a-video.md), which demonstrates using a `GenericArtifact` to pass a Gemini-specific video file. +[ListArtifact](../../reference/griptape/artifacts/list_artifact.md)s store lists of Artifacts. -## System Artifacts +When `ListArtifact`s are returned from Tools, their elements will be stored in [Task Memory](../../griptape-framework/structures/task-memory.md) if the element is either a `TextArtifact` or a `BlobArtifact` and the Tool has set `off_prompt=True`. -These Artifacts don't map to an LLM modality. They must be transformed in some way before they can be used as LLM input. +## Info -### Blob +[InfoArtifact](../../reference/griptape/artifacts/info_artifact.md)s store small pieces of textual information. These are useful for conveying messages about the execution or results of an operation, such as "No results found" or "Operation completed successfully." -[BlobArtifact](../../reference/griptape/artifacts/blob_artifact.md)s store binary large objects (blobs) and are used to pass unstructured data back to the LLM via [InfoArtifact](#info). - -`TaskMemory` automatically stores `BlobArtifacts` returned by tool activities, allowing them to be reused by other tools. +## JSON -### Info +[JsonArtifact](../../reference/griptape/artifacts/json_artifact.md)s store JSON-serializable data. Any data assigned to the `value` property is processed using `json.dumps(json.loads(value))`. -[InfoArtifact](../../reference/griptape/artifacts/info_artifact.md)s store short notifications that are passed back to the LLM without being stored in Task Memory. +## Error -### Error +[ErrorArtifact](../../reference/griptape/artifacts/error_artifact.md)s store exception information, providing a structured way to convey errors. 
-[ErrorArtifact](../../reference/griptape/artifacts/error_artifact.md)s store errors that are passed back to the LLM without being stored in Task Memory. +## Action -### List +[ActionArtifact](../../reference/griptape/artifacts/action_artifact.md)s represent actions taken by an LLM. Currently, the only supported action type is [ToolAction](../../reference/griptape/common/actions/tool_action.md), which is used to execute a [Tool](../../griptape-framework/tools/index.md). -[ListArtifact](../../reference/griptape/artifacts/list_artifact.md)s store lists of Artifacts that can be passed to the LLM. +## Generic +[GenericArtifact](../../reference/griptape/artifacts/generic_artifact.md)s provide a flexible way to pass data that does not fit into any other artifact category. While not generally recommended, they can be useful for specific use cases. For instance, see [talking to a video](../../examples/talk-to-a-video.md), which demonstrates using a `GenericArtifact` to pass a Gemini-specific video file. 
diff --git a/griptape/artifacts/__init__.py b/griptape/artifacts/__init__.py index a647d05782..0e58a8a764 100644 --- a/griptape/artifacts/__init__.py +++ b/griptape/artifacts/__init__.py @@ -1,27 +1,25 @@ from .base_artifact import BaseArtifact -from .base_system_artifact import BaseSystemArtifact - +from .error_artifact import ErrorArtifact +from .info_artifact import InfoArtifact from .text_artifact import TextArtifact +from .json_artifact import JsonArtifact from .blob_artifact import BlobArtifact +from .boolean_artifact import BooleanArtifact +from .list_artifact import ListArtifact from .image_artifact import ImageArtifact from .audio_artifact import AudioArtifact -from .json_artifact import JsonArtifact from .action_artifact import ActionArtifact from .generic_artifact import GenericArtifact -from .error_artifact import ErrorArtifact -from .info_artifact import InfoArtifact -from .list_artifact import ListArtifact - __all__ = [ "BaseArtifact", - "BaseSystemArtifact", "ErrorArtifact", "InfoArtifact", "TextArtifact", "JsonArtifact", "BlobArtifact", + "BooleanArtifact", "ListArtifact", "ImageArtifact", "AudioArtifact", diff --git a/griptape/artifacts/base_system_artifact.py b/griptape/artifacts/base_system_artifact.py deleted file mode 100644 index e5e7bfab6a..0000000000 --- a/griptape/artifacts/base_system_artifact.py +++ /dev/null @@ -1,10 +0,0 @@ -from abc import ABC - -from griptape.artifacts import BaseArtifact - - -class BaseSystemArtifact(BaseArtifact, ABC): - """Serves as the base class for all Artifacts specific to Griptape.""" - - def to_text(self) -> str: - return self.value diff --git a/griptape/artifacts/blob_artifact.py b/griptape/artifacts/blob_artifact.py index 072b6cb5b9..9fe729a63b 100644 --- a/griptape/artifacts/blob_artifact.py +++ b/griptape/artifacts/blob_artifact.py @@ -28,6 +28,10 @@ class BlobArtifact(BaseArtifact): encoding: str = field(default="utf-8", kw_only=True) encoding_error_handler: str = field(default="strict", kw_only=True) + 
@property + def mime_type(self) -> str: + return "application/octet-stream" + def to_bytes(self) -> bytes: return self.value diff --git a/griptape/artifacts/boolean_artifact.py b/griptape/artifacts/boolean_artifact.py index 5bcdfac9ba..dd8bb1504b 100644 --- a/griptape/artifacts/boolean_artifact.py +++ b/griptape/artifacts/boolean_artifact.py @@ -9,17 +9,23 @@ @define class BooleanArtifact(BaseArtifact): + """Stores a boolean value. + + Attributes: + value: The boolean value. + """ + value: bool = field(converter=bool, metadata={"serializable": True}) @classmethod - def parse_bool(cls, value: Union[str, bool]) -> BooleanArtifact: # noqa: FBT001 - """Convert a string literal or bool to a BooleanArtifact. The string must be either "true" or "false" with any casing.""" + def parse_bool(cls, value: Union[str, bool]) -> BooleanArtifact: + """Convert a string literal or bool to a BooleanArtifact. The string must be either "true" or "false".""" if value is not None: if isinstance(value, str): if value.lower() == "true": - return BooleanArtifact(True) # noqa: FBT003 + return BooleanArtifact(value=True) elif value.lower() == "false": - return BooleanArtifact(False) # noqa: FBT003 + return BooleanArtifact(value=False) elif isinstance(value, bool): return BooleanArtifact(value) raise ValueError(f"Cannot convert '{value}' to BooleanArtifact") @@ -29,3 +35,6 @@ def __add__(self, other: BaseArtifact) -> BooleanArtifact: def __eq__(self, value: object) -> bool: return self.value is value + + def to_text(self) -> str: + return str(self.value).lower() diff --git a/griptape/artifacts/csv_row_artifact.py b/griptape/artifacts/csv_row_artifact.py deleted file mode 100644 index 00f1047fcc..0000000000 --- a/griptape/artifacts/csv_row_artifact.py +++ /dev/null @@ -1,34 +0,0 @@ -from __future__ import annotations - -import csv -import io - -from attrs import define, field - -from griptape.artifacts import BaseArtifact, TextArtifact - - -@define -class CsvRowArtifact(TextArtifact): - value: 
dict[str, str] = field(converter=BaseArtifact.value_to_dict, metadata={"serializable": True}) - delimiter: str = field(default=",", kw_only=True, metadata={"serializable": True}) - - def __add__(self, other: BaseArtifact) -> CsvRowArtifact: - return CsvRowArtifact(self.value | other.value) - - def __bool__(self) -> bool: - return len(self) > 0 - - def to_text(self) -> str: - with io.StringIO() as csvfile: - writer = csv.DictWriter( - csvfile, - fieldnames=self.value.keys(), - quoting=csv.QUOTE_MINIMAL, - delimiter=self.delimiter, - ) - - writer.writeheader() - writer.writerow(self.value) - - return csvfile.getvalue().strip() diff --git a/griptape/artifacts/error_artifact.py b/griptape/artifacts/error_artifact.py index 086cdb8935..27e6a37ab0 100644 --- a/griptape/artifacts/error_artifact.py +++ b/griptape/artifacts/error_artifact.py @@ -4,11 +4,11 @@ from attrs import define, field -from griptape.artifacts import BaseSystemArtifact +from griptape.artifacts import BaseArtifact @define -class ErrorArtifact(BaseSystemArtifact): +class ErrorArtifact(BaseArtifact): """Represents an error that may want to be conveyed to the LLM. Attributes: @@ -18,3 +18,6 @@ class ErrorArtifact(BaseSystemArtifact): value: str = field(converter=str, metadata={"serializable": True}) exception: Optional[Exception] = field(default=None, kw_only=True, metadata={"serializable": False}) + + def to_text(self) -> str: + return self.value diff --git a/griptape/artifacts/image_artifact.py b/griptape/artifacts/image_artifact.py index 28de3b1c92..5a90ae403b 100644 --- a/griptape/artifacts/image_artifact.py +++ b/griptape/artifacts/image_artifact.py @@ -4,11 +4,11 @@ from attrs import define, field -from griptape.artifacts import BaseArtifact +from griptape.artifacts import BlobArtifact @define -class ImageArtifact(BaseArtifact): +class ImageArtifact(BlobArtifact): """Stores image data. 
Attributes: diff --git a/griptape/artifacts/info_artifact.py b/griptape/artifacts/info_artifact.py index 66fe94fc90..3391554e92 100644 --- a/griptape/artifacts/info_artifact.py +++ b/griptape/artifacts/info_artifact.py @@ -2,11 +2,11 @@ from attrs import define, field -from griptape.artifacts import BaseSystemArtifact +from griptape.artifacts import BaseArtifact @define -class InfoArtifact(BaseSystemArtifact): +class InfoArtifact(BaseArtifact): """Represents helpful info that can be conveyed to the LLM. For example, "No results found" or "Please try again.". @@ -16,3 +16,6 @@ class InfoArtifact(BaseSystemArtifact): """ value: str = field(converter=str, metadata={"serializable": True}) + + def to_text(self) -> str: + return self.value diff --git a/griptape/artifacts/list_artifact.py b/griptape/artifacts/list_artifact.py index 00df837898..ac11d95ccb 100644 --- a/griptape/artifacts/list_artifact.py +++ b/griptape/artifacts/list_artifact.py @@ -4,14 +4,14 @@ from attrs import Attribute, define, field -from griptape.artifacts import BaseArtifact, BaseSystemArtifact +from griptape.artifacts import BaseArtifact if TYPE_CHECKING: from collections.abc import Sequence @define -class ListArtifact(BaseSystemArtifact): +class ListArtifact(BaseArtifact): value: Sequence[BaseArtifact] = field(factory=list, metadata={"serializable": True}) item_separator: str = field(default="\n\n", kw_only=True, metadata={"serializable": True}) validate_uniform_types: bool = field(default=False, kw_only=True, metadata={"serializable": True}) diff --git a/griptape/common/prompt_stack/contents/text_message_content.py b/griptape/common/prompt_stack/contents/text_message_content.py index c862564f3f..39e678f28d 100644 --- a/griptape/common/prompt_stack/contents/text_message_content.py +++ b/griptape/common/prompt_stack/contents/text_message_content.py @@ -4,7 +4,7 @@ from attrs import define, field -from griptape.artifacts import TextArtifact +from griptape.artifacts import BaseArtifact, TextArtifact 
from griptape.common import BaseDeltaMessageContent, BaseMessageContent, TextDeltaMessageContent if TYPE_CHECKING: @@ -13,7 +13,7 @@ @define class TextMessageContent(BaseMessageContent): - artifact: TextArtifact = field(metadata={"serializable": True}) + artifact: BaseArtifact = field(metadata={"serializable": True}) @classmethod def from_deltas(cls, deltas: Sequence[BaseDeltaMessageContent]) -> TextMessageContent: diff --git a/griptape/common/prompt_stack/prompt_stack.py b/griptape/common/prompt_stack/prompt_stack.py index c9f71aa209..0bb325c1d1 100644 --- a/griptape/common/prompt_stack/prompt_stack.py +++ b/griptape/common/prompt_stack/prompt_stack.py @@ -7,7 +7,6 @@ from griptape.artifacts import ( ActionArtifact, BaseArtifact, - ErrorArtifact, GenericArtifact, ImageArtifact, ListArtifact, @@ -70,8 +69,6 @@ def __to_message_content(self, artifact: str | BaseArtifact) -> list[BaseMessage return [ImageMessageContent(artifact)] elif isinstance(artifact, GenericArtifact): return [GenericMessageContent(artifact)] - elif isinstance(artifact, ErrorArtifact): - return [TextMessageContent(TextArtifact(artifact.to_text()))] elif isinstance(artifact, ActionArtifact): action = artifact.value output = action.output @@ -81,6 +78,7 @@ def __to_message_content(self, artifact: str | BaseArtifact) -> list[BaseMessage return [ActionResultMessageContent(output, action=action)] elif isinstance(artifact, ListArtifact): processed_contents = [self.__to_message_content(artifact) for artifact in artifact.value] + return [sub_content for processed_content in processed_contents for sub_content in processed_content] else: - raise ValueError(f"Unsupported artifact type: {type(artifact)}") + return [TextMessageContent(TextArtifact(artifact.to_text()))] diff --git a/griptape/mixins/__init__.py b/griptape/mixins/__init__.py index 7bed528121..6cdeecbb7f 100644 --- a/griptape/mixins/__init__.py +++ b/griptape/mixins/__init__.py @@ -3,7 +3,7 @@ from .actions_subtask_origin_mixin import 
ActionsSubtaskOriginMixin from .rule_mixin import RuleMixin from .serializable_mixin import SerializableMixin -from .media_artifact_file_output_mixin import ArtifactFileOutputMixin +from .artifact_file_output_mixin import ArtifactFileOutputMixin from .futures_executor_mixin import FuturesExecutorMixin from .singleton_mixin import SingletonMixin diff --git a/griptape/mixins/media_artifact_file_output_mixin.py b/griptape/mixins/artifact_file_output_mixin.py similarity index 100% rename from griptape/mixins/media_artifact_file_output_mixin.py rename to griptape/mixins/artifact_file_output_mixin.py diff --git a/tests/unit/artifacts/test_csv_row_artifact.py b/tests/unit/artifacts/test_csv_row_artifact.py deleted file mode 100644 index fe0b8cd64d..0000000000 --- a/tests/unit/artifacts/test_csv_row_artifact.py +++ /dev/null @@ -1,30 +0,0 @@ -from griptape.artifacts import CsvRowArtifact - - -class TestCsvRowArtifact: - def test_value_type_conversion(self): - assert CsvRowArtifact({"foo": "bar"}).value == {"foo": "bar"} - assert CsvRowArtifact({"foo": {"bar": "baz"}}).value == {"foo": {"bar": "baz"}} - assert CsvRowArtifact('{"foo": "bar"}').value == {"foo": "bar"} - - def test___add__(self): - assert (CsvRowArtifact({"test1": "foo"}) + CsvRowArtifact({"test2": "bar"})).value == { - "test1": "foo", - "test2": "bar", - } - - def test_to_text(self): - assert CsvRowArtifact({"test1": "foo|bar", "test2": 1}, delimiter="|").to_text() == 'test1|test2\r\n"foo|bar"|1' - - def test_to_dict(self): - assert CsvRowArtifact({"test1": "foo"}).to_dict()["value"] == {"test1": "foo"} - - def test_name(self): - artifact = CsvRowArtifact({}) - - assert artifact.name == artifact.id - assert CsvRowArtifact({}, name="bar").name == "bar" - - def test___bool__(self): - assert not bool(CsvRowArtifact({})) - assert bool(CsvRowArtifact({"foo": "bar"}))