Skip to content

Commit

Permalink
Standardize loaders interface
Browse files Browse the repository at this point in the history
  • Loading branch information
collindutter committed Sep 4, 2024
1 parent af07071 commit cd88b5c
Show file tree
Hide file tree
Showing 73 changed files with 425 additions and 759 deletions.
4 changes: 3 additions & 1 deletion docs/examples/src/load_query_and_chat_marqo_1.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os

from griptape import utils
from griptape.chunkers import TextChunker
from griptape.drivers import MarqoVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader
from griptape.structures import Agent
Expand All @@ -25,11 +26,12 @@

# Load artifacts from the web
artifacts = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker().chunk(artifacts)

# Upsert the artifacts into the vector store
vector_store.upsert_text_artifacts(
{
namespace: artifacts,
namespace: chunks,
}
)

Expand Down
6 changes: 4 additions & 2 deletions docs/examples/src/query_webpage_1.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import LocalVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

vector_store = LocalVectorStoreDriver(embedding_driver=OpenAiEmbeddingDriver(api_key=os.environ["OPENAI_API_KEY"]))

artifacts = WebLoader(max_tokens=100).load("https://www.griptape.ai")
artifacts = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker().chunk(artifacts)

for a in artifacts:
for a in chunks:
vector_store.upsert_text_artifact(a, namespace="griptape")

results = vector_store.query("creativity", count=3, namespace="griptape")
Expand Down
7 changes: 4 additions & 3 deletions docs/examples/src/query_webpage_astra_db_1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import (
AstraDbVectorStoreDriver,
OpenAiChatPromptDriver,
Expand Down Expand Up @@ -43,9 +44,9 @@
),
)

artifacts = WebLoader(max_tokens=256).load(input_blogpost)

vector_store_driver.upsert_text_artifacts({namespace: artifacts})
artifacts = WebLoader().load(input_blogpost)
chunks = TextChunker().chunk(artifacts)
vector_store_driver.upsert_text_artifacts({namespace: chunks})

rag_tool = RagTool(
description="A DataStax blog post",
Expand Down
4 changes: 3 additions & 1 deletion docs/examples/src/talk_to_a_pdf_1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests

from griptape.chunkers import TextChunker
from griptape.drivers import LocalVectorStoreDriver, OpenAiChatPromptDriver, OpenAiEmbeddingDriver
from griptape.engines.rag import RagEngine
from griptape.engines.rag.modules import PromptResponseRagModule, VectorStoreRetrievalRagModule
Expand Down Expand Up @@ -31,8 +32,9 @@
)

artifacts = PdfLoader().load(response.content)
chunks = TextChunker().chunk(artifacts)

vector_store.upsert_text_artifacts({namespace: artifacts})
vector_store.upsert_text_artifacts({namespace: chunks})

agent = Agent(tools=[rag_tool])

Expand Down
4 changes: 3 additions & 1 deletion docs/examples/src/talk_to_a_webpage_1.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from griptape.chunkers import TextChunker
from griptape.drivers import LocalVectorStoreDriver, OpenAiChatPromptDriver, OpenAiEmbeddingDriver
from griptape.engines.rag import RagEngine
from griptape.engines.rag.modules import PromptResponseRagModule, VectorStoreRetrievalRagModule
Expand Down Expand Up @@ -26,8 +27,9 @@
)

artifacts = WebLoader().load("https://en.wikipedia.org/wiki/Physics")
chunks = TextChunker().chunk(artifacts)

vector_store_driver.upsert_text_artifacts({namespace: artifacts})
vector_store_driver.upsert_text_artifacts({namespace: chunks})

rag_tool = RagTool(
description="Contains information about physics. " "Use it to answer any physics-related questions.",
Expand Down
13 changes: 0 additions & 13 deletions docs/griptape-framework/data/loaders.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,6 @@ Can be used to load CSV files into [CsvRowArtifact](../../reference/griptape/art
--8<-- "docs/griptape-framework/data/src/loaders_3.py"
```


## DataFrame

!!! info
This loader requires the `loaders-dataframe` [extra](../index.md#extras).

Can be used to load [pandas](https://pandas.pydata.org/) [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)s into [CsvRowArtifact](../../reference/griptape/artifacts/csv_row_artifact.md)s:

```python
--8<-- "docs/griptape-framework/data/src/loaders_4.py"
```


## Text

Used to load arbitrary text and text files:
Expand Down
11 changes: 4 additions & 7 deletions docs/griptape-framework/data/src/loaders_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@
from pathlib import Path

from griptape.loaders import PdfLoader
from griptape.utils import load_file, load_files

urllib.request.urlretrieve("https://arxiv.org/pdf/1706.03762.pdf", "attention.pdf")

# Load a single PDF file
PdfLoader().load(Path("attention.pdf").read_bytes())
# You can also use the load_file utility function
PdfLoader().load(load_file("attention.pdf"))
PdfLoader().load("attention.pdf")
# You can also pass a Path object
PdfLoader().load(Path("attention.pdf"))

urllib.request.urlretrieve("https://arxiv.org/pdf/1706.03762.pdf", "CoT.pdf")

# Load multiple PDF files
PdfLoader().load_collection([Path("attention.pdf").read_bytes(), Path("CoT.pdf").read_bytes()])
# You can also use the load_files utility function
PdfLoader().load_collection(list(load_files(["attention.pdf", "CoT.pdf"]).values()))
PdfLoader().load_collection([Path("attention.pdf"), Path("CoT.pdf")])
7 changes: 3 additions & 4 deletions docs/griptape-framework/data/src/loaders_10.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from pathlib import Path

from griptape.loaders import AudioLoader
from griptape.utils import load_file

# Load an audio file from disk
audio_artifact = AudioLoader().load(Path("tests/resources/sentences.wav").read_bytes())
AudioLoader().load("tests/resources/sentences.wav")

# You can also use the load_file utility function
AudioLoader().load(load_file("tests/resources/sentences.wav"))
# You can also pass a Path object
AudioLoader().load(Path("tests/resources/sentences.wav"))
13 changes: 4 additions & 9 deletions docs/griptape-framework/data/src/loaders_3.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
from pathlib import Path

from griptape.loaders import CsvLoader
from griptape.utils import load_file, load_files

# Load a single CSV file
CsvLoader().load(Path("tests/resources/cities.csv").read_text())
# You can also use the load_file utility function
CsvLoader().load(load_file("tests/resources/cities.csv"))
CsvLoader().load("tests/resources/cities.csv")
# You can also pass a Path object
CsvLoader().load(Path("tests/resources/cities.csv"))

# Load multiple CSV files
CsvLoader().load_collection(
[Path("tests/resources/cities.csv").read_text(), Path("tests/resources/addresses.csv").read_text()]
)
# You can also use the load_files utility function
CsvLoader().load_collection(list(load_files(["tests/resources/cities.csv", "tests/resources/addresses.csv"]).values()))
CsvLoader().load_collection([Path("tests/resources/cities.csv"), "tests/resources/addresses.csv"])
13 changes: 0 additions & 13 deletions docs/griptape-framework/data/src/loaders_4.py

This file was deleted.

8 changes: 4 additions & 4 deletions docs/griptape-framework/data/src/loaders_7.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from pathlib import Path

from griptape.loaders import ImageLoader
from griptape.utils import load_file

# Load an image from disk
disk_image_artifact = ImageLoader().load(Path("tests/resources/mountain.png").read_bytes())
# You can also use the load_file utility function
ImageLoader().load(load_file("tests/resources/mountain.png"))
ImageLoader().load("tests/resources/mountain.png")

# You can also pass a Path object
ImageLoader().load(Path("tests/resources/mountain.png"))
13 changes: 4 additions & 9 deletions docs/griptape-framework/data/src/loaders_8.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
from pathlib import Path

from griptape.loaders import ImageLoader
from griptape.utils import load_file, load_files

# Load a single image in BMP format
image_artifact_jpeg = ImageLoader(format="bmp").load(Path("tests/resources/mountain.png").read_bytes())
# You can also use the load_file utility function
ImageLoader(format="bmp").load(load_file("tests/resources/mountain.png"))
ImageLoader(format="bmp").load("tests/resources/mountain.png")
# You can also pass a Path object
ImageLoader(format="bmp").load(Path("tests/resources/mountain.png"))

# Load multiple images (pass format="bmp" to ImageLoader, as above, to load them in BMP format)
ImageLoader().load_collection(
[Path("tests/resources/mountain.png").read_bytes(), Path("tests/resources/cow.png").read_bytes()]
)
# You can also use the load_files utility function
ImageLoader().load_collection(list(load_files(["tests/resources/mountain.png", "tests/resources/cow.png"]).values()))
ImageLoader().load_collection([Path("tests/resources/mountain.png"), "tests/resources/cow.png"])
6 changes: 4 additions & 2 deletions docs/griptape-framework/drivers/src/vector_store_drivers_1.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import LocalVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

Expand All @@ -9,10 +10,11 @@
vector_store_driver = LocalVectorStoreDriver(embedding_driver=embedding_driver)

# Load Artifacts from the web
artifacts = WebLoader(max_tokens=100).load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=100).chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in artifacts]
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in chunks]

results = vector_store_driver.query(query="What is griptape?")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import OpenAiEmbeddingDriver, QdrantVectorStoreDriver
from griptape.loaders import WebLoader

Expand All @@ -19,7 +20,8 @@
)

# Load Artifacts from the web
artifacts = WebLoader().load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=100).chunk(artifact)

# Recreate Qdrant collection
vector_store_driver.client.recreate_collection(
Expand All @@ -28,7 +30,7 @@
)

# Upsert Artifacts into the Vector Store Driver
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in artifacts]
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in chunks]

results = vector_store_driver.query(query="What is griptape?")

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import AstraDbVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

Expand All @@ -20,10 +21,11 @@
)

# Load Artifacts from the web
artifacts = WebLoader().load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker().chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in artifacts]
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in chunks]

results = vector_store_driver.query(query="What is griptape?")

Expand Down
6 changes: 4 additions & 2 deletions docs/griptape-framework/drivers/src/vector_store_drivers_3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import OpenAiEmbeddingDriver, PineconeVectorStoreDriver
from griptape.loaders import WebLoader

Expand All @@ -14,10 +15,11 @@
)

# Load Artifacts from the web
artifacts = WebLoader(max_tokens=100).load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=100).chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in artifacts]
[vector_store_driver.upsert_text_artifact(a, namespace="griptape") for a in chunks]

results = vector_store_driver.query(query="What is griptape?")

Expand Down
6 changes: 4 additions & 2 deletions docs/griptape-framework/drivers/src/vector_store_drivers_4.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import MarqoVectorStoreDriver, OpenAiChatPromptDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

Expand All @@ -19,12 +20,13 @@
)

# Load Artifacts from the web
artifacts = WebLoader(max_tokens=200).load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=200).chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
vector_store_driver.upsert_text_artifacts(
{
"griptape": artifacts,
"griptape": chunks,
}
)

Expand Down
10 changes: 4 additions & 6 deletions docs/griptape-framework/drivers/src/vector_store_drivers_5.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import MongoDbAtlasVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

Expand All @@ -25,14 +26,11 @@
)

# Load Artifacts from the web
artifacts = WebLoader(max_tokens=200).load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=200).chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
vector_store_driver.upsert_text_artifacts(
{
"griptape": artifacts,
}
)
vector_store_driver.upsert_text_artifacts({"griptape": chunks})

results = vector_store_driver.query(query="What is griptape?")

Expand Down
6 changes: 4 additions & 2 deletions docs/griptape-framework/drivers/src/vector_store_drivers_6.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os

from griptape.chunkers import TextChunker
from griptape.drivers import AzureMongoDbVectorStoreDriver, OpenAiEmbeddingDriver
from griptape.loaders import WebLoader

Expand All @@ -25,12 +26,13 @@
)

# Load Artifacts from the web
artifacts = WebLoader(max_tokens=200).load("https://www.griptape.ai")
artifact = WebLoader().load("https://www.griptape.ai")
chunks = TextChunker(max_tokens=200).chunk(artifact)

# Upsert Artifacts into the Vector Store Driver
vector_store_driver.upsert_text_artifacts(
{
"griptape": artifacts,
"griptape": chunks,
}
)

Expand Down
Loading

0 comments on commit cd88b5c

Please sign in to comment.