Skip to content

Commit

Permalink
CrateDB: Document Loader
Browse files Browse the repository at this point in the history
Based on previous contributions, this has effectively become just a
naming-things wrapper around `SQLDatabaseLoader`.
  • Loading branch information
amotl committed Oct 30, 2024
1 parent 94ea950 commit aa29f70
Show file tree
Hide file tree
Showing 8 changed files with 74 additions and 0 deletions.
2 changes: 2 additions & 0 deletions libs/community/extended_testing_deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ chardet>=5.1.0,<6
cloudpathlib>=0.18,<0.19
cloudpickle>=2.0.0
cohere>=4,<6
crate==1.0.0dev1
databricks-vectorsearch>=0.21,<0.22
datasets>=2.15.0,<3
dgml-utils>=0.3.0,<0.4
Expand Down Expand Up @@ -76,6 +77,7 @@ requests-toolbelt>=1.0.0,<2
rspace_client>=2.5.0,<3
scikit-learn>=1.2.2,<2
simsimd>=5.0.0,<6
sqlalchemy-cratedb>=0.40.0,<1
sqlite-vss>=0.1.2,<0.2
sqlite-vec>=0.1.0,<0.2
sseclient-py>=1.8.0,<2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@
from langchain_community.document_loaders.couchbase import (
CouchbaseLoader,
)
from langchain_community.document_loaders.cratedb import (
CrateDBLoader,
)
from langchain_community.document_loaders.csv_loader import (
CSVLoader,
UnstructuredCSVLoader,
Expand Down Expand Up @@ -576,6 +579,7 @@
"ConcurrentLoader": "langchain_community.document_loaders.concurrent",
"ConfluenceLoader": "langchain_community.document_loaders.confluence",
"CouchbaseLoader": "langchain_community.document_loaders.couchbase",
"CrateDBLoader": "langchain_community.document_loaders.cratedb",
"CubeSemanticLoader": "langchain_community.document_loaders.cube_semantic",
"DataFrameLoader": "langchain_community.document_loaders.dataframe",
"DatadogLogsLoader": "langchain_community.document_loaders.datadog_logs",
Expand Down Expand Up @@ -782,6 +786,7 @@ def __getattr__(name: str) -> Any:
"ConcurrentLoader",
"ConfluenceLoader",
"CouchbaseLoader",
"CrateDBLoader",
"CubeSemanticLoader",
"DataFrameLoader",
"DatadogLogsLoader",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from langchain_community.document_loaders.sql_database import SQLDatabaseLoader


class CrateDBLoader(SQLDatabaseLoader):
    """Document loader for CrateDB.

    This subclass adds no behaviour of its own: every attribute and method
    is inherited unchanged from ``SQLDatabaseLoader``. It exists only to
    offer a CrateDB-specific name for the generic SQL loader.
    """
1 change: 1 addition & 0 deletions libs/community/tests/examples/mlb_teams_2012.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-- Provisioning table "mlb_teams_2012".
--
-- psql postgresql://postgres@localhost < mlb_teams_2012.sql
-- crash < mlb_teams_2012.sql

DROP TABLE IF EXISTS mlb_teams_2012;
CREATE TABLE mlb_teams_2012 ("Team" VARCHAR, "Payroll (millions)" FLOAT, "Wins" BIGINT);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Docker Compose definition that starts a single CrateDB node for testing.
version: "3"

services:
  # NOTE(review): the service key is `postgresql` although the image below is
  # CrateDB -- looks like a carry-over from the PostgreSQL compose file; confirm
  # before renaming, since `docker compose up <service>` callers depend on it.
  postgresql:
    image: crate/crate:nightly
    environment:
      - CRATE_HEAP_SIZE=4g
    ports:
      # 4200 is the HTTP endpoint probed by the healthcheck below.
      - "4200:4200"
      # presumably the PostgreSQL wire protocol port -- TODO confirm
      - "5432:5432"
    command: |
      crate -Cdiscovery.type=single-node
    healthcheck:
      # The container counts as healthy once the HTTP endpoint answers.
      test:
        [
          "CMD-SHELL",
          "curl --silent --fail http://localhost:4200/ || exit 1",
        ]
      interval: 5s
      retries: 60
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Docker Compose definition that starts a PostgreSQL 16 server for testing.
version: "3"

services:
  postgresql:
    image: postgres:16
    environment:
      # Accept connections without a password; suitable for local tests only.
      - POSTGRES_HOST_AUTH_METHOD=trust
    ports:
      - "5432:5432"
    # Log every SQL statement the server receives.
    command: |
      postgres -c log_statement=all
    healthcheck:
      # The container counts as healthy once a trivial query succeeds.
      test:
        [
          "CMD-SHELL",
          "psql postgresql://postgres@localhost --command 'SELECT 1;' || exit 1",
        ]
      interval: 5s
      retries: 60
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@
warnings.warn("psycopg2 not installed, skipping corresponding tests", UserWarning)
psycopg2_installed = False

# Optional-dependency probe: record whether the CrateDB SQLAlchemy dialect can
# be imported, so the CrateDB test parameters are only added when it can.
try:
    import sqlalchemy_cratedb  # noqa: F401

    cratedb_installed = True
except ImportError:
    # Name the distribution that has to be installed (`sqlalchemy-cratedb`),
    # not just the database, so the warning is actionable.
    warnings.warn(
        "sqlalchemy-cratedb not installed, skipping corresponding tests",
        UserWarning,
    )
    cratedb_installed = False


@pytest.fixture()
def engine(db_uri: str) -> sa.Engine:
Expand Down Expand Up @@ -75,6 +83,9 @@ def provision_database(engine: sa.Engine) -> None:
continue
connection.execute(sa.text(statement))
connection.commit()
if engine.dialect.name.startswith("crate"):
connection.execute(sa.text("REFRESH TABLE mlb_teams_2012;"))
connection.commit()


tmpdir = TemporaryDirectory()
Expand Down Expand Up @@ -103,6 +114,16 @@ def pytest_generate_tests(metafunc: "Metafunc") -> None:
"postgresql+psycopg2://langchain:langchain@localhost:6023/langchain"
)
ids.append("postgresql")
if cratedb_installed:
# The URL below carries no port, so the dialect's default port is used,
# together with the `testdrive` schema.
# The easiest way to spin up a CrateDB instance is the CrateDB docker
# compose file that accompanies these tests (image `crate/crate:nightly`),
# which publishes ports 4200 and 5432 on localhost.
urls.append("crate://crate@localhost/?schema=testdrive")
ids.append("cratedb")

metafunc.parametrize("db_uri", urls, ids=ids)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
"ConcurrentLoader",
"ConfluenceLoader",
"CouchbaseLoader",
"CrateDBLoader",
"CubeSemanticLoader",
"DataFrameLoader",
"DatadogLogsLoader",
Expand Down

0 comments on commit aa29f70

Please sign in to comment.