diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt index 1214645947abf6..58d8c7ce294243 100644 --- a/libs/community/extended_testing_deps.txt +++ b/libs/community/extended_testing_deps.txt @@ -14,6 +14,7 @@ chardet>=5.1.0,<6 cloudpathlib>=0.18,<0.19 cloudpickle>=2.0.0 cohere>=4,<6 +crate==1.0.0dev1 databricks-vectorsearch>=0.21,<0.22 datasets>=2.15.0,<3 dgml-utils>=0.3.0,<0.4 @@ -76,6 +77,7 @@ requests-toolbelt>=1.0.0,<2 rspace_client>=2.5.0,<3 scikit-learn>=1.2.2,<2 simsimd>=5.0.0,<6 +sqlalchemy-cratedb>=0.40.0,<1 sqlite-vss>=0.1.2,<0.2 sqlite-vec>=0.1.0,<0.2 sseclient-py>=1.8.0,<2 diff --git a/libs/community/langchain_community/document_loaders/__init__.py b/libs/community/langchain_community/document_loaders/__init__.py index 2576093d3d48b3..76493d827752b8 100644 --- a/libs/community/langchain_community/document_loaders/__init__.py +++ b/libs/community/langchain_community/document_loaders/__init__.py @@ -129,6 +129,9 @@ from langchain_community.document_loaders.couchbase import ( CouchbaseLoader, ) + from langchain_community.document_loaders.cratedb import ( + CrateDBLoader, + ) from langchain_community.document_loaders.csv_loader import ( CSVLoader, UnstructuredCSVLoader, @@ -576,6 +579,7 @@ "ConcurrentLoader": "langchain_community.document_loaders.concurrent", "ConfluenceLoader": "langchain_community.document_loaders.confluence", "CouchbaseLoader": "langchain_community.document_loaders.couchbase", + "CrateDBLoader": "langchain_community.document_loaders.cratedb", "CubeSemanticLoader": "langchain_community.document_loaders.cube_semantic", "DataFrameLoader": "langchain_community.document_loaders.dataframe", "DatadogLogsLoader": "langchain_community.document_loaders.datadog_logs", @@ -782,6 +786,7 @@ def __getattr__(name: str) -> Any: "ConcurrentLoader", "ConfluenceLoader", "CouchbaseLoader", + "CrateDBLoader", "CubeSemanticLoader", "DataFrameLoader", "DatadogLogsLoader", diff --git 
a/libs/community/langchain_community/document_loaders/cratedb.py b/libs/community/langchain_community/document_loaders/cratedb.py new file mode 100644 index 00000000000000..a97b4dde8f354d --- /dev/null +++ b/libs/community/langchain_community/document_loaders/cratedb.py @@ -0,0 +1,5 @@ +from langchain_community.document_loaders.sql_database import SQLDatabaseLoader + + +class CrateDBLoader(SQLDatabaseLoader): + pass diff --git a/libs/community/tests/examples/mlb_teams_2012.sql b/libs/community/tests/examples/mlb_teams_2012.sql index 33cb765a38ebe8..9df72ef19954ab 100644 --- a/libs/community/tests/examples/mlb_teams_2012.sql +++ b/libs/community/tests/examples/mlb_teams_2012.sql @@ -1,6 +1,7 @@ -- Provisioning table "mlb_teams_2012". -- -- psql postgresql://postgres@localhost < mlb_teams_2012.sql +-- crash < mlb_teams_2012.sql DROP TABLE IF EXISTS mlb_teams_2012; CREATE TABLE mlb_teams_2012 ("Team" VARCHAR, "Payroll (millions)" FLOAT, "Wins" BIGINT); diff --git a/libs/community/tests/integration_tests/document_loaders/docker-compose/cratedb.yml b/libs/community/tests/integration_tests/document_loaders/docker-compose/cratedb.yml new file mode 100644 index 00000000000000..b547b2c766f201 --- /dev/null +++ b/libs/community/tests/integration_tests/document_loaders/docker-compose/cratedb.yml @@ -0,0 +1,20 @@ +version: "3" + +services: + postgresql: + image: crate/crate:nightly + environment: + - CRATE_HEAP_SIZE=4g + ports: + - "4200:4200" + - "5432:5432" + command: | + crate -Cdiscovery.type=single-node + healthcheck: + test: + [ + "CMD-SHELL", + "curl --silent --fail http://localhost:4200/ || exit 1", + ] + interval: 5s + retries: 60 diff --git a/libs/community/tests/integration_tests/document_loaders/docker-compose/postgresql.yml b/libs/community/tests/integration_tests/document_loaders/docker-compose/postgresql.yml new file mode 100644 index 00000000000000..f8ab2cfdeb4184 --- /dev/null +++ 
b/libs/community/tests/integration_tests/document_loaders/docker-compose/postgresql.yml @@ -0,0 +1,19 @@ +version: "3" + +services: + postgresql: + image: postgres:16 + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + ports: + - "5432:5432" + command: | + postgres -c log_statement=all + healthcheck: + test: + [ + "CMD-SHELL", + "psql postgresql://postgres@localhost --command 'SELECT 1;' || exit 1", + ] + interval: 5s + retries: 60 diff --git a/libs/community/tests/integration_tests/document_loaders/test_sql_database.py b/libs/community/tests/integration_tests/document_loaders/test_sql_database.py index 121948075a316f..a687ab7c0491e7 100644 --- a/libs/community/tests/integration_tests/document_loaders/test_sql_database.py +++ b/libs/community/tests/integration_tests/document_loaders/test_sql_database.py @@ -47,6 +47,14 @@ warnings.warn("psycopg2 not installed, skipping corresponding tests", UserWarning) psycopg2_installed = False +try: + import sqlalchemy_cratedb # noqa: F401 + + cratedb_installed = True +except ImportError: + warnings.warn("cratedb not installed, skipping corresponding tests", UserWarning) + cratedb_installed = False + @pytest.fixture() def engine(db_uri: str) -> sa.Engine: @@ -75,6 +83,9 @@ def provision_database(engine: sa.Engine) -> None: continue connection.execute(sa.text(statement)) connection.commit() + if engine.dialect.name.startswith("crate"): + connection.execute(sa.text("REFRESH TABLE mlb_teams_2012;")) + connection.commit() tmpdir = TemporaryDirectory() @@ -103,6 +114,16 @@ def pytest_generate_tests(metafunc: "Metafunc") -> None: "postgresql+psycopg2://langchain:langchain@localhost:6023/langchain" ) ids.append("postgresql") + if cratedb_installed: + # No port is given in the URL below (unlike the PostgreSQL case above). 
+ # The easiest way to spin up a CrateDB instance is to use + # the docker compose file located, relative to libs/community, at + # tests/integration_tests/document_loaders/docker-compose/cratedb.yml + # (note: the service in that file is named `postgresql`). + # It starts a single-node CrateDB and publishes + # ports 4200 and 5432 on the host. + urls.append("crate://crate@localhost/?schema=testdrive") + ids.append("cratedb") metafunc.parametrize("db_uri", urls, ids=ids) diff --git a/libs/community/tests/unit_tests/document_loaders/test_imports.py b/libs/community/tests/unit_tests/document_loaders/test_imports.py index b49a1b7cc4a2e1..60445d14518567 100644 --- a/libs/community/tests/unit_tests/document_loaders/test_imports.py +++ b/libs/community/tests/unit_tests/document_loaders/test_imports.py @@ -48,6 +48,7 @@ "ConcurrentLoader", "ConfluenceLoader", "CouchbaseLoader", + "CrateDBLoader", "CubeSemanticLoader", "DataFrameLoader", "DatadogLogsLoader",