diff --git a/docs/docs/.gitignore b/docs/docs/.gitignore
index 25a6e30a4b775..e586a74dfb131 100644
--- a/docs/docs/.gitignore
+++ b/docs/docs/.gitignore
@@ -4,4 +4,5 @@ node_modules/
.docusaurus
.cache-loader
-docs/api
\ No newline at end of file
+docs/api
+example.sqlite
diff --git a/docs/docs/how_to/document_loader_sql_database.ipynb b/docs/docs/how_to/document_loader_sql_database.ipynb
new file mode 100644
index 0000000000000..9b3fe41df43fa
--- /dev/null
+++ b/docs/docs/how_to/document_loader_sql_database.ipynb
@@ -0,0 +1,360 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# SQL Database\n",
+ "\n",
+ "## About\n",
+ "\n",
+ "The `SQLDatabaseLoader` loads records from any database supported by\n",
+ "[SQLAlchemy]. See [SQLAlchemy dialects] for the full list of supported\n",
+ "SQL databases and dialects.\n",
+ "\n",
+ "For talking to the database, the document loader uses the [SQLDatabase]\n",
+ "utility from the LangChain integration toolkit.\n",
+ "\n",
+ "You can either use plain SQL for querying, or use an SQLAlchemy `Select`\n",
+ "statement object, if you are using SQLAlchemy-Core or -ORM.\n",
+ "\n",
+ "You can select which columns to place into the document, which columns\n",
+ "to place into its metadata, which columns to use as a `source` attribute\n",
+ "in metadata, and whether to include the result row number and/or the SQL\n",
+ "query expression in the metadata.\n",
+ "\n",
+ "## What's inside\n",
+ "\n",
+ "This notebook covers how to load documents from an [SQLite] database,\n",
+ "using the [SQLAlchemy] document loader.\n",
+ "\n",
+ "It loads the result of a database query with one document per row.\n",
+ "\n",
+ "[SQLAlchemy]: https://www.sqlalchemy.org/\n",
+ "[SQLAlchemy dialects]: https://docs.sqlalchemy.org/en/latest/dialects/\n",
+ "[SQLDatabase]: https://python.langchain.com/docs/integrations/toolkits/sql_database\n",
+ "[SQLite]: https://sqlite.org/\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Prerequisites"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "#!pip install langchain langchain-community sqlalchemy termsql"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "Populate the SQLite database with example input data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "collapsed": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Nationals|81.34|98\r\n",
+ "Reds|82.2|97\r\n",
+ "Yankees|197.96|95\r\n",
+ "Giants|117.62|94\r\n",
+ "Braves|83.31|94\r\n",
+ "Athletics|55.37|94\r\n",
+ "Rangers|120.51|93\r\n",
+ "Orioles|81.43|93\r\n",
+ "Rays|64.17|90\r\n",
+ "Angels|154.49|89\r\n",
+ "Tigers|132.3|88\r\n",
+ "Cardinals|110.3|88\r\n",
+ "Dodgers|95.14|86\r\n",
+ "White Sox|96.92|85\r\n",
+ "Brewers|97.65|83\r\n",
+ "Phillies|174.54|81\r\n",
+ "Diamondbacks|74.28|81\r\n",
+ "Pirates|63.43|79\r\n",
+ "Padres|55.24|76\r\n",
+ "Mariners|81.97|75\r\n",
+ "Mets|93.35|74\r\n",
+ "Blue Jays|75.48|73\r\n",
+ "Royals|60.91|72\r\n",
+ "Marlins|118.07|69\r\n",
+ "Red Sox|173.18|69\r\n",
+ "Indians|78.43|68\r\n",
+ "Twins|94.08|66\r\n",
+ "Rockies|78.06|64\r\n",
+ "Cubs|88.19|61\r\n",
+ "Astros|60.65|55\r\n",
+ "||\r\n"
+ ]
+ }
+ ],
+ "source": [
+ "!termsql --infile=./example_data/mlb_teams_2012.csv --head --csv --outfile=example.sqlite --table=payroll"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "collapsed": false
+ },
+ "source": [
+ "## Basic usage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from pprint import pprint\n",
+ "\n",
+ "from langchain_community.document_loaders import SQLDatabaseLoader\n",
+ "\n",
+ "loader = SQLDatabaseLoader(\n",
+ " \"SELECT * FROM payroll LIMIT 2\",\n",
+ " url=\"sqlite:///example.sqlite\",\n",
+ ")\n",
+ "documents = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Document(page_content='Team: Nationals\\nPayroll (millions): 81.34\\nWins: 98'),\n",
+ " Document(page_content='Team: Reds\\nPayroll (millions): 82.2\\nWins: 97')]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Specify which columns are content vs. metadata\n",
+ "\n",
+ "Use the `page_content_mapper` keyword argument to optionally customize how to derive\n",
+ "a page content string from an input database record / row. By default, all columns\n",
+ "will be used.\n",
+ "\n",
+ "Use the `metadata_mapper` keyword argument to optionally customize how to derive\n",
+ "a document metadata dictionary from an input database record / row. By default,\n",
+ "document metadata will be empty."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import functools\n",
+ "\n",
+ "# Configure built-in page content mapper to include only specified columns.\n",
+ "row_to_content = functools.partial(\n",
+ " SQLDatabaseLoader.page_content_default_mapper, column_names=[\"Team\", \"Wins\"]\n",
+ ")\n",
+ "\n",
+ "# Configure built-in metadata dictionary mapper to include specified columns.\n",
+ "row_to_metadata = functools.partial(\n",
+ " SQLDatabaseLoader.metadata_default_mapper, column_names=[\"Payroll (millions)\"]\n",
+ ")\n",
+ "\n",
+ "loader = SQLDatabaseLoader(\n",
+ " \"SELECT * FROM payroll LIMIT 2\",\n",
+ " url=\"sqlite:///example.sqlite\",\n",
+ " page_content_mapper=row_to_content,\n",
+ " metadata_mapper=row_to_metadata,\n",
+ ")\n",
+ "documents = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Document(page_content='Team: Nationals\\nWins: 98', metadata={'Payroll (millions)': 81.34}),\n",
+ " Document(page_content='Team: Reds\\nWins: 97', metadata={'Payroll (millions)': 82.2})]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "The following examples demonstrate how to use custom functions to define arbitrary\n",
+ "mapping rules by using Python code.\n",
+ "```python\n",
+ "def page_content_mapper(row: sa.RowMapping, column_names: Optional[List[str]] = None) -> str:\n",
+ " return f\"Team: {row['Team']}\"\n",
+ "```\n",
+ "```python\n",
+ "def metadata_mapper(row: sa.RowMapping, column_names: Optional[List[str]] = None) -> Dict[str, Any]:\n",
+ " return {\"team\": row['Team']}\n",
+ "```"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Specify column(s) to identify the document source\n",
+ "\n",
+ "Use the `source_columns` option to specify the columns to use as a \"source\" for the\n",
+ "document created from each row. This is useful for identifying documents through\n",
+ "their metadata. Typically, you may use the primary key column(s) for that purpose."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "loader = SQLDatabaseLoader(\n",
+ " \"SELECT * FROM payroll LIMIT 2\",\n",
+ " url=\"sqlite:///example.sqlite\",\n",
+ " source_columns=[\"Team\"],\n",
+ ")\n",
+ "documents = loader.load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Document(page_content='Team: Nationals\\nPayroll (millions): 81.34\\nWins: 98', metadata={'source': 'Nationals'}),\n",
+ " Document(page_content='Team: Reds\\nPayroll (millions): 82.2\\nWins: 97', metadata={'source': 'Reds'})]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Enrich metadata with row number and/or original SQL query\n",
+ "\n",
+ "Use the `include_rownum_into_metadata` and `include_query_into_metadata` options to\n",
+ "optionally populate the `metadata` dictionary with corresponding information.\n",
+ "\n",
+ "Having the `query` within metadata is useful when using documents loaded from\n",
+ "database tables for chains that answer questions using their origin queries."
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "outputs": [],
+ "source": [
+ "loader = SQLDatabaseLoader(\n",
+ " \"SELECT * FROM payroll LIMIT 2\",\n",
+ " url=\"sqlite:///example.sqlite\",\n",
+ " include_rownum_into_metadata=True,\n",
+ " include_query_into_metadata=True,\n",
+ ")\n",
+ "documents = loader.load()"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[Document(page_content='Team: Nationals\\nPayroll (millions): 81.34\\nWins: 98', metadata={'row': 0, 'query': 'SELECT * FROM payroll LIMIT 2'}),\n",
+ " Document(page_content='Team: Reds\\nPayroll (millions): 82.2\\nWins: 97', metadata={'row': 1, 'query': 'SELECT * FROM payroll LIMIT 2'})]\n"
+ ]
+ }
+ ],
+ "source": [
+ "pprint(documents)"
+ ],
+ "metadata": {
+ "collapsed": false
+ }
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/docs/how_to/document_loader_sql_database.mdx b/docs/docs/how_to/document_loader_sql_database.mdx
new file mode 100644
index 0000000000000..1ecdeda75b307
--- /dev/null
+++ b/docs/docs/how_to/document_loader_sql_database.mdx
@@ -0,0 +1,165 @@
+# SQLDatabaseLoader
+
+
+## About
+
+The `SQLDatabaseLoader` loads records from any database supported by
+[SQLAlchemy]. See [SQLAlchemy dialects] for the full list of supported
+SQL databases and dialects.
+
+You can either use plain SQL for querying, or use an SQLAlchemy `Select`
+statement object, if you are using SQLAlchemy-Core or -ORM.
+
+You can select which columns to place into the document, which columns
+to place into its metadata, which columns to use as a `source` attribute
+in metadata, and whether to include the result row number and/or the SQL
+query expression in the metadata.
+
+
+## Example
+
+This example uses PostgreSQL, and the `psycopg2` driver.
+
+
+### Prerequisites
+
+```shell
+psql postgresql://postgres@localhost/ --command "CREATE DATABASE testdrive;"
+psql postgresql://postgres@localhost/testdrive < ./libs/langchain/tests/integration_tests/examples/mlb_teams_2012.sql
+```
+
+
+### Basic loading
+
+```python
+from langchain_community.document_loaders.sql_database import SQLDatabaseLoader
+from pprint import pprint
+
+
+loader = SQLDatabaseLoader(
+ query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
+ url="postgresql+psycopg2://postgres@localhost:5432/testdrive",
+)
+docs = loader.load()
+```
+
+```python
+pprint(docs)
+```
+
+
+
+```
+[Document(page_content='Team: Nationals\nPayroll (millions): 81.34\nWins: 98', metadata={}),
+ Document(page_content='Team: Reds\nPayroll (millions): 82.2\nWins: 97', metadata={}),
+ Document(page_content='Team: Yankees\nPayroll (millions): 197.96\nWins: 95', metadata={})]
+```
+
+
+
+
+## Enriching metadata
+
+Use the `include_rownum_into_metadata` and `include_query_into_metadata` options to
+optionally populate the `metadata` dictionary with corresponding information.
+
+Having the `query` within metadata is useful when using documents loaded from
+database tables for chains that answer questions using their origin queries.
+
+```python
+loader = SQLDatabaseLoader(
+ query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
+ url="postgresql+psycopg2://postgres@localhost:5432/testdrive",
+ include_rownum_into_metadata=True,
+ include_query_into_metadata=True,
+)
+docs = loader.load()
+```
+
+```python
+pprint(docs)
+```
+
+
+
+```
+[Document(page_content='Team: Nationals\nPayroll (millions): 81.34\nWins: 98', metadata={'row': 0, 'query': 'SELECT * FROM mlb_teams_2012 LIMIT 3;'}),
+ Document(page_content='Team: Reds\nPayroll (millions): 82.2\nWins: 97', metadata={'row': 1, 'query': 'SELECT * FROM mlb_teams_2012 LIMIT 3;'}),
+ Document(page_content='Team: Yankees\nPayroll (millions): 197.96\nWins: 95', metadata={'row': 2, 'query': 'SELECT * FROM mlb_teams_2012 LIMIT 3;'})]
+```
+
+
+
+
+## Customizing metadata
+
+Use the `page_content_mapper` and `metadata_mapper` options to customize how the page
+content and the `metadata` dictionary are derived from each row. By default, all columns
+are used for the page content, and the metadata is empty.
+
+```python
+import functools
+
+row_to_content = functools.partial(
+ SQLDatabaseLoader.page_content_default_mapper, column_names=["Payroll (millions)", "Wins"]
+)
+row_to_metadata = functools.partial(
+ SQLDatabaseLoader.metadata_default_mapper, column_names=["Team"]
+)
+
+loader = SQLDatabaseLoader(
+ query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
+ url="postgresql+psycopg2://postgres@localhost:5432/testdrive",
+ page_content_mapper=row_to_content,
+ metadata_mapper=row_to_metadata,
+)
+docs = loader.load()
+```
+
+```python
+pprint(docs)
+```
+
+
+
+```
+[Document(page_content='Payroll (millions): 81.34\nWins: 98', metadata={'Team': 'Nationals'}),
+ Document(page_content='Payroll (millions): 82.2\nWins: 97', metadata={'Team': 'Reds'}),
+ Document(page_content='Payroll (millions): 197.96\nWins: 95', metadata={'Team': 'Yankees'})]
+```
+
+
+
+
+## Specify column(s) to identify the document source
+
+Use the `source_columns` option to specify the columns to use as a "source" for the
+document created from each row. This is useful for identifying documents through
+their metadata. Typically, you may use the primary key column(s) for that purpose.
+
+```python
+loader = SQLDatabaseLoader(
+ query="SELECT * FROM mlb_teams_2012 LIMIT 3;",
+ url="postgresql+psycopg2://postgres@localhost:5432/testdrive",
+ source_columns=["Team"],
+)
+docs = loader.load()
+```
+
+```python
+pprint(docs)
+```
+
+
+
+```
+[Document(page_content='Team: Nationals\nPayroll (millions): 81.34\nWins: 98', metadata={'source': 'Nationals'}),
+ Document(page_content='Team: Reds\nPayroll (millions): 82.2\nWins: 97', metadata={'source': 'Reds'}),
+ Document(page_content='Team: Yankees\nPayroll (millions): 197.96\nWins: 95', metadata={'source': 'Yankees'})]
+```
+
+
+
+
+[SQLAlchemy]: https://www.sqlalchemy.org/
+[SQLAlchemy dialects]: https://docs.sqlalchemy.org/en/20/dialects/
diff --git a/libs/community/tests/unit_tests/test_sql_database.py b/libs/community/tests/unit_tests/test_sql_database.py
index 6acb734a54309..d7795d59badae 100644
--- a/libs/community/tests/unit_tests/test_sql_database.py
+++ b/libs/community/tests/unit_tests/test_sql_database.py
@@ -55,6 +55,12 @@ def db_lazy_reflection(engine: Engine) -> SQLDatabase:
return SQLDatabase(engine, lazy_table_reflection=True)
+@pytest.mark.xfail(is_sqlalchemy_v1, reason="SQLAlchemy 1.x issues")
+def test_configure_mappers() -> None:
+ """Test that `sa.orm.configure_mappers()` completes without raising an exception."""
+ sa.orm.configure_mappers()
+
+
@pytest.mark.xfail(is_sqlalchemy_v1, reason="SQLAlchemy 1.x issues")
def test_table_info(db: SQLDatabase) -> None:
"""Test that table info is constructed properly."""