Skip to content

Commit

Permalink
refactor(duckdb): pass temp_directory through as-is to `duckdb.conn…
Browse files Browse the repository at this point in the history
…ect`

BREAKING CHANGE: Special handling of the `temp_directory` argument passed to Ibis is removed in favor of passing the argument through directly to `duckdb.connect`. Interior nodes of directory trees must be created, e.g., using `Path.mkdir(exists_ok=True, parents=True)`, `mkdir -p`, etc.
  • Loading branch information
NickCrews authored and cpcloud committed Nov 11, 2024
1 parent bcbab3f commit 4ea041f
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 48 deletions.
17 changes: 0 additions & 17 deletions ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import ast
import contextlib
import os
import urllib
import warnings
from operator import itemgetter
Expand Down Expand Up @@ -362,7 +361,6 @@ def do_connect(
self,
database: str | Path = ":memory:",
read_only: bool = False,
temp_directory: str | Path | None = None,
extensions: Sequence[str] | None = None,
**config: Any,
) -> None:
Expand All @@ -374,9 +372,6 @@ def do_connect(
Path to a duckdb database.
read_only
Whether the database is read-only.
temp_directory
Directory to use for spilling to disk. Only set by default for
in-memory connections.
extensions
A list of duckdb extensions to install/load upon connection.
config
Expand All @@ -394,19 +389,7 @@ def do_connect(
("md:", "motherduck:", ":memory:")
):
database = Path(database).absolute()

if temp_directory is None:
temp_directory = (
Path(os.environ.get("XDG_CACHE_HOME", Path.home() / ".cache"))
/ "ibis-duckdb"
/ str(os.getpid())
)
else:
Path(temp_directory).mkdir(parents=True, exist_ok=True)
config["temp_directory"] = str(temp_directory)

self.con = duckdb.connect(str(database), config=config, read_only=read_only)

self._post_connect(extensions)

@util.experimental
Expand Down
41 changes: 10 additions & 31 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import os
import sqlite3
import tempfile
from pathlib import Path

import duckdb
Expand Down Expand Up @@ -137,32 +136,6 @@ def test_read_json(con, data_dir, tmp_path):
assert nrows == jst.count().execute()


def test_temp_directory(tmp_path):
query = "SELECT current_setting('temp_directory')"

# 1. in-memory + no temp_directory specified
con = ibis.duckdb.connect()

value = con.raw_sql(query).fetchone()[0]
assert value # we don't care what the specific value is

temp_directory = Path(tempfile.gettempdir()) / "duckdb"

# 2. in-memory + temp_directory specified
con = ibis.duckdb.connect(temp_directory=temp_directory)
value = con.raw_sql(query).fetchone()[0]
assert value == str(temp_directory)

# 3. on-disk + no temp_directory specified
# untested, duckdb sets the temp_directory to something implementation
# defined

# 4. on-disk + temp_directory specified
con = ibis.duckdb.connect(tmp_path / "test2.ddb", temp_directory=temp_directory)
value = con.raw_sql(query).fetchone()[0]
assert value == str(temp_directory)


@pytest.fixture(scope="session")
def pgurl(): # pragma: no cover
pgcon = ibis.postgres.connect(
Expand Down Expand Up @@ -381,10 +354,16 @@ def test_memtable_with_nullable_pyarrow_not_string(con):
assert len(res) == len(data)


def test_set_temp_dir(tmp_path):
path = tmp_path / "foo" / "bar"
ibis.duckdb.connect(temp_directory=path)
assert path.exists()
@pytest.mark.parametrize(
"database",
[lambda parent: parent / "test.ddb", lambda _: ":memory:"],
ids=["disk", "memory"],
)
def test_temp_dir_set(tmp_path, database):
temp_directory = tmp_path / "does" / "not" / "exist"
temp_directory.mkdir(parents=True, exist_ok=True)
con = ibis.duckdb.connect(database(tmp_path), temp_directory=temp_directory)
assert con.settings["temp_directory"] == str(temp_directory)


@pytest.mark.xfail(
Expand Down

0 comments on commit 4ea041f

Please sign in to comment.