Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: bump dvc-data to >=3.16.0 #10513

Merged
merged 1 commit into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions dvc/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,9 @@ def _build(
desc=f"Collecting files and computing hashes in {self}",
disable=no_progress_bar,
) as pb:
return build(*args, callback=pb.as_callback(), **kwargs)
kwargs["callback"] = pb.as_callback()
kwargs.setdefault("checksum_jobs", self.fs.hash_jobs)
return build(*args, **kwargs)

def _get_hash_meta(self):
if self.use_cache:
Expand Down Expand Up @@ -730,6 +732,7 @@ def _checkout(self, *args, **kwargs) -> Optional[bool]:
from dvc_data.hashfile.checkout import LinkError, PromptError

kwargs.setdefault("ignore", self.dvcignore)
kwargs.setdefault("checksum_jobs", self.fs.hash_jobs)
try:
return checkout(*args, **kwargs)
except PromptError as exc:
Expand All @@ -749,11 +752,8 @@ def commit(self, filter_info=None, relink=True) -> None:
granular = (
self.is_dir_checksum and filter_info and filter_info != self.fs_path
)
# NOTE: trying to use hardlink during transfer only if we will be
# relinking later
hardlink = relink
if granular:
obj = self._commit_granular_dir(filter_info, hardlink)
obj = self._commit_granular_dir(filter_info, hardlink=False)
else:
staging, _, obj = self._build(
self.cache,
Expand All @@ -771,7 +771,7 @@ def commit(self, filter_info=None, relink=True) -> None:
self.cache,
{obj.hash_info},
shallow=False,
hardlink=hardlink,
hardlink=False,
callback=cb,
)
if relink:
Expand All @@ -786,6 +786,7 @@ def commit(self, filter_info=None, relink=True) -> None:
state=self.repo.state,
prompt=prompt.confirm,
progress_callback=cb,
old=obj,
)
self.set_exec()

Expand Down Expand Up @@ -1403,7 +1404,7 @@ def add( # noqa: C901
staging,
self.cache,
{obj.hash_info},
hardlink=relink,
hardlink=False,
shallow=False,
callback=cb,
)
Expand All @@ -1421,6 +1422,7 @@ def add( # noqa: C901
state=self.repo.state,
prompt=prompt.confirm,
progress_callback=callback,
old=obj,
)
self.set_exec()
return obj
Expand Down
9 changes: 7 additions & 2 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def __init__( # noqa: PLR0915, PLR0913
):
from dvc.cachemgr import CacheManager
from dvc.data_cloud import DataCloud
from dvc.fs import GitFileSystem, LocalFileSystem, localfs
from dvc.fs import GitFileSystem, LocalFileSystem
from dvc.lock import LockNoop, make_lock
from dvc.repo.artifacts import Artifacts
from dvc.repo.datasets import Datasets
Expand All @@ -161,7 +161,7 @@ def __init__( # noqa: PLR0915, PLR0913

self.url = url
self._fs_conf = {"repo_factory": repo_factory}
self._fs = fs or localfs
self._fs = fs or LocalFileSystem()
self._scm = scm
self._config = config
self._remote = remote
Expand Down Expand Up @@ -206,6 +206,11 @@ def __init__( # noqa: PLR0915, PLR0913
friendly=True,
)
os.makedirs(self.site_cache_dir, exist_ok=True)
if not fs and (
checksum_jobs := self.config["core"].get("checksum_jobs")
):
self.fs.hash_jobs = checksum_jobs

self.state = State(self.root_dir, self.site_cache_dir, self.dvcignore)
else:
self.lock = LockNoop()
Expand Down
21 changes: 19 additions & 2 deletions dvc/testing/benchmarks/cli/commands/test_add.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
def test_add(bench_dvc, tmp_dir, dvc, dataset):
bench_dvc("add", dataset)
from .test_checkout import _skip_unsupported_link


def generate_test(*, link_type="copy"):
    """Build an ``add`` benchmark bound to a specific cache link type.

    Args:
        link_type: Value written to the ``cache.type`` config option before
            the benchmark runs. Keyword-only; defaults to ``"copy"``.

    Returns:
        A test function taking the ``bench_dvc``, ``tmp_dir``, ``dvc`` and
        ``dataset`` arguments, suitable for binding to a ``test_*`` name.
    """

    def _test_add(bench_dvc, tmp_dir, dvc, dataset):
        # NOTE(review): presumably skips the test when ``link_type`` cannot be
        # used between the cache directory and the workspace - confirm against
        # the helper in test_checkout.
        cache_dir = tmp_dir / ".dvc" / "cache"
        _skip_unsupported_link(cache_dir, tmp_dir, link_type)

        # Persist the requested link type before anything is added.
        with dvc.config.edit() as conf:
            conf["cache"]["type"] = link_type

        bench_dvc("add", dataset)
        # Second run on the same dataset, recorded under the name "noop".
        bench_dvc("add", dataset, name="noop")

    return _test_add


# One benchmark per link type: each name binds the function returned by
# generate_test for the given link_type value.
test_add_copy = generate_test(link_type="copy")
test_add_symlink = generate_test(link_type="symlink")
test_add_hardlink = generate_test(link_type="hardlink")
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ dependencies = [
"distro>=1.3",
"dpath<3,>=2.1.0",
"dulwich",
"dvc-data>=3.15,<3.16",
"dvc-data>=3.16,<3.17",
"dvc-http>=2.29.0",
"dvc-objects",
"dvc-render>=1.0.1,<2",
Expand Down
15 changes: 8 additions & 7 deletions tests/func/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,20 +305,20 @@ def test_should_update_state_entry_for_directory_after_add(mocker, dvc, tmp_dir)

ret = main(["add", "data"])
assert ret == 0
assert file_md5_counter.mock.call_count == 4
assert file_md5_counter.mock.call_count == 3

ret = main(["status"])
assert ret == 0
assert file_md5_counter.mock.call_count == 5
assert file_md5_counter.mock.call_count == 4

os.rename("data", "data.back")
ret = main(["checkout"])
assert ret == 0
assert file_md5_counter.mock.call_count == 6
assert file_md5_counter.mock.call_count == 5

ret = main(["status"])
assert ret == 0
assert file_md5_counter.mock.call_count == 7
assert file_md5_counter.mock.call_count == 6


def test_add_commit(tmp_dir, dvc):
Expand All @@ -339,15 +339,15 @@ def test_should_collect_dir_cache_only_once(mocker, tmp_dir, dvc):
counter = mocker.spy(dvc_data.hashfile.build, "_build_tree")
ret = main(["add", "data"])
assert ret == 0
assert counter.mock.call_count == 2
assert counter.mock.call_count == 1

ret = main(["status"])
assert ret == 0
assert counter.mock.call_count == 3
assert counter.mock.call_count == 2

ret = main(["status"])
assert ret == 0
assert counter.mock.call_count == 4
assert counter.mock.call_count == 3


def test_should_place_stage_in_data_dir_if_repository_below_symlink(
Expand Down Expand Up @@ -754,6 +754,7 @@ def test_add_file_in_symlink_dir(make_tmp_dir, tmp_dir, dvc):
def test_add_with_cache_link_error(tmp_dir, dvc, mocker, capsys):
tmp_dir.gen("foo", "foo")

dvc.cache.local.cache_types = ["symlink", "hardlink"]
mocker.patch("dvc_data.hashfile.checkout.test_links", return_value=[])
dvc.add("foo")
err = capsys.readouterr()[1]
Expand Down
Loading