diff --git a/dvc/output.py b/dvc/output.py index 7eef73d646..422e1fa574 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -542,7 +542,9 @@ def _build( desc=f"Collecting files and computing hashes in {self}", disable=no_progress_bar, ) as pb: - return build(*args, callback=pb.as_callback(), **kwargs) + kwargs["callback"] = pb.as_callback() + kwargs.setdefault("checksum_jobs", self.fs.hash_jobs) + return build(*args, **kwargs) def _get_hash_meta(self): if self.use_cache: @@ -730,6 +732,7 @@ def _checkout(self, *args, **kwargs) -> Optional[bool]: from dvc_data.hashfile.checkout import LinkError, PromptError kwargs.setdefault("ignore", self.dvcignore) + kwargs.setdefault("checksum_jobs", self.fs.hash_jobs) try: return checkout(*args, **kwargs) except PromptError as exc: @@ -749,11 +752,8 @@ def commit(self, filter_info=None, relink=True) -> None: granular = ( self.is_dir_checksum and filter_info and filter_info != self.fs_path ) - # NOTE: trying to use hardlink during transfer only if we will be - # relinking later - hardlink = relink if granular: - obj = self._commit_granular_dir(filter_info, hardlink) + obj = self._commit_granular_dir(filter_info, hardlink=False) else: staging, _, obj = self._build( self.cache, @@ -771,7 +771,7 @@ def commit(self, filter_info=None, relink=True) -> None: self.cache, {obj.hash_info}, shallow=False, - hardlink=hardlink, + hardlink=False, callback=cb, ) if relink: @@ -786,6 +786,7 @@ def commit(self, filter_info=None, relink=True) -> None: state=self.repo.state, prompt=prompt.confirm, progress_callback=cb, + old=obj, ) self.set_exec() @@ -1403,7 +1404,7 @@ def add( # noqa: C901 staging, self.cache, {obj.hash_info}, - hardlink=relink, + hardlink=False, shallow=False, callback=cb, ) @@ -1421,6 +1422,7 @@ def add( # noqa: C901 state=self.repo.state, prompt=prompt.confirm, progress_callback=callback, + old=obj, ) self.set_exec() return obj diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index c12a81b50c..2b89626972 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -147,7 +147,7 @@ def __init__( # noqa: PLR0915, PLR0913 ): from dvc.cachemgr import CacheManager from dvc.data_cloud import DataCloud - from dvc.fs import GitFileSystem, LocalFileSystem, localfs + from dvc.fs import GitFileSystem, LocalFileSystem from dvc.lock import LockNoop, make_lock from dvc.repo.artifacts import Artifacts from dvc.repo.datasets import Datasets @@ -161,7 +161,7 @@ def __init__( # noqa: PLR0915, PLR0913 self.url = url self._fs_conf = {"repo_factory": repo_factory} - self._fs = fs or localfs + self._fs = fs or LocalFileSystem() self._scm = scm self._config = config self._remote = remote @@ -206,6 +206,11 @@ def __init__( # noqa: PLR0915, PLR0913 friendly=True, ) os.makedirs(self.site_cache_dir, exist_ok=True) + if not fs and ( + checksum_jobs := self.config["core"].get("checksum_jobs") + ): + self.fs.hash_jobs = checksum_jobs + self.state = State(self.root_dir, self.site_cache_dir, self.dvcignore) else: self.lock = LockNoop() diff --git a/dvc/testing/benchmarks/cli/commands/test_add.py b/dvc/testing/benchmarks/cli/commands/test_add.py index 9aadbaea10..d8fdcee304 100644 --- a/dvc/testing/benchmarks/cli/commands/test_add.py +++ b/dvc/testing/benchmarks/cli/commands/test_add.py @@ -1,2 +1,19 @@ -def test_add(bench_dvc, tmp_dir, dvc, dataset): - bench_dvc("add", dataset) +from .test_checkout import _skip_unsupported_link + + +def generate_test(*, link_type="copy"): + def _test_add(bench_dvc, tmp_dir, dvc, dataset): + _skip_unsupported_link((tmp_dir / ".dvc" / "cache"), tmp_dir, link_type) + + with dvc.config.edit() as conf: + conf["cache"]["type"] = link_type + + bench_dvc("add", dataset) + bench_dvc("add", dataset, name="noop") + + return _test_add + + +test_add_copy = generate_test(link_type="copy") +test_add_symlink = generate_test(link_type="symlink") +test_add_hardlink = generate_test(link_type="hardlink") diff --git a/pyproject.toml b/pyproject.toml index 1e7bc73792..d0e322116f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ dependencies = [ "distro>=1.3", "dpath<3,>=2.1.0", "dulwich", - "dvc-data>=3.15,<3.16", + "dvc-data>=3.16,<3.17", "dvc-http>=2.29.0", "dvc-objects", "dvc-render>=1.0.1,<2", diff --git a/tests/func/test_add.py b/tests/func/test_add.py index ea3eab9ed2..90f45273ce 100644 --- a/tests/func/test_add.py +++ b/tests/func/test_add.py @@ -305,20 +305,20 @@ def test_should_update_state_entry_for_directory_after_add(mocker, dvc, tmp_dir) ret = main(["add", "data"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 4 + assert file_md5_counter.mock.call_count == 3 ret = main(["status"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 5 + assert file_md5_counter.mock.call_count == 4 os.rename("data", "data.back") ret = main(["checkout"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 6 + assert file_md5_counter.mock.call_count == 5 ret = main(["status"]) assert ret == 0 - assert file_md5_counter.mock.call_count == 7 + assert file_md5_counter.mock.call_count == 6 def test_add_commit(tmp_dir, dvc): @@ -339,15 +339,15 @@ def test_should_collect_dir_cache_only_once(mocker, tmp_dir, dvc): counter = mocker.spy(dvc_data.hashfile.build, "_build_tree") ret = main(["add", "data"]) assert ret == 0 - assert counter.mock.call_count == 2 + assert counter.mock.call_count == 1 ret = main(["status"]) assert ret == 0 - assert counter.mock.call_count == 3 + assert counter.mock.call_count == 2 ret = main(["status"]) assert ret == 0 - assert counter.mock.call_count == 4 + assert counter.mock.call_count == 3 def test_should_place_stage_in_data_dir_if_repository_below_symlink( @@ -754,6 +754,7 @@ def test_add_file_in_symlink_dir(make_tmp_dir, tmp_dir, dvc): def test_add_with_cache_link_error(tmp_dir, dvc, mocker, capsys): tmp_dir.gen("foo", "foo") + dvc.cache.local.cache_types = ["symlink", "hardlink"] mocker.patch("dvc_data.hashfile.checkout.test_links", return_value=[]) dvc.add("foo") err = capsys.readouterr()[1]