From 2d35b8072c2f6ffc48927aefe2106925e6414918 Mon Sep 17 00:00:00 2001 From: Nipunn Koorapati Date: Sun, 1 Nov 2020 23:36:25 +0000 Subject: [PATCH] Use git's partial clone feature to speed up pip Clone with --filter=blob:none, which fetches all metadata but fetches blobs only on demand, as checkout needs them. Since pip typically needs the blobs for only a single revision, this can be a big improvement, especially when fetching from repositories with a lot of history or over slower network connections. Partial clone is used only when the local git is 2.17 or newer; older versions fall back to a regular full clone. Added a unit test for the rev-less path. Confirmed that both of the if/else paths are tested by the unit tests. --- news/9086.feature.rst | 1 + src/pip/_internal/vcs/git.py | 17 ++++- tests/functional/test_vcs_git.py | 105 +++++++++++++++++++++++++++++++ 3 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 news/9086.feature.rst diff --git a/news/9086.feature.rst b/news/9086.feature.rst new file mode 100644 index 00000000000..7a68189f0b2 --- /dev/null +++ b/news/9086.feature.rst @@ -0,0 +1 @@ +When a revision is specified in a Git URL, use git's partial clone feature to speed up source retrieval. 
diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 2b9fa8d8b40..4f0025c5960 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -262,12 +262,27 @@ def fetch_new(self, dest, url, rev_options): # type: (str, HiddenText, RevOptions) -> None rev_display = rev_options.to_display() logger.info("Cloning %s%s to %s", url, rev_display, display_path(dest)) - self.run_command(make_command("clone", "-q", url, dest)) + if self.get_git_version() >= (2, 17): + # Git added support for partial clone in 2.17 + # https://git-scm.com/docs/partial-clone + # Speeds up cloning by functioning without a complete copy of repository + self.run_command( + make_command( + "clone", + "--filter=blob:none", + "-q", + url, + dest, + ) + ) + else: + self.run_command(make_command("clone", "-q", url, dest)) if rev_options.rev: # Then a specific revision was requested. rev_options = self.resolve_revision(dest, url, rev_options) branch_name = getattr(rev_options, "branch_name", None) + logger.debug("Rev options %s, branch_name %s", rev_options, branch_name) if branch_name is None: # Only do a checkout if the current commit id doesn't match # the requested revision. 
diff --git a/tests/functional/test_vcs_git.py b/tests/functional/test_vcs_git.py index d5de1a2fd77..936a52a56c4 100644 --- a/tests/functional/test_vcs_git.py +++ b/tests/functional/test_vcs_git.py @@ -3,6 +3,7 @@ """ import os +from unittest.mock import patch import pytest @@ -282,3 +283,107 @@ def test_resolve_commit_not_on_branch(script, tmp_path): # check we can fetch our commit rev_options = Git.make_rev_options(commit) Git().fetch_new(str(clone_path), repo_path.as_uri(), rev_options) + + +def _initialize_clonetest_server(repo_path, script, enable_partial_clone): + repo_path.mkdir() + script.run("git", "init", cwd=str(repo_path)) + repo_file = repo_path / "file.txt" + repo_file.write_text(u".") + script.run("git", "add", "file.txt", cwd=str(repo_path)) + script.run("git", "commit", "-m", "initial commit", cwd=str(repo_path)) + + # Enable filtering support on server + if enable_partial_clone: + script.run("git", "config", "uploadpack.allowFilter", "true", cwd=repo_path) + script.run( + "git", "config", "uploadpack.allowanysha1inwant", "true", cwd=repo_path + ) + + return repo_file + + +@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old") +def test_partial_clone(script, tmp_path): + """Test partial clone w/ a git-server that supports it""" + repo_path = tmp_path / "repo" + repo_file = _initialize_clonetest_server( + repo_path, script, enable_partial_clone=True + ) + clone_path1 = repo_path / "clone1" + clone_path2 = repo_path / "clone2" + + commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip() + + # Check that we can clone at HEAD + Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options()) + # Check that we can clone to commit + Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)) + + # Write some additional stuff to git pull + repo_file.write_text(u"..") + script.run("git", "commit", "-am", "second commit", cwd=str(repo_path)) + + # Make sure git pull works - 
with server supporting filtering + assert ( + "warning: filtering not recognized by server, ignoring" + not in script.run("git", "pull", cwd=clone_path1).stderr + ) + assert ( + "warning: filtering not recognized by server, ignoring" + not in script.run("git", "pull", cwd=clone_path2).stderr + ) + + +@pytest.mark.skipif(Git().get_git_version() < (2, 17), reason="git too old") +def test_partial_clone_without_server_support(script, tmp_path): + """Test partial clone w/ a git-server that does not support it""" + repo_path = tmp_path / "repo" + repo_file = _initialize_clonetest_server( + repo_path, script, enable_partial_clone=False + ) + clone_path1 = repo_path / "clone1" + clone_path2 = repo_path / "clone2" + + commit = script.run("git", "rev-parse", "HEAD", cwd=str(repo_path)).stdout.strip() + + # Check that we can clone at HEAD + Git().fetch_new(str(clone_path1), repo_path.as_uri(), Git.make_rev_options()) + # Check that we can clone to commit + Git().fetch_new(str(clone_path2), repo_path.as_uri(), Git.make_rev_options(commit)) + + # Write some additional stuff to git pull + repo_file.write_text(u"..") + script.run("git", "commit", "-am", "second commit", cwd=str(repo_path)) + + # Make sure git pull works - even though server doesn't support filtering + assert ( + "warning: filtering not recognized by server, ignoring" + in script.run("git", "pull", cwd=clone_path1).stderr + ) + assert ( + "warning: filtering not recognized by server, ignoring" + in script.run("git", "pull", cwd=clone_path2).stderr + ) + + +def test_clone_without_partial_clone_support(script, tmp_path): + """Older git clients don't support partial clone. 
Test the fallback path""" + repo_path = tmp_path / "repo" + repo_file = _initialize_clonetest_server( + repo_path, script, enable_partial_clone=True + ) + clone_path = repo_path / "clone1" + + # Check that we can clone w/ old version of git w/o --filter + with patch("pip._internal.vcs.git.Git.get_git_version", return_value=(2, 16)): + Git().fetch_new(str(clone_path), repo_path.as_uri(), Git.make_rev_options()) + + repo_file.write_text(u"...") + script.run("git", "commit", "-am", "third commit", cwd=str(repo_path)) + + # Should work fine w/o attempting to use `--filter` args + assert ( + "warning: filtering not recognized by server, ignoring" + not in script.run("git", "pull", cwd=clone_path).stderr + )