Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Private submodule support for GitRepository #16518

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 55 additions & 16 deletions src/prefect/runner/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,16 @@
import subprocess
from copy import deepcopy
from pathlib import Path
from typing import Any, Dict, Optional, Protocol, TypedDict, Union, runtime_checkable
from typing import (
Any,
Dict,
List,
Optional,
Protocol,
TypedDict,
Union,
runtime_checkable,
)
from urllib.parse import urlparse, urlsplit, urlunparse
from uuid import uuid4

Expand Down Expand Up @@ -147,11 +156,9 @@ def pull_interval(self) -> Optional[int]:
return self._pull_interval

@property
def _repository_url_with_credentials(self) -> str:
def _formatted_credentials(self) -> Optional[str]:
if not self._credentials:
return self._url

url_components = urlparse(self._url)
return None

credentials = (
self._credentials.model_dump()
Expand All @@ -165,19 +172,47 @@ def _repository_url_with_credentials(self) -> str:
elif isinstance(v, SecretStr):
credentials[k] = v.get_secret_value()

formatted_credentials = _format_token_from_credentials(
urlparse(self._url).netloc, credentials
)
if url_components.scheme == "https" and formatted_credentials is not None:
return _format_token_from_credentials(urlparse(self._url).netloc, credentials)

@property
def _repository_url_with_credentials(self) -> str:
if not self._credentials:
return self._url

url_components = urlparse(self._url)

if url_components.scheme == "https" and self._formatted_credentials is not None:
updated_components = url_components._replace(
netloc=f"{formatted_credentials}@{url_components.netloc}"
netloc=f"{self._formatted_credentials}@{url_components.netloc}"
)
repository_url = urlunparse(updated_components)
else:
repository_url = self._url

return repository_url

def _git_config(self) -> List[str]:
jakekaplan marked this conversation as resolved.
Show resolved Hide resolved
"""
Build a git configuration to use when running git commands.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what other use cases we may have, but any other arbitrary config options can get added here.

"""
config = {}

# Submodules can be private. The url in .gitmodules
# will not include the credentials, we need to
# propagate them down here.
if self._include_submodules and self._formatted_credentials is not None:
url_components = urlparse(self._url)._replace(path="")
if url_components.scheme == "https":
with_auth = urlunparse(
url_components._replace(
netloc=f"{self._formatted_credentials}@{url_components.netloc}"
)
)
without_auth = urlunparse(url_components)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This bit looks almost exactly the same as what's in _repository_url_with_credentials except for the removal of the path URL component. Is there a way that we could reuse _repository_url_with_credentials here? I'd be ok with updating _repository_url_with_credentials to return the parsed URL instead of a str if that helps.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point. Refactored slightly, how does this look?

def _add_credentials_to_url(self, url: str) -> str:
"""Add credentials to given url if possible."""
components = urlparse(url)
credentials = self._formatted_credentials
if components.scheme != "https" or not credentials:
return url
return urlunparse(
components._replace(netloc=f"{credentials}@{components.netloc}")
)
@property
def _repository_url_with_credentials(self) -> str:
return self._add_credentials_to_url(self._url)
@property
def _git_config(self) -> list[str]:
"""Build a git configuration to use when running git commands."""
config = {}
# Submodules can be private. The url in .gitmodules
# will not include the credentials, we need to
# propagate them down here if they exist.
if self._include_submodules and self._formatted_credentials:
base_url = urlparse(self._url)._replace(path="")
without_auth = urlunparse(base_url)
with_auth = self._add_credentials_to_url(without_auth)
config[f"url.{with_auth}.insteadOf"] = without_auth
return ["-c", " ".join(f"{k}={v}" for k, v in config.items())] if config else []

config[f"url.{with_auth}.insteadOf"] = without_auth

return ["-c", " ".join(f"{k}={v}" for k, v in config.items())] if config else []

async def pull_code(self):
"""
Pulls the contents of the configured repository to the local filesystem.
Expand Down Expand Up @@ -208,7 +243,11 @@ async def pull_code(self):

self._logger.debug("Pulling latest changes from origin/%s", self._branch)
# Update the existing repository
cmd = ["git", "pull", "origin"]
cmd = ["git"]
# Add the git configuration, must be given after `git` and before the command
cmd += self._git_config()
# Add the pull command and parameters
cmd += ["pull", "origin"]
if self._branch:
cmd += [self._branch]
if self._include_submodules:
Expand All @@ -234,12 +273,12 @@ async def _clone_repo(self):
self._logger.debug("Cloning repository %s", self._url)

repository_url = self._repository_url_with_credentials
cmd = ["git"]
# Add the git configuration, must be given after `git` and before the command
cmd += self._git_config()
# Add the clone command and its parameters
cmd += ["clone", repository_url]

cmd = [
"git",
"clone",
repository_url,
]
if self._branch:
cmd += ["--branch", self._branch]
if self._include_submodules:
Expand Down
44 changes: 44 additions & 0 deletions tests/runner/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,50 @@ async def test_include_submodules_property(
cwd=Path.cwd() / "repo",
)

async def test_include_submodules_with_credentials(
self, mock_run_process: AsyncMock, monkeypatch
):
access_token = Secret(value="token")
await access_token.save("test-token")

repo = GitRepository(
url="https://github.com/org/repo.git",
include_submodules=True,
credentials={"access_token": access_token},
)
await repo.pull_code()
mock_run_process.assert_awaited_with(
[
"git",
"-c",
"url.https://[email protected]=https://github.com",
"clone",
"https://[email protected]/org/repo.git",
"--recurse-submodules",
"--depth",
"1",
str(Path.cwd() / "repo"),
]
)

# pretend the repo already exists
monkeypatch.setattr("pathlib.Path.exists", lambda x: ".git" in str(x))

await repo.pull_code()
mock_run_process.assert_awaited_with(
[
"git",
"-c",
"url.https://[email protected]=https://github.com",
"pull",
"origin",
"--recurse-submodules",
"--depth",
"1",
],
cwd=Path.cwd() / "repo",
)

async def test_git_clone_errors_obscure_access_token(
self, monkeypatch, capsys, tmp_path: Path
):
Expand Down
Loading