Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Support fetching a file from a git repo as an asset #12181

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions spacy/cli/_util.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from tempfile import TemporaryDirectory
from typing import Dict, Any, Union, List, Optional, Tuple, Iterable
from typing import TYPE_CHECKING, overload
import sys
Expand Down Expand Up @@ -405,16 +406,20 @@ def git_checkout(
f"temporarily. To only download the files needed, make sure "
f"you're using Git v2.22 or above."
)
with make_tempdir() as tmp_dir:
cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
with TemporaryDirectory() as tmp_dir:
tmp_path = Path(tmp_dir)
cmd = f"git -C {tmp_path} clone {repo} . -b {branch}"
run_command(cmd, capture=True)
# We need Path(name) to make sure we also support subdirectories
try:
source_path = tmp_dir / Path(subpath)
if not is_subpath_of(tmp_dir, source_path):
source_path = tmp_path / Path(subpath)
if not is_subpath_of(tmp_path, source_path):
err = f"'{subpath}' is a path outside of the cloned repository."
msg.fail(err, repo, exits=1)
shutil.copytree(str(source_path), str(dest))
if os.path.isdir(source_path):
shutil.copytree(source_path, dest)
else:
shutil.copyfile(source_path, dest)
except FileNotFoundError:
err = f"Can't clone {subpath}. Make sure the directory exists in the repo (branch '{branch}')"
msg.fail(err, repo, exits=1)
Expand Down
30 changes: 29 additions & 1 deletion spacy/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import math
from collections import Counter
from tempfile import TemporaryDirectory
from typing import Tuple, List, Dict, Any
import pkg_resources
import time
Expand All @@ -20,7 +21,7 @@
from spacy.cli._util import parse_config_overrides, string_to_list
from spacy.cli._util import substitute_project_variables
from spacy.cli._util import validate_project_commands
from spacy.cli._util import upload_file, download_file
from spacy.cli._util import upload_file, download_file, git_checkout
from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
from spacy.cli.debug_data import _get_labels_from_spancat
from spacy.cli.debug_data import _get_distribution, _get_kl_divergence
Expand Down Expand Up @@ -145,6 +146,33 @@ def test_issue11235():
assert cfg["commands"][0]["script"][0] == f"hello {lang_var}"


def test_project_git_dir_asset():
    """Check that a directory from a git repo can be fetched as an asset.

    Calls `git_checkout` for the `os_signpost` subpath of a small public
    repository (ref `v0.0.3`) and verifies that the destination inside a
    throwaway temporary directory ends up being a directory.
    """
    with TemporaryDirectory() as tmp_root:
        target = Path(tmp_root) / "signpost"
        # Use a very small repo.
        git_checkout(
            "https://github.com/explosion/os-signpost.git",
            "os_signpost",
            target,
            branch="v0.0.3",
        )
        assert target.is_dir()


@pytest.mark.issue(12168)
def test_project_git_file_asset():
    """Check that a single file from a git repo can be fetched as an asset.

    Calls `git_checkout` for the `README.md` subpath of a small public
    repository (ref `v0.0.3`) and verifies that the destination inside a
    throwaway temporary directory ends up being a regular file.
    """
    with TemporaryDirectory() as tmp_root:
        target = Path(tmp_root) / "readme.md"
        # Use a very small repo.
        git_checkout(
            "https://github.com/explosion/os-signpost.git",
            "README.md",
            target,
            branch="v0.0.3",
        )
        assert target.is_file()


def test_cli_info():
nlp = Dutch()
nlp.add_pipe("textcat")
Expand Down