-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
200 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from __future__ import annotations | ||
|
||
from dataclasses import dataclass | ||
from pathlib import ( | ||
Path, | ||
PurePosixPath, | ||
) | ||
from typing import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from collections.abc import Generator | ||
|
||
from datalad_core.iter_collections.utils import ( | ||
GitTreeItemType, | ||
git_ls_tree, | ||
git_mode_type_map, | ||
) | ||
|
||
|
||
@dataclass(frozen=True)
class GitTreeItem:
    """Item in a Git tree

    Immutable record describing a single entry of a Git tree. Only
    ``relpath`` is required; the remaining attributes default to ``None``.
    """

    # (relative) path of the item, as reported by Git, in POSIX conventions
    relpath: PurePosixPath
    # gitsha is not the sha1 of the file content, but the output
    # of `git hash-object` which does something like
    # `printf "blob $(wc -c < "$file_name")\0$(cat "$file_name")" | sha1sum`
    gitsha: str | None = None
    # type of the item, looked up from the mode Git reports for it
    gittype: GitTreeItemType | None = None
|
||
|
||
def iter_gittree(
    path: Path,
    treeish: str,
    *,
    recursive: str = 'repository',
) -> Generator[GitTreeItem]:
    """Uses ``git ls-tree`` to report on a tree in a Git repository

    Parameters
    ----------
    path: Path
      Path of a directory in a Git repository to report on. This directory
      need not be the root directory of the repository, but must be part of
      the repository. If the directory is not the root directory of a
      non-bare repository, the iterator is constrained to items underneath
      that directory.
    treeish: str
      Identifier of the tree to report on (e.g., ``HEAD``, ``HEAD~1``, or a
      commit SHA). It is handed to ``git ls-tree`` as given.
    recursive: {'repository', 'no'}, optional
      Behavior for recursion into subtrees. By default (``repository``),
      all trees within the repository (underneath ``path``) are reported,
      but not trees within submodules. If ``no``, only direct children
      are reported on.

    Yields
    ------
    :class:`GitTreeItem`
      The ``relpath`` attribute of an item is a ``PurePosixPath`` with the
      corresponding (relative) path, as reported by Git (in POSIX
      conventions).

    Raises
    ------
    CommandError
      Presumably propagated from the underlying Git call when ``treeish``
      cannot be resolved (e.g., ``HEAD`` in a repository without commits).
    """
    # we force-convert to Path to give us the peace of mind we want.
    # The docs already ask for that, but it is easy to
    # forget/ignore and leads to non-obvious errors. Running this once is
    # a cheap safety net
    path = Path(path)

    # although it would be easy to also query the object size (`--long`),
    # we do not do so, because it has a substantial runtime impact. It is
    # unclear what the main factor for the slowdown is, but in test cases
    # a 10x slowdown was observed.
    # we also do not go for a custom format
    # (`--format=%(objectmode)%x09%(objectname)%x09%(path)`) that would
    # allow for a single split by tab, because if we do, Git starts quoting
    # paths with special characters (like tab) again
    #
    # only 'repository' enables recursion; any other value yields a
    # non-recursive listing
    lstree_args = ['-r'] if recursive == 'repository' else []

    for line in git_ls_tree(path, treeish, *lstree_args):
        yield _get_tree_item(line)
|
||
|
||
def _get_tree_item(spec: str) -> GitTreeItem:
    """Convert a single ``git ls-tree`` output line into a ``GitTreeItem``

    ``spec`` is split at the first tab character into a property section
    and the path. Any further tab is kept as part of the path. The
    property section is split on single spaces into mode, type name, and
    object name.

    A ``ValueError`` is raised when ``spec`` contains no tab, or when the
    property section does not yield exactly a mode and an object name.
    """
    props, path = spec.split('\t', maxsplit=1)
    # 0::2 gets the first and third (last) item, effectively skipping the
    # type name (blob/tree etc.), we have the mode lookup for that, which
    # provides more detail
    mode, sha = props.split(' ')[0::2]
    return GitTreeItem(
        relpath=PurePosixPath(path),
        gitsha=sha,
        gittype=git_mode_type_map[mode],
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
from pathlib import ( | ||
PurePosixPath, | ||
) | ||
|
||
import pytest | ||
|
||
from datalad_core.consts import PRE_INIT_COMMIT_SHA | ||
from datalad_core.runners import CommandError | ||
from datalad_core.tests import ( | ||
call_git_addcommit, | ||
) | ||
|
||
from ..gittree import ( | ||
GitTreeItem, | ||
GitTreeItemType, | ||
iter_gittree, | ||
) | ||
|
||
|
||
def test_iter_gittree(gitrepo):
    """Check item properties, tree-ish selection, recursion and subdir mode"""
    # an initial commit
    (gitrepo / '.gitkeep').touch()
    call_git_addcommit(gitrepo)

    # we add a new file and test its expected properties
    probe_name = 'probe.txt'
    # items report their path as a PurePosixPath, so all comparisons below
    # must use a PurePosixPath too (comparing against a str is always False)
    probe_relpath = PurePosixPath('subdir') / probe_name
    # Git object name expected for a blob with the content 'probe'
    expected_probe_sha = '24ae15ce9741d53115a9fc71c2b761790ca47995'
    probe = gitrepo / 'subdir' / probe_name
    probe.parent.mkdir()
    probe.write_text('probe')
    call_git_addcommit(gitrepo)

    tracked_items = list(iter_gittree(gitrepo, 'HEAD'))
    # every reported item is fully populated
    assert all(
        isinstance(i, GitTreeItem) and i.gitsha and i.gittype for i in tracked_items
    )
    # the probe is reported with the expected path, object name and type
    assert any(
        i.relpath == probe_relpath
        and i.gitsha == expected_probe_sha
        and i.gittype == GitTreeItemType.file
        for i in tracked_items
    )
    # if we check the prior version, we do not see it (hence the
    # tree-ish passing is working)
    assert not any(
        i.relpath == probe_relpath for i in iter_gittree(gitrepo, 'HEAD~1')
    )

    # if we disable recursion, the probe is not listed, but its
    # parent dir is
    tracked_toplevel_items = list(iter_gittree(gitrepo, 'HEAD', recursive='no'))
    assert not any(i.relpath == probe_relpath for i in tracked_toplevel_items)
    assert any(
        i.relpath == PurePosixPath('subdir')
        and i.gitsha == '0dd69202ba4657a5d9c37d5716d5b27127c4b57b'
        and i.gittype == GitTreeItemType.directory
        for i in tracked_toplevel_items
    )
    # iterating on a subdir does constrain the report
    tracked_subdir_items = list(iter_gittree(probe.parent, 'HEAD'))
    assert len(tracked_subdir_items) == 1
    probe_item = tracked_subdir_items[0]
    assert probe_item.relpath.name == probe_name
    assert probe_item.gitsha == expected_probe_sha
|
||
|
||
def test_name_starting_with_tab(gitrepo):
    """A file name with a leading tab is reported with its exact name"""
    tabbed_file_name = '\ttab.txt'
    tabbed_file = gitrepo / tabbed_file_name
    try:
        tabbed_file.write_text('name of this file starts with a tab')
    except OSError:
        # the file system refused to create a file with such a name
        pytest.skip('not applicable on crippled filesystems')

    call_git_addcommit(gitrepo)
    iter_names = [item.relpath for item in iter_gittree(gitrepo, 'HEAD')]
    assert PurePosixPath(tabbed_file_name) in iter_names
|
||
|
||
def test_iter_gittree_empty(gitrepo):
    """Behavior on a repository that has no commit yet"""
    # without any commit, HEAD cannot be resolved and the Git call fails
    with pytest.raises(CommandError, match='Not a valid object name HEAD'):
        list(iter_gittree(gitrepo, 'HEAD'))

    # the pre-init commit SHA is accepted and yields no items
    all_items = list(iter_gittree(gitrepo, PRE_INIT_COMMIT_SHA))
    assert len(all_items) == 0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters