diff --git a/.authors.yml b/.authors.yml index b22e4b31e4..4e6c03865d 100644 --- a/.authors.yml +++ b/.authors.yml @@ -1201,7 +1201,7 @@ alternate_emails: - clee@anaconda.com - name: Ken Odegard - num_commits: 155 + num_commits: 158 email: kodegard@anaconda.com first_commit: 2020-09-08 19:53:41 github: kenodegard diff --git a/CHANGELOG.md b/CHANGELOG.md index dfc774b884..e376dc1422 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ [//]: # (current developments) +## 3.28.3 (2024-01-04) + +### Bug fixes + +* Update `conda_build.os_utils.liefldd.ensure_binary` to handle `None` inputs. (#5123 via #5124) +* Update `conda_build.inspect_pkg.which_package` to use a cached mapping of paths to packages (first call: `O(n)`, subsequent calls: `O(1)`) instead of relying on `Path.samefile` comparisons (`O(n * m)`). (#5126 via #5130) + +### Contributors + +* @kenodegard + + + ## 3.28.2 (2023-12-15) ### Enhancements diff --git a/conda_build/inspect_pkg.py b/conda_build/inspect_pkg.py index 59772d1f7f..d5650681f6 100644 --- a/conda_build/inspect_pkg.py +++ b/conda_build/inspect_pkg.py @@ -42,7 +42,6 @@ on_mac, on_win, package_has_file, - samefile, ) log = get_logger(__name__) @@ -51,7 +50,7 @@ @deprecated("3.28.0", "24.1.0") @lru_cache(maxsize=None) def dist_files(prefix: str | os.PathLike | Path, dist: Dist) -> set[str]: - if (prec := PrefixData(prefix).get(dist.name, None)) is None: + if (prec := PrefixData(str(prefix)).get(dist.name, None)) is None: return set() elif MatchSpec(dist).match(prec): return set(prec["files"]) @@ -64,19 +63,48 @@ def which_package( path: str | os.PathLike | Path, prefix: str | os.PathLike | Path, ) -> Iterable[PrefixRecord]: - """ + """Detect which package(s) a path belongs to. + Given the path (of a (presumably) conda installed file) iterate over the conda packages the file came from. Usually the iteration yields only one package. + + We use lstat since a symlink doesn't clobber the file it points to. """ prefix = Path(prefix) - # historically, path was relative to prefix just to be safe we append to prefix - # (pathlib correctly handles this even if path is absolute) - path = prefix / path + # historically, path was relative to prefix, just to be safe we append to prefix + # get lstat before calling _file_package_mapping in case path doesn't exist + try: + lstat = (prefix / path).lstat() + except FileNotFoundError: + # FileNotFoundError: path doesn't exist + return + else: + yield from _file_package_mapping(prefix).get(lstat, ()) + + +@lru_cache(maxsize=None) +def _file_package_mapping(prefix: Path) -> dict[os.stat_result, set[PrefixRecord]]: + """Map paths to package records. + + We use lstat since a symlink doesn't clobber the file it points to. + """ + mapping: dict[os.stat_result, set[PrefixRecord]] = {} for prec in PrefixData(str(prefix)).iter_records(): - if any(samefile(prefix / file, path) for file in prec["files"]): - yield prec + for file in prec["files"]: + # packages are capable of removing files installed by other dependencies from + # the build prefix, in those cases lstat will fail, while which_package wont + # return the correct package(s) in such a condition we choose to not worry about + # it since this file to package lookup exists primarily to detect clobbering + try: + lstat = (prefix / file).lstat() + except FileNotFoundError: + # FileNotFoundError: path doesn't exist + continue + else: + mapping.setdefault(lstat, set()).add(prec) + return mapping def print_object_info(info, key): diff --git a/conda_build/os_utils/liefldd.py b/conda_build/os_utils/liefldd.py index 535b924771..8898f45473 100644 --- a/conda_build/os_utils/liefldd.py +++ b/conda_build/os_utils/liefldd.py @@ -43,10 +43,12 @@ def is_string(s): # these are to be avoided, or if not avoided they # should be passed a binary when possible as that # will prevent having to parse it multiple times. -def ensure_binary(file: str | os.PathLike | Path | lief.Binary) -> lief.Binary | None: +def ensure_binary( + file: str | os.PathLike | Path | lief.Binary | None, +) -> lief.Binary | None: if isinstance(file, lief.Binary): return file - elif not Path(file).exists(): + elif not file or not Path(file).exists(): return None try: return lief.parse(str(file)) @@ -525,9 +527,9 @@ def inspect_linkages_lief( todo.pop(0) filename2 = element[0] binary = element[1] - uniqueness_key = get_uniqueness_key(binary) if not binary: continue + uniqueness_key = get_uniqueness_key(binary) if uniqueness_key not in already_seen: parent_exe_dirname = None if binary.format == lief.EXE_FORMATS.PE: diff --git a/conda_build/utils.py b/conda_build/utils.py index 5ceff5f762..fc70e9660d 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2188,6 +2188,7 @@ def is_conda_pkg(pkg_path: str) -> bool: ) +@deprecated("3.28.3", "24.1.0") def samefile(path1: Path, path2: Path) -> bool: try: return path1.samefile(path2) diff --git a/pyproject.toml b/pyproject.toml index 602c15be73..8b55ee4168 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,4 +132,5 @@ markers = [ "slow: execute the slow tests if active", "sanity: execute the sanity tests", "no_default_testing_config: used internally to disable monkeypatching for testing_config", + "benchmark: execute the benchmark tests", ] diff --git a/tests/test_config.py b/tests/test_config.py index 7c46ca0693..fa362a0b4f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -7,7 +7,7 @@ import pytest from conda_build.config import Config, get_or_merge_config -from conda_build.utils import on_win, samefile +from conda_build.utils import on_win @pytest.fixture @@ -39,20 +39,25 @@ def test_keep_old_work(config: Config, build_id: str, tmp_path: Path): config.croot = tmp_path config.build_id = build_id + magic = "a_touched_file.magic" + # empty working directory orig_dir = Path(config.work_dir) + assert orig_dir.exists() assert not len(os.listdir(config.work_dir)) # touch a file so working directory is not empty - (orig_dir / "a_touched_file.magic").touch() - assert len(os.listdir(config.work_dir)) + (orig_dir / magic).touch() + assert orig_dir.exists() + assert len(os.listdir(config.work_dir)) == 1 + assert Path(config.work_dir, magic).exists() config.compute_build_id("a_new_name", reset=True) - # working directory should still exist and have the touched file - assert not samefile(orig_dir, config.work_dir) + # working directory should still exist (in new location) and have the touched file assert not orig_dir.exists() - assert len(os.listdir(config.work_dir)) + assert len(os.listdir(config.work_dir)) == 1 + assert Path(config.work_dir, magic).exists() @pytest.mark.skipif(on_win, reason="Windows uses only the short prefix") diff --git a/tests/test_inspect_pkg.py b/tests/test_inspect_pkg.py index edefa96a54..97aa9228db 100644 --- a/tests/test_inspect_pkg.py +++ b/tests/test_inspect_pkg.py @@ -3,8 +3,11 @@ from __future__ import annotations import json +import os from pathlib import Path +from uuid import uuid4 +import pytest from conda.core.prefix_data import PrefixData from conda_build.inspect_pkg import which_package @@ -100,7 +103,7 @@ def test_which_package(tmp_path: Path): precs_hardlinkA = list(which_package(tmp_path / "hardlinkA", tmp_path)) assert len(precs_hardlinkA) == 1 - assert precs_hardlinkA[0] == precA + assert set(precs_hardlinkA) == {precA} precs_shared = list(which_package(tmp_path / "shared", tmp_path)) assert len(precs_shared) == 2 @@ -108,12 +111,71 @@ def test_which_package(tmp_path: Path): precs_internal = list(which_package(tmp_path / "internal", tmp_path)) assert len(precs_internal) == 1 - assert precs_internal[0] == precA + assert set(precs_internal) == {precA} precs_external = list(which_package(tmp_path / "external", tmp_path)) - assert len(precs_external) == 2 - assert set(precs_external) == {precA, precB} + assert len(precs_external) == 1 + assert set(precs_external) == {precA} precs_hardlinkB = list(which_package(tmp_path / "hardlinkB", tmp_path)) - assert len(precs_hardlinkB) == 2 - assert set(precs_hardlinkB) == {precA, precB} + assert len(precs_hardlinkB) == 1 + assert set(precs_hardlinkB) == {precB} + + +@pytest.mark.benchmark +def test_which_package_battery(tmp_path: Path): + # regression: https://github.com/conda/conda-build/issues/5126 + # create a dummy environment + (tmp_path / "conda-meta").mkdir() + (tmp_path / "conda-meta" / "history").touch() + (tmp_path / "lib").mkdir() + + # dummy packages with files + removed = [] + for _ in range(100): + name = f"package_{uuid4().hex}" + + # mock a package with 100 files + files = [f"lib/{uuid4().hex}" for _ in range(100)] + for file in files: + (tmp_path / file).touch() + + # mock a removed file + remove = f"lib/{uuid4().hex}" + files.append(remove) + removed.append(remove) + + (tmp_path / "conda-meta" / f"{name}-1-0.json").write_text( + json.dumps( + { + "build": "0", + "build_number": 0, + "channel": f"{name}-channel", + "files": files, + "name": name, + "paths_data": { + "paths": [ + {"_path": file, "path_type": "hardlink", "size_in_bytes": 0} + for file in files + ], + "paths_version": 1, + }, + "version": "1", + } + ) + ) + + # every path should return exactly one package + for subdir, _, files in os.walk(tmp_path / "lib"): + for file in files: + path = Path(subdir, file) + + assert len(list(which_package(path, tmp_path))) == 1 + + # removed files should return no packages + # this occurs when, e.g., a package removes files installed by another package + for file in removed: + assert not len(list(which_package(tmp_path / file, tmp_path))) + + # missing files should return no packages + assert not len(list(which_package(tmp_path / "missing", tmp_path)))