From 75b0e70f64f66fb1ae3e988e35354d6d2cc84840 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Tue, 26 Dec 2023 09:09:30 -0600 Subject: [PATCH 1/3] Fix `conda_build.os_utils.liefldd.ensure_binary` to handle `None` (#5124) --- conda_build/os_utils/liefldd.py | 8 +++++--- news/5124-fix-ensure_binary-None-handling | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 news/5124-fix-ensure_binary-None-handling diff --git a/conda_build/os_utils/liefldd.py b/conda_build/os_utils/liefldd.py index 0c47fd2533..9d638d055c 100644 --- a/conda_build/os_utils/liefldd.py +++ b/conda_build/os_utils/liefldd.py @@ -43,10 +43,12 @@ def is_string(s): # these are to be avoided, or if not avoided they # should be passed a binary when possible as that # will prevent having to parse it multiple times. -def ensure_binary(file: str | os.PathLike | Path | lief.Binary) -> lief.Binary | None: +def ensure_binary( + file: str | os.PathLike | Path | lief.Binary | None, +) -> lief.Binary | None: if isinstance(file, lief.Binary): return file - elif not Path(file).exists(): + elif not file or not Path(file).exists(): return None try: return lief.parse(str(file)) @@ -525,9 +527,9 @@ def inspect_linkages_lief( todo.pop(0) filename2 = element[0] binary = element[1] - uniqueness_key = get_uniqueness_key(binary) if not binary: continue + uniqueness_key = get_uniqueness_key(binary) if uniqueness_key not in already_seen: parent_exe_dirname = None if binary.format == lief.EXE_FORMATS.PE: diff --git a/news/5124-fix-ensure_binary-None-handling b/news/5124-fix-ensure_binary-None-handling new file mode 100644 index 0000000000..edd4b9d9a4 --- /dev/null +++ b/news/5124-fix-ensure_binary-None-handling @@ -0,0 +1,19 @@ +### Enhancements + +* + +### Bug fixes + +* Update `conda_build.os_utils.liefldd.ensure_binary` to handle `None` inputs. (#5123 via #5124) + +### Deprecations + +* + +### Docs + +* + +### Other + +* From 6edc3dc4af4c131c2e8455dabd42d9d2b5059c43 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Thu, 4 Jan 2024 12:29:36 -0600 Subject: [PATCH 2/3] Deprecate `conda_build.utils.samefile` in favor of path to package mapping (#5130) * Deprecate `conda_build.utils.samefile` * Implement path (using lstat) to package mapping * Add benchmark for `which_package` --- conda_build/inspect_pkg.py | 45 +++++++++++++++++---- conda_build/utils.py | 1 + news/5126-samefile-regression | 19 +++++++++ pyproject.toml | 1 + tests/test_config.py | 17 +++++--- tests/test_inspect_pkg.py | 74 ++++++++++++++++++++++++++++++++--- 6 files changed, 137 insertions(+), 20 deletions(-) create mode 100644 news/5126-samefile-regression diff --git a/conda_build/inspect_pkg.py b/conda_build/inspect_pkg.py index f8fa57a1b5..87bc372766 100644 --- a/conda_build/inspect_pkg.py +++ b/conda_build/inspect_pkg.py @@ -43,13 +43,13 @@ ) from .deprecations import deprecated -from .utils import on_mac, on_win, samefile +from .utils import on_mac, on_win @deprecated("3.28.0", "24.1.0") @lru_cache(maxsize=None) def dist_files(prefix: str | os.PathLike | Path, dist: Dist) -> set[str]: - if (prec := PrefixData(prefix).get(dist.name, None)) is None: + if (prec := PrefixData(str(prefix)).get(dist.name, None)) is None: return set() elif MatchSpec(dist).match(prec): return set(prec["files"]) @@ -62,19 +62,48 @@ def which_package( path: str | os.PathLike | Path, prefix: str | os.PathLike | Path, ) -> Iterable[PrefixRecord]: - """ + """Detect which package(s) a path belongs to. + Given the path (of a (presumably) conda installed file) iterate over the conda packages the file came from. Usually the iteration yields only one package. + + We use lstat since a symlink doesn't clobber the file it points to. """ prefix = Path(prefix) - # historically, path was relative to prefix just to be safe we append to prefix - # (pathlib correctly handles this even if path is absolute) - path = prefix / path + # historically, path was relative to prefix, just to be safe we append to prefix + # get lstat before calling _file_package_mapping in case path doesn't exist + try: + lstat = (prefix / path).lstat() + except FileNotFoundError: + # FileNotFoundError: path doesn't exist + return + else: + yield from _file_package_mapping(prefix).get(lstat, ()) + + +@lru_cache(maxsize=None) +def _file_package_mapping(prefix: Path) -> dict[os.stat_result, set[PrefixRecord]]: + """Map paths to package records. + + We use lstat since a symlink doesn't clobber the file it points to. + """ + mapping: dict[os.stat_result, set[PrefixRecord]] = {} for prec in PrefixData(str(prefix)).iter_records(): - if any(samefile(prefix / file, path) for file in prec["files"]): - yield prec + for file in prec["files"]: + # packages are capable of removing files installed by other dependencies from + # the build prefix, in those cases lstat will fail, while which_package wont + # return the correct package(s) in such a condition we choose to not worry about + # it since this file to package lookup exists primarily to detect clobbering + try: + lstat = (prefix / file).lstat() + except FileNotFoundError: + # FileNotFoundError: path doesn't exist + continue + else: + mapping.setdefault(lstat, set()).add(prec) + return mapping def print_object_info(info, key): diff --git a/conda_build/utils.py b/conda_build/utils.py index 6c300737c1..3961e38bd6 100644 --- a/conda_build/utils.py +++ b/conda_build/utils.py @@ -2222,6 +2222,7 @@ def is_conda_pkg(pkg_path: str) -> bool: ) +@deprecated("3.28.3", "24.1.0") def samefile(path1: Path, path2: Path) -> bool: try: return path1.samefile(path2) diff --git a/news/5126-samefile-regression b/news/5126-samefile-regression new file mode 100644 index 0000000000..e3ec790882 --- /dev/null +++ b/news/5126-samefile-regression @@ -0,0 +1,19 @@ +### Enhancements + +* + +### Bug fixes + +* Update `which_package` to use a cached mapping of files to packages (`O(1)`) instead of relying on `Path.samefile` comparisons (`O(n * m)`). (#5126 via #5130) + +### Deprecations + +* + +### Docs + +* + +### Other + +* diff --git a/pyproject.toml b/pyproject.toml index a3477043d0..825ae5627f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,4 +129,5 @@ markers = [ "slow: execute the slow tests if active", "sanity: execute the sanity tests", "no_default_testing_config: used internally to disable monkeypatching for testing_config", + "benchmark: execute the benchmark tests", ] diff --git a/tests/test_config.py b/tests/test_config.py index 7c46ca0693..fa362a0b4f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -7,7 +7,7 @@ import pytest from conda_build.config import Config, get_or_merge_config -from conda_build.utils import on_win, samefile +from conda_build.utils import on_win @pytest.fixture @@ -39,20 +39,25 @@ def test_keep_old_work(config: Config, build_id: str, tmp_path: Path): config.croot = tmp_path config.build_id = build_id + magic = "a_touched_file.magic" + # empty working directory orig_dir = Path(config.work_dir) + assert orig_dir.exists() assert not len(os.listdir(config.work_dir)) # touch a file so working directory is not empty - (orig_dir / "a_touched_file.magic").touch() - assert len(os.listdir(config.work_dir)) + (orig_dir / magic).touch() + assert orig_dir.exists() + assert len(os.listdir(config.work_dir)) == 1 + assert Path(config.work_dir, magic).exists() config.compute_build_id("a_new_name", reset=True) - # working directory should still exist and have the touched file - assert not samefile(orig_dir, config.work_dir) + # working directory should still exist (in new location) and have the touched file assert not orig_dir.exists() - assert len(os.listdir(config.work_dir)) + assert len(os.listdir(config.work_dir)) == 1 + assert Path(config.work_dir, magic).exists() @pytest.mark.skipif(on_win, reason="Windows uses only the short prefix") diff --git a/tests/test_inspect_pkg.py b/tests/test_inspect_pkg.py index edefa96a54..97aa9228db 100644 --- a/tests/test_inspect_pkg.py +++ b/tests/test_inspect_pkg.py @@ -3,8 +3,11 @@ from __future__ import annotations import json +import os from pathlib import Path +from uuid import uuid4 +import pytest from conda.core.prefix_data import PrefixData from conda_build.inspect_pkg import which_package @@ -100,7 +103,7 @@ def test_which_package(tmp_path: Path): precs_hardlinkA = list(which_package(tmp_path / "hardlinkA", tmp_path)) assert len(precs_hardlinkA) == 1 - assert precs_hardlinkA[0] == precA + assert set(precs_hardlinkA) == {precA} precs_shared = list(which_package(tmp_path / "shared", tmp_path)) assert len(precs_shared) == 2 @@ -108,12 +111,71 @@ def test_which_package(tmp_path: Path): precs_internal = list(which_package(tmp_path / "internal", tmp_path)) assert len(precs_internal) == 1 - assert precs_internal[0] == precA + assert set(precs_internal) == {precA} precs_external = list(which_package(tmp_path / "external", tmp_path)) - assert len(precs_external) == 2 - assert set(precs_external) == {precA, precB} + assert len(precs_external) == 1 + assert set(precs_external) == {precA} precs_hardlinkB = list(which_package(tmp_path / "hardlinkB", tmp_path)) - assert len(precs_hardlinkB) == 2 - assert set(precs_hardlinkB) == {precA, precB} + assert len(precs_hardlinkB) == 1 + assert set(precs_hardlinkB) == {precB} + + +@pytest.mark.benchmark +def test_which_package_battery(tmp_path: Path): + # regression: https://github.com/conda/conda-build/issues/5126 + # create a dummy environment + (tmp_path / "conda-meta").mkdir() + (tmp_path / "conda-meta" / "history").touch() + (tmp_path / "lib").mkdir() + + # dummy packages with files + removed = [] + for _ in range(100): + name = f"package_{uuid4().hex}" + + # mock a package with 100 files + files = [f"lib/{uuid4().hex}" for _ in range(100)] + for file in files: + (tmp_path / file).touch() + + # mock a removed file + remove = f"lib/{uuid4().hex}" + files.append(remove) + removed.append(remove) + + (tmp_path / "conda-meta" / f"{name}-1-0.json").write_text( + json.dumps( + { + "build": "0", + "build_number": 0, + "channel": f"{name}-channel", + "files": files, + "name": name, + "paths_data": { + "paths": [ + {"_path": file, "path_type": "hardlink", "size_in_bytes": 0} + for file in files + ], + "paths_version": 1, + }, + "version": "1", + } + ) + ) + + # every path should return exactly one package + for subdir, _, files in os.walk(tmp_path / "lib"): + for file in files: + path = Path(subdir, file) + + assert len(list(which_package(path, tmp_path))) == 1 + + # removed files should return no packages + # this occurs when, e.g., a package removes files installed by another package + for file in removed: + assert not len(list(which_package(tmp_path / file, tmp_path))) + + # missing files should return no packages + assert not len(list(which_package(tmp_path / "missing", tmp_path))) From 31cc7d5ce15e8ac01e2befa82f7695498c801e44 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Fri, 5 Jan 2024 11:34:35 -0600 Subject: [PATCH 3/3] Release 3.28.3 (#5128) --- .authors.yml | 2 +- CHANGELOG.md | 13 +++++++++++++ news/5124-fix-ensure_binary-None-handling | 19 ------------------- news/5126-samefile-regression | 19 ------------------- 4 files changed, 14 insertions(+), 39 deletions(-) delete mode 100644 news/5124-fix-ensure_binary-None-handling delete mode 100644 news/5126-samefile-regression diff --git a/.authors.yml b/.authors.yml index b22e4b31e4..4e6c03865d 100644 --- a/.authors.yml +++ b/.authors.yml @@ -1201,7 +1201,7 @@ alternate_emails: - clee@anaconda.com - name: Ken Odegard - num_commits: 155 + num_commits: 158 email: kodegard@anaconda.com first_commit: 2020-09-08 19:53:41 github: kenodegard diff --git a/CHANGELOG.md b/CHANGELOG.md index dfc774b884..e376dc1422 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ [//]: # (current developments) +## 3.28.3 (2024-01-04) + +### Bug fixes + +* Update `conda_build.os_utils.liefldd.ensure_binary` to handle `None` inputs. (#5123 via #5124) +* Update `conda_build.inspect_pkg.which_package` to use a cached mapping of paths to packages (first call: `O(n)`, subsequent calls: `O(1)`) instead of relying on `Path.samefile` comparisons (`O(n * m)`). (#5126 via #5130) + +### Contributors + +* @kenodegard + + + ## 3.28.2 (2023-12-15) ### Enhancements diff --git a/news/5124-fix-ensure_binary-None-handling b/news/5124-fix-ensure_binary-None-handling deleted file mode 100644 index edd4b9d9a4..0000000000 --- a/news/5124-fix-ensure_binary-None-handling +++ /dev/null @@ -1,19 +0,0 @@ -### Enhancements - -* - -### Bug fixes - -* Update `conda_build.os_utils.liefldd.ensure_binary` to handle `None` inputs. (#5123 via #5124) - -### Deprecations - -* - -### Docs - -* - -### Other - -* diff --git a/news/5126-samefile-regression b/news/5126-samefile-regression deleted file mode 100644 index e3ec790882..0000000000 --- a/news/5126-samefile-regression +++ /dev/null @@ -1,19 +0,0 @@ -### Enhancements - -* - -### Bug fixes - -* Update `which_package` to use a cached mapping of files to packages (`O(1)`) instead of relying on `Path.samefile` comparisons (`O(n * m)`). (#5126 via #5130) - -### Deprecations - -* - -### Docs - -* - -### Other - -*