From 36013a4c78417aebc8c242c8b7c17840ed8c6c84 Mon Sep 17 00:00:00 2001 From: Alexey Ovchinnikov Date: Thu, 17 Oct 2024 12:00:10 -0500 Subject: [PATCH 1/3] yarn: Adding Workspace model A model to handle yarn workspaces is added. Workspaces allow user to install dependencies from multiple package.json files within one root package. Official workspaces definition: https://classic.yarnpkg.com/lang/en/docs/workspaces/ Signed-off-by: Alexey Ovchinnikov --- .../yarn_classic/workspaces.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 cachi2/core/package_managers/yarn_classic/workspaces.py diff --git a/cachi2/core/package_managers/yarn_classic/workspaces.py b/cachi2/core/package_managers/yarn_classic/workspaces.py new file mode 100644 index 000000000..c6bde99a0 --- /dev/null +++ b/cachi2/core/package_managers/yarn_classic/workspaces.py @@ -0,0 +1,23 @@ +import json +from itertools import chain +from pathlib import Path +from typing import Any, Generator, Iterable + +import pydantic + +from cachi2.core.errors import PackageRejected +from cachi2.core.models.input import YarnClassicPackageInput +from cachi2.core.rooted_path import PathOutsideRoot, RootedPath + + +class Workspace(pydantic.BaseModel): + """Workspace model.""" + + path: Path # path to a workspace. + package_contents: dict # package data extracted from path/"package.json". + + @pydantic.field_validator("package_contents") + def _ensure_package_is_named(cls, package_contents: dict) -> dict: + if "name" not in package_contents: + raise ValueError("Workspaces must contain 'name' field.") + return package_contents From 5bb55c729a39d0400f8201956cc35fe10abe567f Mon Sep 17 00:00:00 2001 From: Alexey Ovchinnikov Date: Thu, 17 Oct 2024 12:02:09 -0500 Subject: [PATCH 2/3] Adding Yarn v1.x workspaces metadata extractor In Yarn v1.x workspaces metadata is stored outside of yarn.lock in package.json. 
This commit introduces functions for workspaces metadata extraction and a dataclass to represent workspaces. Signed-off-by: Alexey Ovchinnikov --- .../package_managers/yarn_classic/main.py | 6 ++ .../yarn_classic/workspaces.py | 83 +++++++++++++++++++ .../yarn_classic/test_main.py | 2 + .../yarn_classic/test_workspaces.py | 74 +++++++++++++++++ 4 files changed, 165 insertions(+) create mode 100644 tests/unit/package_managers/yarn_classic/test_workspaces.py diff --git a/cachi2/core/package_managers/yarn_classic/main.py b/cachi2/core/package_managers/yarn_classic/main.py index da578ce8c..b8ea33d5d 100644 --- a/cachi2/core/package_managers/yarn_classic/main.py +++ b/cachi2/core/package_managers/yarn_classic/main.py @@ -6,6 +6,7 @@ from cachi2.core.models.input import Request from cachi2.core.models.output import Component, EnvironmentVariable, RequestOutput from cachi2.core.package_managers.yarn.utils import run_yarn_cmd +from cachi2.core.package_managers.yarn_classic.workspaces import extract_workspace_metadata from cachi2.core.rooted_path import RootedPath log = logging.getLogger(__name__) @@ -27,6 +28,11 @@ def _ensure_mirror_dir_exists(output_dir: RootedPath) -> None: prefetch_env = _get_prefetch_environment_variables(request.output_dir) _verify_corepack_yarn_version(path, prefetch_env) _fetch_dependencies(path, prefetch_env) + # Workspaces metadata is not used at the moment, but will + # eventually be converted into components. Using a noop assertion + # to prevent linters from complaining. 
+ workspaces = extract_workspace_metadata(package, request.source_dir) + assert workspaces is not None # nosec -- see comment above return RequestOutput.from_obj_list( components, _generate_build_environment_variables(), project_files=[] diff --git a/cachi2/core/package_managers/yarn_classic/workspaces.py b/cachi2/core/package_managers/yarn_classic/workspaces.py index c6bde99a0..de1b0c6e0 100644 --- a/cachi2/core/package_managers/yarn_classic/workspaces.py +++ b/cachi2/core/package_managers/yarn_classic/workspaces.py @@ -21,3 +21,86 @@ def _ensure_package_is_named(cls, package_contents: dict) -> dict: if "name" not in package_contents: raise ValueError("Workspaces must contain 'name' field.") return package_contents + + +def ensure_no_path_leads_out( + paths: Iterable[Path], + source_dir: RootedPath, +) -> None: + """Ensure no path leads out of source directory. + + Raises an exception when any path is not relative to source directory. + Does nothing when path does not exist in the file system. + """ + for path in paths: + try: + source_dir.join_within_root(path) + except PathOutsideRoot: + raise PackageRejected( + f"Found a workspace path which is not relative to package: {path}", + solution=( + "Avoid using packages which try to access your filesystem " + "outside of package directory." + ), + ) + + +def _ensure_workspaces_are_well_formed( + paths: Iterable[Path], +) -> None: + """Ensure that every workspace contains package.json. + + Reject the package otherwise. 
+ """ + for p in paths: + if not Path(p, "package.json").is_file(): + raise PackageRejected( + reason=f"Workspace {p} does not contain 'package.json'", + solution=None, + ) + + +def _get_workspace_paths( + workspaces_globs: list[str], + source_dir: RootedPath, +) -> Iterable[Path]: + """Resolve globs within source directory.""" + + def all_paths_matching(glob: str) -> Generator[Path, None, None]: + return (pth.resolve() for pth in source_dir.path.glob(glob)) + + return chain.from_iterable(map(all_paths_matching, workspaces_globs)) + + +def _extract_workspaces_globs( + package: dict[str, Any], +) -> list[str]: + """Extract globs from workspaces entry in package dict.""" + workspaces_globs = package.get("workspaces", []) + return workspaces_globs + + +def _read_package_from(path: RootedPath) -> dict[str, Any]: + """Read package.json from a path.""" + return json.loads(path.join_within_root("package.json").path.read_text()) + + +def extract_workspace_metadata( + package: YarnClassicPackageInput, + source_dir: RootedPath, +) -> list[Workspace]: + """Extract workspace metadata from a package.""" + processed_package = _read_package_from(source_dir.join_within_root(package.path)) + workspaces_globs = _extract_workspaces_globs(processed_package) + workspaces_paths = list(_get_workspace_paths(workspaces_globs, source_dir)) # reused below + ensure_no_path_leads_out(workspaces_paths, source_dir) + _ensure_workspaces_are_well_formed(workspaces_paths) + parsed_workspaces = [] + for wp in workspaces_paths: + parsed_workspaces.append( + Workspace( + path=wp, + package_contents=_read_package_from(source_dir.join_within_root(wp)), + ) + ) + return parsed_workspaces diff --git a/tests/unit/package_managers/yarn_classic/test_main.py b/tests/unit/package_managers/yarn_classic/test_main.py index fb2ff6918..35fb1e9f3 100644 --- a/tests/unit/package_managers/yarn_classic/test_main.py +++ b/tests/unit/package_managers/yarn_classic/test_main.py @@ -53,7 +53,9 @@ def test_generate_build_environment_variables( 
@mock.patch("cachi2.core.package_managers.yarn_classic.main._verify_corepack_yarn_version") @mock.patch("cachi2.core.package_managers.yarn_classic.main._get_prefetch_environment_variables") @mock.patch("cachi2.core.package_managers.yarn_classic.main._fetch_dependencies") +@mock.patch("cachi2.core.package_managers.yarn_classic.main.extract_workspace_metadata") def test_fetch_yarn_source( + mock_extract_metadata: mock.Mock, mock_fetch_dependencies: mock.Mock, mock_prefetch_env_vars: mock.Mock, mock_verify_yarn_version: mock.Mock, diff --git a/tests/unit/package_managers/yarn_classic/test_workspaces.py b/tests/unit/package_managers/yarn_classic/test_workspaces.py new file mode 100644 index 000000000..e9aaea70e --- /dev/null +++ b/tests/unit/package_managers/yarn_classic/test_workspaces.py @@ -0,0 +1,74 @@ +from pathlib import Path +from unittest import mock + +import pytest + +from cachi2.core.errors import PackageRejected +from cachi2.core.models.input import YarnClassicPackageInput +from cachi2.core.package_managers.yarn_classic.workspaces import ( + Workspace, + _extract_workspaces_globs, + _get_workspace_paths, + extract_workspace_metadata, +) +from cachi2.core.rooted_path import RootedPath + + +@mock.patch("cachi2.core.package_managers.yarn_classic.workspaces._read_package_from") +@mock.patch("cachi2.core.package_managers.yarn_classic.workspaces._get_workspace_paths") +def test_packages_with_workspaces_outside_source_dir_are_rejected( + mock_get_ws_paths: mock.Mock, + mock_read_package_from: mock.Mock, +) -> None: + package = YarnClassicPackageInput(type="yarn-classic", path=".") + mock_read_package_from.return_value = {"workspaces": ["../../usr"]} + mock_get_ws_paths.return_value = [Path("/tmp/foo/bar"), Path("/usr")] + source_dir = RootedPath("/tmp/foo") + + with pytest.raises(PackageRejected): + extract_workspace_metadata(package, source_dir=source_dir) + + +@mock.patch("cachi2.core.package_managers.yarn_classic.workspaces._read_package_from") 
+@mock.patch("cachi2.core.package_managers.yarn_classic.workspaces._get_workspace_paths") +@mock.patch( + "cachi2.core.package_managers.yarn_classic.workspaces._ensure_workspaces_are_well_formed" +) +def test_workspaces_could_be_parsed( + mock_workspaces_ok: mock.Mock, + mock_get_ws_paths: mock.Mock, + mock_read_package_from: mock.Mock, +) -> None: + package = YarnClassicPackageInput(type="yarn-classic", path=".") + mock_read_package_from.side_effect = [{"workspaces": ["quux"]}, {"name": "inner_package"}] + mock_get_ws_paths.return_value = [Path("/tmp/foo/bar")] + source_dir = RootedPath("/tmp/foo") + + expected_result = [ + Workspace( + path="/tmp/foo/bar", + package=YarnClassicPackageInput(type="yarn-classic", path=Path("bar")), + package_contents={"name": "inner_package"}, + ), + ] + result = extract_workspace_metadata(package, source_dir=source_dir) + + assert result == expected_result + + +def test_extracting_workspace_globs_works_with_globs_deined_in_list() -> None: + package = {"workspaces": ["foo"]} + + expected = ["foo"] + result = _extract_workspaces_globs(package) + + assert expected == result + + +def test_workspace_paths_could_be_resolved(rooted_tmp_path: RootedPath) -> None: + expected = rooted_tmp_path.path / "foo" + expected.mkdir() + + result = list(_get_workspace_paths(["foo"], rooted_tmp_path)) + + assert result == [expected] From 3296fa5ee240b5b53ad9b73b4102f259150a3b3b Mon Sep 17 00:00:00 2001 From: Alexey Ovchinnikov Date: Thu, 17 Oct 2024 12:02:59 -0500 Subject: [PATCH 3/3] yarn: Expanding globs extraction to nested Arrays It turns out that workspaces could be either Array or a nested Array in an Object thus we must handle both cases. 
Official docs mentioning the former: https://classic.yarnpkg.com/lang/en/docs/workspaces/ Official blog containing a hint about the latter: https://classic.yarnpkg.com/blog/2018/02/15/nohoist/ Signed-off-by: Alexey Ovchinnikov --- .../yarn_classic/workspaces.py | 7 ++++++ .../yarn_classic/test_workspaces.py | 22 ++++++++++++++++--- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/cachi2/core/package_managers/yarn_classic/workspaces.py b/cachi2/core/package_managers/yarn_classic/workspaces.py index de1b0c6e0..314bcc82c 100644 --- a/cachi2/core/package_managers/yarn_classic/workspaces.py +++ b/cachi2/core/package_managers/yarn_classic/workspaces.py @@ -76,7 +76,14 @@ def _extract_workspaces_globs( package: dict[str, Any], ) -> list[str]: """Extract globs from workspaces entry in package dict.""" + # This could be an Array or an Array nested in an Object. + # Official docs mentioning the former: + # https://classic.yarnpkg.com/lang/en/docs/workspaces/ + # Official blog containing a hint about the latter: + # https://classic.yarnpkg.com/blog/2018/02/15/nohoist/ workspaces_globs = package.get("workspaces", []) + if isinstance(workspaces_globs, dict): + workspaces_globs = workspaces_globs.get("packages", []) return workspaces_globs diff --git a/tests/unit/package_managers/yarn_classic/test_workspaces.py b/tests/unit/package_managers/yarn_classic/test_workspaces.py index e9aaea70e..778e357c2 100644 --- a/tests/unit/package_managers/yarn_classic/test_workspaces.py +++ b/tests/unit/package_managers/yarn_classic/test_workspaces.py @@ -56,10 +56,26 @@ def test_workspaces_could_be_parsed( assert result == expected_result -def test_extracting_workspace_globs_works_with_globs_deined_in_list() -> None: - package = {"workspaces": ["foo"]} +@pytest.mark.parametrize( + "package, expected", + [ + pytest.param( + {"workspaces": ["foo"]}, + ["foo"], + id="workspaces_defined_in_an_array", + ), + pytest.param( + {"workspaces": {"packages": ["foo"]}}, + ["foo"], + 
id="workspaces_defined_in_an_array_within_an_object", + ), + ], +) +def test_extracting_workspace_globs_works_for_all_types_of_workspaces( + package: dict, + expected: list, +) -> None: - expected = ["foo"] result = _extract_workspaces_globs(package) assert expected == result