Skip to content

Commit

Permalink
bundler: Fix for prefetching of dependencies
Browse files Browse the repository at this point in the history
Some dependencies contain architecture-specific components.  Their name
and location differ from standard Ruby-only dependencies. As a result
prior to this commit such dependencies would be skipped during prefetch.
This, in turn, failed hermetic builds because a dependency would end up
missing. Corresponding upstream issue:

#672

This commit fixes the issue and adds e2e tests.

Signed-off-by: Alexey Ovchinnikov <[email protected]>
  • Loading branch information
a-ovchinnikov committed Oct 7, 2024
1 parent 93915e7 commit ca5d99c
Show file tree
Hide file tree
Showing 11 changed files with 317 additions and 14 deletions.
1 change: 1 addition & 0 deletions cachi2/core/models/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class BundlerPackageInput(_PackageInputBase):
"""Accepted input for a bundler package."""

type: Literal["bundler"]
allow_binary: bool = False


class GomodPackageInput(_PackageInputBase):
Expand Down
8 changes: 8 additions & 0 deletions cachi2/core/models/property_semantics.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class PropertySet:
npm_bundled: bool = False
npm_development: bool = False
pip_package_binary: bool = False
bundler_package_binary: bool = False

@classmethod
def from_properties(cls, props: Iterable[Property]) -> "Self":
Expand All @@ -42,6 +43,7 @@ def from_properties(cls, props: Iterable[Property]) -> "Self":
npm_bundled = False
npm_development = False
pip_package_binary = False
bundler_package_binary = False

for prop in props:
if prop.name == "cachi2:found_by":
Expand All @@ -54,6 +56,8 @@ def from_properties(cls, props: Iterable[Property]) -> "Self":
npm_development = True
elif prop.name == "cachi2:pip:package:binary":
pip_package_binary = True
elif prop.name == "cachi2:bundler:package:binary":
bundler_package_binary = True
else:
assert_never(prop.name)

Expand All @@ -63,6 +67,7 @@ def from_properties(cls, props: Iterable[Property]) -> "Self":
npm_bundled,
npm_development,
pip_package_binary,
bundler_package_binary,
)

def to_properties(self) -> list[Property]:
Expand All @@ -80,6 +85,8 @@ def to_properties(self) -> list[Property]:
props.append(Property(name="cdx:npm:package:development", value="true"))
if self.pip_package_binary:
props.append(Property(name="cachi2:pip:package:binary", value="true"))
if self.bundler_package_binary:
props.append(Property(name="cachi2:bundler:package:binary", value="true"))

return sorted(props, key=lambda p: (p.name, p.value))

Expand All @@ -92,4 +99,5 @@ def merge(self, other: "Self") -> "Self":
npm_bundled=self.npm_bundled and other.npm_bundled,
npm_development=self.npm_development and other.npm_development,
pip_package_binary=self.pip_package_binary or other.pip_package_binary,
bundler_package_binary=self.bundler_package_binary or other.bundler_package_binary,
)
1 change: 1 addition & 0 deletions cachi2/core/models/sbom.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from cachi2.core.models.validators import unique_sorted

PropertyName = Literal[
"cachi2:bundler:package:binary",
"cachi2:found_by",
"cachi2:missing_hash:in_file",
"cachi2:pip:package:binary",
Expand Down
37 changes: 30 additions & 7 deletions cachi2/core/package_managers/bundler/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@
import os
from pathlib import Path
from textwrap import dedent
from typing import Optional
from typing import Optional, cast

from packageurl import PackageURL

from cachi2.core.errors import PackageRejected, UnsupportedFeature
from cachi2.core.models.input import Request
from cachi2.core.models.input import BundlerPackageInput, Request
from cachi2.core.models.output import EnvironmentVariable, ProjectFile, RequestOutput
from cachi2.core.models.property_semantics import PropertySet
from cachi2.core.models.sbom import Component
from cachi2.core.package_managers.bundler.parser import ParseResult, PathDependency, parse_lockfile
from cachi2.core.package_managers.bundler.parser import (
GemPlatformSpecificDependency,
ParseResult,
PathDependency,
parse_lockfile,
)
from cachi2.core.rooted_path import RootedPath
from cachi2.core.scm import get_repo_id

Expand All @@ -30,7 +36,11 @@ def fetch_bundler_source(request: Request) -> RequestOutput:
for package in request.packages:
path_within_root = request.source_dir.join_within_root(package.path)
components.extend(
_resolve_bundler_package(package_dir=path_within_root, output_dir=request.output_dir)
_resolve_bundler_package(
package_dir=path_within_root,
output_dir=request.output_dir,
allow_binary=cast(BundlerPackageInput, package).allow_binary,
)
)
project_files.append(_prepare_for_hermetic_build(request.source_dir, request.output_dir))

Expand All @@ -41,11 +51,15 @@ def fetch_bundler_source(request: Request) -> RequestOutput:
)


def _resolve_bundler_package(package_dir: RootedPath, output_dir: RootedPath) -> list[Component]:
def _resolve_bundler_package(
package_dir: RootedPath,
output_dir: RootedPath,
allow_binary: bool = False,
) -> list[Component]:
"""Process a request for a single bundler package."""
deps_dir = output_dir.join_within_root("deps", "bundler")
deps_dir.path.mkdir(parents=True, exist_ok=True)
dependencies = parse_lockfile(package_dir)
dependencies = parse_lockfile(package_dir, allow_binary)

name, version = _get_main_package_name_and_version(package_dir, dependencies)
vcs_url = get_repo_id(package_dir.root).as_vcs_url_qualifier()
Expand All @@ -60,7 +74,16 @@ def _resolve_bundler_package(package_dir: RootedPath, output_dir: RootedPath) ->
components = [Component(name=name, version=version, purl=main_package_purl.to_string())]
for dep in dependencies:
dep.download_to(deps_dir)
components.append(Component(name=dep.name, version=dep.version, purl=dep.purl))
if isinstance(dep, GemPlatformSpecificDependency):
c = Component(
name=dep.name,
version=dep.version,
purl=dep.purl,
properties=PropertySet(bundler_package_binary=True).to_properties(),
)
else:
c = Component(name=dep.name, version=dep.version, purl=dep.purl)
components.append(c)

return components

Expand Down
50 changes: 47 additions & 3 deletions cachi2/core/package_managers/bundler/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,32 @@ def download_to(self, deps_dir: RootedPath) -> None:
download_binary_file(self.remote_location, fs_location)


class GemPlatformSpecificDependency(GemDependency):
"""
Represents a gem dependency built for a specific platform.
Attributes:
platform: Platform for which the dependency was built.
"""

platform: str

@cached_property
def remote_location(self) -> str:
"""Return remote location to download this gem from."""
return f"{self.source}/downloads/{self.name}-{self.version}-{self.platform}.gem"

def download_to(self, deps_dir: RootedPath) -> None:
"""Download represented gem to specified file system location."""
fs_location = deps_dir.join_within_root(
Path(f"{self.name}-{self.version}-{self.platform}.gem")
)
log.info(
"Downloading platform-specific gem %s-%s-%s", self.name, self.version, self.platform
)
download_binary_file(self.remote_location, fs_location)


class GitDependency(_GemMetadata):
"""
Represents a git dependency.
Expand Down Expand Up @@ -162,11 +188,13 @@ def purl(self) -> str:
return purl.to_string()


BundlerDependency = Union[GemDependency, GitDependency, PathDependency]
BundlerDependency = Union[
GemDependency, GemPlatformSpecificDependency, GitDependency, PathDependency
]
ParseResult = list[BundlerDependency]


def parse_lockfile(package_dir: RootedPath) -> ParseResult:
def parse_lockfile(package_dir: RootedPath, allow_binary: bool = False) -> ParseResult:
"""Parse a Gemfile.lock file and return a list of dependencies."""
lockfile_path = package_dir.join_within_root(GEMFILE_LOCK)
gemfile_path = package_dir.join_within_root(GEMFILE)
Expand Down Expand Up @@ -194,7 +222,23 @@ def parse_lockfile(package_dir: RootedPath) -> ParseResult:
result: ParseResult = []
for dep in dependencies:
if dep["type"] == "rubygems":
result.append(GemDependency(**dep))
if f'{dep["name"]}-{dep["version"]}' != dep["full_name"]:
log.warning("Found a binary dependency %s", dep["full_name"])
if allow_binary:
log.warning(
"Downloading binary dependency %s because 'allow_binary' is set to True",
dep["full_name"],
)
result.append(GemPlatformSpecificDependency(**dep))
else:
# No need to force a platform if we skip the packages.
log.warning(
"Skipping binary dependency %s because 'allow_binary' is set to False."
" This will likely result in an unbuildable package.",
dep["full_name"],
)
else:
result.append(GemDependency(**dep))
elif dep["type"] == "git":
result.append(GitDependency(**dep))
elif dep["type"] == "path":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
lockfile_parser.specs.each do |spec|
parsed_spec = {
name: spec.name,
version: spec.version.to_s
version: spec.version.to_s,
full_name: spec.full_name,
platform: spec.platform.to_s
}

case spec.source
Expand Down
73 changes: 73 additions & 0 deletions tests/integration/test_bundler.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,3 +100,76 @@ def test_bundler_packages(
utils.fetch_deps_and_check_output(
tmp_path, test_case, test_params, source_folder, test_data_dir, cachi2_image
)


@pytest.mark.parametrize(
"test_params,check_cmd,expected_cmd_output",
[
pytest.param(
utils.TestParameters(
repo="https://github.com/cachito-testing/cachi2-bundler.git",
ref="well_formed_ruby_all_features",
packages=({"path": ".", "type": "bundler", "allow_binary": "true"},),
flags=["--dev-package-managers"],
check_output=False,
check_deps_checksums=False,
check_vendor_checksums=False,
expected_exit_code=0,
expected_output="",
),
[], # No additional commands are run to verify the build
[],
id="bundler_everything_present",
),
pytest.param(
utils.TestParameters(
repo="https://github.com/cachito-testing/cachi2-bundler.git",
ref="well_formed_ruby_without_gemspec",
packages=({"path": ".", "type": "bundler", "allow_binary": "true"},),
flags=["--dev-package-managers"],
check_output=False,
check_deps_checksums=False,
check_vendor_checksums=False,
expected_exit_code=0,
expected_output="",
),
[], # No additional commands are run to verify the build
[],
id="bundler_everything_present_except_gemspec",
),
],
)
def test_e2e_bundler(
test_params: utils.TestParameters,
check_cmd: list[str],
expected_cmd_output: str,
cachi2_image: utils.ContainerImage,
tmp_path: Path,
test_data_dir: Path,
request: pytest.FixtureRequest,
) -> None:
"""
End to end test for bundler.
:param test_params: Test case arguments
:param tmp_path: Temp directory for pytest
"""
test_case = request.node.callspec.id

source_folder = utils.clone_repository(
test_params.repo, test_params.ref, f"{test_case}-source", tmp_path
)

output_folder = utils.fetch_deps_and_check_output(
tmp_path, test_case, test_params, source_folder, test_data_dir, cachi2_image
)

utils.build_image_and_check_cmd(
tmp_path,
output_folder,
test_data_dir,
test_case,
check_cmd,
expected_cmd_output,
cachi2_image,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM docker.io/ruby:3.3

# Test disabled network access
RUN if curl -IsS www.google.com; then echo "Has network access!"; exit 1; fi

# Print cachi2 env vars file
RUN cat /tmp/cachi2.env

# Check bundler deps
RUN ls /tmp/bundler_everything_present-output/deps/bundler

# Check content of source repository folder
RUN ls /tmp/bundler_everything_present-source/

# This should be a COPY, but the source code and Containerfile are in different directories
RUN cp -r /tmp/bundler_everything_present-source /src

WORKDIR /src
# Bundler would try and install whichever version was used to generate
# This will cause bundler to attempt to download an earlier version even if
# just microversions diverged. This, int turn, would cause a build to fail.
# Running 'bundle _<version>_ install' is supposed to attempt to run installation
# with this specific version. The extra code below ensures that any present
# version is used:
RUN . /tmp/cachi2.env && bundle --version | cut -d ' ' -f3- | xargs -I {} bundle _{}_ install
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
FROM docker.io/ruby:3.3

# Test disabled network access
RUN if curl -IsS www.google.com; then echo "Has network access!"; exit 1; fi

# Print cachi2 env vars file
RUN cat /tmp/cachi2.env

# Check bundler deps
RUN ls /tmp/bundler_everything_present_except_gemspec-output/deps/bundler

# Check content of source repository folder
RUN ls /tmp/bundler_everything_present_except_gemspec-source/

# This should be a COPY, but the source code and Containerfile are in different directories
RUN cp -r /tmp/bundler_everything_present_except_gemspec-source /src

WORKDIR /src
# Bundler would try and install whichever version was used to generate
# This will cause bundler to attempt to download an earlier version even if
# just microversions diverged. This, int turn, would cause a build to fail.
# Running 'bundle _<version>_ install' is supposed to attempt to run installation
# with this specific version. The extra code below ensures that any present
# version is used:
RUN . /tmp/cachi2.env && bundle --version | cut -d ' ' -f3- | xargs -I {} bundle _{}_ install
2 changes: 1 addition & 1 deletion tests/unit/package_managers/bundler/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_resolve_bundler_package(

components = _resolve_bundler_package(package_dir=package_dir, output_dir=output_dir)

mock_parse_lockfile.assert_called_once_with(package_dir)
mock_parse_lockfile.assert_called_once_with(package_dir, False)
mock_get_main_package_name_and_version.assert_called_once_with(package_dir, deps)
mock_gem_dep_download_to.assert_called_with(deps_dir)
mock_git_dep_download_to.assert_called_with(deps_dir)
Expand Down
Loading

0 comments on commit ca5d99c

Please sign in to comment.