From 9415acc1845f87c50af48ac5b1e0343d9cdf9376 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 30 May 2023 09:44:57 +1000 Subject: [PATCH 01/29] chore(deps): add problog dependency to pyproject.toml Signed-off-by: sophie-bates --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1d697f32e..14735b4bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "jinja2 >=3.1.2,<4.0.0", "SQLAlchemy >=2.0.0,<3.0.0", "defusedxml >=0.7.1,<1.0.0", + "problog >=2.2.4,<3.0.0" ] keywords = [] # https://pypi.org/classifiers/ From 8cc866d1733745f05aed0a4facfd70bb8a559ec7 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 31 May 2023 16:56:33 +1000 Subject: [PATCH 02/29] feat: split build_as_code_check into subchecks and aggregate certainty values using ProbLog. Signed-off-by: sophie-bates --- pyproject.toml | 1 + src/macaron/slsa_analyzer/checks/bac_.py | 18 + .../slsa_analyzer/checks/base_check.py | 1 + .../checks/build_as_code_check.py | 516 +++++++++++------- .../slsa_analyzer/checks/check_result.py | 1 + .../checks/problog_predicates.py | 55 ++ src/macaron/slsa_analyzer/registry.py | 1 + 7 files changed, 396 insertions(+), 197 deletions(-) create mode 100644 src/macaron/slsa_analyzer/checks/bac_.py create mode 100644 src/macaron/slsa_analyzer/checks/problog_predicates.py diff --git a/pyproject.toml b/pyproject.toml index 14735b4bc..529587b4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,6 +188,7 @@ module = [ "gitdb.*", "yamale.*", "defusedxml.*", + "problog.*" ] ignore_missing_imports = true diff --git a/src/macaron/slsa_analyzer/checks/bac_.py b/src/macaron/slsa_analyzer/checks/bac_.py new file mode 100644 index 000000000..6a993dcf0 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/bac_.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" +from attr import dataclass + + +@dataclass +class BuildAsCodeSubchecks: + """Dataclass for storing the results from the BuildAsCodeCheck subchecks.""" + + ci_parsed: float + deploy_action: float + deploy_command: float + deploy_kws: float + + +build_as_code_subchecks: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/base_check.py b/src/macaron/slsa_analyzer/checks/base_check.py index 432a61158..8b12d2005 100644 --- a/src/macaron/slsa_analyzer/checks/base_check.py +++ b/src/macaron/slsa_analyzer/checks/base_check.py @@ -86,6 +86,7 @@ def run(self, target: AnalyzeContext, skipped_info: Optional[SkippedInfo] = None justification=[], result_type=CheckResultType.SKIPPED, result_tables=[], + confidence_score=0, ) if skipped_info: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index e25768049..7a00c27ee 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -6,17 +6,21 @@ import logging import os +from problog import get_evaluatable +from problog.program import PrologString from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy.sql.sqltypes import String +from sqlalchemy.sql.sqltypes import Float, String from macaron.config.defaults import defaults from macaron.database.database_manager import ORMBase from macaron.database.table_definitions import CheckFactsTable from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, NoneBuildTool +from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.checks import bac_ from macaron.slsa_analyzer.checks.base_check import BaseCheck from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType -from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService +from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService, NoneCIService from macaron.slsa_analyzer.ci_service.circleci import CircleCI from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI @@ -24,6 +28,7 @@ from macaron.slsa_analyzer.ci_service.travis import Travis from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName +from macaron.slsa_analyzer.specs.ci_spec import CIInfo logger: logging.Logger = logging.getLogger(__name__) @@ -37,6 +42,197 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): build_trigger: Mapped[str] = mapped_column(String, nullable=True) deploy_command: Mapped[str] = mapped_column(String, nullable=True) build_status_url: Mapped[str] = mapped_column(String, nullable=True) + confidence_score: Mapped[float] = mapped_column(Float, nullable=True) + + +def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: + """Check if the bash command is a build and deploy command.""" + # Account for Python projects having separate tools for packaging and publishing. + deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder + for com in commands: + + # Check for empty or invalid commands. + if not com or not com[0]: + continue + # The first argument in a bash command is the program name. 
+        # So first check that the program name is a supported build tool name.
+        # We need to handle cases where the first argument is a path to the program.
+        cmd_program_name = os.path.basename(com[0])
+        if not cmd_program_name:
+            logger.debug("Found invalid program name %s.", com[0])
+            continue
+
+        check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name)
+
+        # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'.
+        check_module_build_commands = any(
+            interpreter == cmd_program_name
+            and com[1]
+            and com[1] in build_tool.interpreter_flag
+            and com[2]
+            and com[2] in deploy_tool
+            for interpreter in build_tool.interpreter
+        )
+        prog_name_index = 2 if check_module_build_commands else 0
+
+        if check_build_commands or check_module_build_commands:
+            # Check the arguments in the bash command for the deploy goals.
+            # If there are no deploy args for this build tool, accept as deploy command.
+            if not build_tool.deploy_arg:
+                logger.info("No deploy arguments required. Accept %s as deploy command.", str(com))
+                return str(com)
+
+            for word in com[(prog_name_index + 1) :]:
+                # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy.
+                if word in build_tool.deploy_arg:
+                    logger.info("Found deploy command %s.", str(com))
+                    return str(com)
+    return ""
+
+
+def ci_parsed_subcheck(ci_info: CIInfo) -> dict:
+    """Check whether parsing is supported for this CI service's CI config files."""
+    check_certainty = 1
+
+    justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."]
+
+    if ci_info["bash_commands"]:
+        return {"certainty": check_certainty, "justification": justification}
+    return {"certainty": 0, "justification": ["The CI workflow files for this CI service aren't parsed."]}
+
+
+def deploy_action_subcheck(
+    ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool
+) -> dict:
+    """Check for use of a trusted GitHub Actions workflow to publish/deploy."""
+    # TODO: verify that deployment is legitimate and not a test
+    check_certainty = 0.8
+
+    if isinstance(build_tool, Pip):
+        trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[])
+
+        for callee in ci_info["callgraph"].bfs():
+            workflow_name = callee.name.split("@")[0]
+
+            if not workflow_name or callee.node_type not in [
+                GHWorkflowType.EXTERNAL,
+                GHWorkflowType.REUSABLE,
+            ]:
+                logger.debug("Workflow %s is not relevant. 
Skipping...", callee.name) + continue + if workflow_name in trusted_deploy_actions: + trigger_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, + ctx.commit_sha, + ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), + ) + deploy_action_source_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, ctx.commit_sha, callee.caller_path + ) + + html_url = ci_service.has_latest_run_passed( + ctx.repo_full_name, + ctx.branch_name, + ctx.commit_sha, + ctx.commit_date, + os.path.basename(callee.caller_path), + ) + + # TODO: include in the justification multiple cases of external action usage + justification: list[str | dict[str, str]] = [ + { + "To deploy": deploy_action_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy action: {workflow_name}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + return { + "certainty": check_certainty, + "justification": justification, + "deploy_command": workflow_name, + "trigger_link": trigger_link, + "deploy_action_source_link": deploy_action_source_link, + "html_url": html_url, + } + + return {"certainty": 0, "justification": []} + + +def deploy_command_subcheck( + ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool +) -> dict: + """Check for the use of deploy command to deploy.""" + check_certainty = 0.7 + for bash_cmd in ci_info["bash_commands"]: + deploy_cmd = has_deploy_command(bash_cmd["commands"], build_tool) + if deploy_cmd: + # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. + trigger_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, + ctx.commit_sha, + ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), + ) + # Get the permalink of the source file of the bash command. + bash_source_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] + ) + + html_url = ci_service.has_latest_run_passed( + ctx.repo_full_name, + ctx.branch_name, + ctx.commit_sha, + ctx.commit_date, + os.path.basename(bash_cmd["CI_path"]), + ) + + justification: list[str | dict[str, str]] = [ + { + f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy command: {deploy_cmd}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + return { + "certainty": check_certainty, + "justification": justification, + "deploy_cmd": deploy_cmd, + "trigger_link": trigger_link, + "bash_source_link": bash_source_link, + "html_url": html_url, + } + return {"certainty": 0, "justification": ""} + + +def deploy_kws_subcheck(ctx: AnalyzeContext, ci_service: BaseCIService, build_tool: BaseBuildTool) -> dict: + """Check for the use of deploy keywords to deploy.""" + check_certainty = 0.6 + # We currently don't parse these CI configuration files. + # We just look for a keyword for now. 
+ for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): + if isinstance(ci_service, unparsed_ci): + if build_tool.ci_deploy_kws[ci_service.name]: + deploy_kw, config_name = ci_service.has_kws_in_config( + build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path + ) + if not config_name: + return {"certainty": 0, "justification": ""} + + justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + + return { + "certainty": check_certainty, + "justification": justification, + "deploy_kw": deploy_kw, + "config_name": config_name, + } + return {"certainty": 0, "justification": []} class BuildAsCodeCheck(BaseCheck): @@ -56,6 +252,8 @@ def __init__(self) -> None: ("mcn_trusted_builder_level_three_1", CheckResultType.FAILED), ] eval_reqs = [ReqName.BUILD_AS_CODE] + self.confidence_score_threshold = 0.3 + super().__init__( check_id="mcn_build_as_code_1", description=description, @@ -64,50 +262,6 @@ def __init__(self) -> None: result_on_skip=CheckResultType.PASSED, ) - def _has_deploy_command(self, commands: list[list[str]], build_tool: BaseBuildTool) -> str: - """Check if the bash command is a build and deploy command.""" - # Account for Python projects having separate tools for packaging and publishing. - deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder - for com in commands: - - # Check for empty or invalid commands. - if not com or not com[0]: - continue - # The first argument in a bash command is the program name. - # So first check that the program name is a supported build tool name. - # We need to handle cases where the first argument is a path to the program. - cmd_program_name = os.path.basename(com[0]) - if not cmd_program_name: - logger.debug("Found invalid program name %s.", com[0]) - continue - - check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) - - # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. - check_module_build_commands = any( - interpreter == cmd_program_name - and com[1] - and com[1] in build_tool.interpreter_flag - and com[2] - and com[2] in deploy_tool - for interpreter in build_tool.interpreter - ) - prog_name_index = 2 if check_module_build_commands else 0 - - if check_build_commands or check_module_build_commands: - # Check the arguments in the bash command for the deploy goals. - # If there are no deploy args for this build tool, accept as deploy command. - if not build_tool.deploy_arg: - logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) - return str(com) - - for word in com[(prog_name_index + 1) :]: - # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. - if word in build_tool.deploy_arg: - logger.info("Found deploy command %s.", str(com)) - return str(com) - return "" - def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResultType: """Implement the check in this method. @@ -130,164 +284,132 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # Checking if a build tool is discovered for this repo. if build_tool and not isinstance(build_tool, NoneBuildTool): for ci_info in ci_services: + ci_service = ci_info["service"] # Checking if a CI service is discovered for this repo. 
if isinstance(ci_service, NoneCIService): continue - trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) - - # Check for use of a trusted Github Actions workflow to publish/deploy. - # TODO: verify that deployment is legitimate and not a test - if trusted_deploy_actions: - for callee in ci_info["callgraph"].bfs(): - workflow_name = callee.name.split("@")[0] - - if not workflow_name or callee.node_type not in [ - GHWorkflowType.EXTERNAL, - GHWorkflowType.REUSABLE, - ]: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_deploy_actions: - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow( - os.path.basename(callee.caller_path) - ), - ) - deploy_action_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, callee.caller_path - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(callee.caller_path), - ) - - # TODO: include in the justification multiple cases of external action usage - justification: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name}" - " to deploy": deploy_action_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy action: {workflow_name}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - check_result["justification"].extend(justification) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = deploy_action_source_link - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["metadata"]["buildInvocationId"] = html_url - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=workflow_name, - build_status_url=html_url, - ) - ] - return CheckResultType.PASSED - - for bash_cmd in ci_info["bash_commands"]: - deploy_cmd = self._has_deploy_command(bash_cmd["commands"], build_tool) - if deploy_cmd: - # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), - ) - # Get the permalink of the source file of the bash command. 
- bash_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(bash_cmd["CI_path"]), - ) - - justification_cmd: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy command: {deploy_cmd}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - check_result["justification"].extend(justification_cmd) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = bash_source_link - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + # Run subchecks + ci_parsed = ci_parsed_subcheck(ci_info) + deploy_action = deploy_action_subcheck( + ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool + ) + deploy_command = deploy_command_subcheck( + ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool + ) + deploy_kws = deploy_kws_subcheck(ctx=ctx, ci_service=ci_service, build_tool=build_tool) + + # Compile justifications from subchecks + for subcheck in [ci_parsed, deploy_action, deploy_command, deploy_kws]: + check_result["justification"].extend(subcheck["justification"]) + + deploy_source_link = deploy_cmd = html_url = trigger_link = "" + + # TODO: do we want to populate this information regardless of whether the check passes or not? + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + predicate = ci_info["provenances"][0]["predicate"] + predicate["buildType"] = f"Custom {ci_service.name}" + predicate["invocation"]["configSource"][ + "uri" + ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + # TODO: Change this. Need a better method for deciding which of the values to store. + # Could decide based on preliminary queries in the prolog string. + if deploy_action["certainty"]: + deploy_source_link = deploy_action["deploy_action_source_link"] + deploy_cmd = deploy_action["deploy_command"] + html_url = deploy_action["html_url"] + trigger_link = deploy_action["trigger_link"] + predicate["metadata"]["buildInvocationId"] = html_url predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + predicate["builder"]["id"] = deploy_source_link + elif deploy_command["certainty"]: + deploy_source_link = deploy_command["deploy_action_source_link"] + deploy_cmd = deploy_command["deploy_command"] + html_url = deploy_command["html_url"] predicate["metadata"]["buildInvocationId"] = html_url - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=deploy_cmd, - build_status_url=html_url, - ) - ] - return CheckResultType.PASSED - - # We currently don't parse these CI configuration files. - # We just look for a keyword for now. 
- for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): - if isinstance(ci_service, unparsed_ci): - if build_tool.ci_deploy_kws[ci_service.name]: - deploy_kw, config_name = ci_service.has_kws_in_config( - build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path - ) - if not config_name: - break - check_result["justification"].append( - f"The target repository uses build tool {build_tool.name}" - + f" in {ci_service.name} using {deploy_kw} to deploy." - ) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = config_name - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - predicate["invocation"]["configSource"]["entryPoint"] = config_name - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - deploy_command=deploy_kw, - ) - ] - return CheckResultType.PASSED + predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + predicate["builder"]["id"] = deploy_source_link + elif deploy_kws["certainty"]: + deploy_cmd = deploy_kws["config_name"] + predicate["builder"]["id"] = deploy_command + predicate["invocation"]["configSource"]["entryPoint"] = deploy_command + + # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. + # TODO: just decide on one deploy method to pass to the database. + + # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. + bac_.build_as_code_subchecks = bac_.BuildAsCodeSubchecks( + ci_parsed=ci_parsed["certainty"], + deploy_action=deploy_action["certainty"], + deploy_command=deploy_command["certainty"], + deploy_kws=deploy_kws["certainty"], + ) + + prolog_string = PrologString( + """ + :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). + + A :: ci_parsed :- ci_parsed_check(A). + B :: deploy_action :- deploy_action_check(B). + C :: deploy_command :- deploy_command_check(C). + D :: deploy_kws :- deploy_kws_check(D). + + 0.80 :: deploy_action_certainty :- deploy_action. + 0.15 :: deploy_action_certainty :- deploy_action, ci_parsed. + + 0.70 :: deploy_command_certainty :- deploy_command. + 0.15 :: deploy_command_certainty :- deploy_command, ci_parsed. + + 0.60 :: deploy_kws_certainty :- deploy_kws. + + build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. + + query(build_as_code_check). + """ + ) + + # TODO: query each of the methods, and take the values from the one with the highest confidence. + confidence_score = 0.0 + result = get_evaluatable().create_from(prolog_string).evaluate() + for key, value in result.items(): + if str(key) == "build_as_code_check": + confidence_score = float(value) + # logger.info("%s : %s", key, value) + results = vars(bac_.build_as_code_subchecks) + + # TODO: Ideas: + # - Query the intermediate checks to construct the check_result table for the highest + # confidence score? + # - Can we find the evidence that contributes the most to this check to output the confidence + # scores for it, and populate the check_result table. + # - Print intermediate proofs? 
+ + check_result["confidence_score"] = confidence_score + + subcheck_results: list[str | dict[str, str]] = [results] + check_result["justification"].extend(subcheck_results) + + # TODO: Return subcheck certainties + check_result["result_tables"] = [ + BuildAsCodeTable( + build_tool_name=build_tool.name, + ci_service_name=ci_service.name, + build_trigger=trigger_link, + deploy_command=deploy_cmd, + build_status_url=html_url, + confidence_score=confidence_score, + ) + ] + + # Check whether the confidence score is greater than the minimum threshold for this check. + if confidence_score >= self.confidence_score_threshold: + logger.info("The certainty of this check passing is: %s", confidence_score) + return CheckResultType.PASSED pass_msg = f"The target repository does not use {build_tool.name} to deploy." check_result["justification"].append(pass_msg) diff --git a/src/macaron/slsa_analyzer/checks/check_result.py b/src/macaron/slsa_analyzer/checks/check_result.py index ab5531e01..3397dedb3 100644 --- a/src/macaron/slsa_analyzer/checks/check_result.py +++ b/src/macaron/slsa_analyzer/checks/check_result.py @@ -40,6 +40,7 @@ class CheckResult(TypedDict): result_tables: list[DeclarativeBase | Table] # recommendation: str result_type: CheckResultType + confidence_score: float class SkippedInfo(TypedDict): diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py new file mode 100644 index 000000000..3dcce24b9 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -0,0 +1,55 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" +from problog.extern import problog_export + +from macaron.slsa_analyzer.checks.bac_ import build_as_code_subchecks + + +@problog_export("-int") # type: ignore +def ci_parsed_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.ci_parsed + + +@problog_export("-int") # type: ignore +def deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.deploy_action + + +@problog_export("-int") # type: ignore +def deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.deploy_command + + +@problog_export("-int") # type: ignore +def deploy_kws_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + return build_as_code_subchecks.deploy_kws diff --git a/src/macaron/slsa_analyzer/registry.py b/src/macaron/slsa_analyzer/registry.py index 9fd3b487e..b98de6136 100644 --- a/src/macaron/slsa_analyzer/registry.py +++ b/src/macaron/slsa_analyzer/registry.py @@ -375,6 +375,7 @@ def scan(self, target: AnalyzeContext, skipped_checks: list[SkippedInfo]) -> dic justification=[message], result_type=CheckResultType.UNKNOWN, result_tables=[], + confidence_score=0, ) graph.done(check_id) else: From 6bfaa0c817a95857a1184483890bbbdc2484f99b Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 12:08:07 +1000 Subject: [PATCH 03/29] refactor: specify build as code subcheck dependencies and invoke through problog inference Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/bac_.py | 18 - .../checks/build_as_code_check.py | 324 +++--------------- .../checks/build_as_code_subchecks.py | 248 ++++++++++++++ .../checks/problog_predicates.py | 10 +- 4 files changed, 305 insertions(+), 295 deletions(-) delete mode 100644 src/macaron/slsa_analyzer/checks/bac_.py create mode 100644 src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py diff --git a/src/macaron/slsa_analyzer/checks/bac_.py b/src/macaron/slsa_analyzer/checks/bac_.py deleted file mode 100644 index 6a993dcf0..000000000 --- a/src/macaron/slsa_analyzer/checks/bac_.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" -from attr import dataclass - - -@dataclass -class BuildAsCodeSubchecks: - """Dataclass for storing the results from the BuildAsCodeCheck subchecks.""" - - ci_parsed: float - deploy_action: float - deploy_command: float - deploy_kws: float - - -build_as_code_subchecks: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 7a00c27ee..ad3c369e8 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -4,31 +4,23 @@ """This module contains the BuildAsCodeCheck class.""" import logging -import os from problog import get_evaluatable from problog.program import PrologString from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import Float, String -from macaron.config.defaults import defaults from macaron.database.database_manager import ORMBase from macaron.database.table_definitions import CheckFactsTable from macaron.slsa_analyzer.analyze_context import AnalyzeContext -from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, NoneBuildTool -from macaron.slsa_analyzer.build_tool.pip import Pip -from macaron.slsa_analyzer.checks import bac_ +from macaron.slsa_analyzer.build_tool.base_build_tool import NoneBuildTool +from macaron.slsa_analyzer.checks import build_as_code_subchecks from macaron.slsa_analyzer.checks.base_check import BaseCheck +from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType -from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService, NoneCIService -from macaron.slsa_analyzer.ci_service.circleci import CircleCI -from 
macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType -from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI -from macaron.slsa_analyzer.ci_service.jenkins import Jenkins -from macaron.slsa_analyzer.ci_service.travis import Travis +from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName -from macaron.slsa_analyzer.specs.ci_spec import CIInfo logger: logging.Logger = logging.getLogger(__name__) @@ -45,196 +37,6 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): confidence_score: Mapped[float] = mapped_column(Float, nullable=True) -def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: - """Check if the bash command is a build and deploy command.""" - # Account for Python projects having separate tools for packaging and publishing. - deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder - for com in commands: - - # Check for empty or invalid commands. - if not com or not com[0]: - continue - # The first argument in a bash command is the program name. - # So first check that the program name is a supported build tool name. - # We need to handle cases where the first argument is a path to the program. - cmd_program_name = os.path.basename(com[0]) - if not cmd_program_name: - logger.debug("Found invalid program name %s.", com[0]) - continue - - check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) - - # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. - check_module_build_commands = any( - interpreter == cmd_program_name - and com[1] - and com[1] in build_tool.interpreter_flag - and com[2] - and com[2] in deploy_tool - for interpreter in build_tool.interpreter - ) - prog_name_index = 2 if check_module_build_commands else 0 - - if check_build_commands or check_module_build_commands: - # Check the arguments in the bash command for the deploy goals. - # If there are no deploy args for this build tool, accept as deploy command. - if not build_tool.deploy_arg: - logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) - return str(com) - - for word in com[(prog_name_index + 1) :]: - # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. 
- if word in build_tool.deploy_arg: - logger.info("Found deploy command %s.", str(com)) - return str(com) - return "" - - -def ci_parsed_subcheck(ci_info: CIInfo) -> dict: - """Check whether parsing is supported for this CI service's CI config files.""" - check_certainty = 1 - - justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - - if ci_info["bash_commands"]: - return {"certainty": check_certainty, "justification": justification} - return {"certainty": 0, "justification": [{"The CI workflow files for this CI service aren't parsed."}]} - - -def deploy_action_subcheck( - ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool -) -> dict: - """Check for use of a trusted Github Actions workflow to publish/deploy.""" - # TODO: verify that deployment is legitimate and not a test - check_certainty = 0.8 - - if isinstance(build_tool, Pip): - trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) - - for callee in ci_info["callgraph"].bfs(): - workflow_name = callee.name.split("@")[0] - - if not workflow_name or callee.node_type not in [ - GHWorkflowType.EXTERNAL, - GHWorkflowType.REUSABLE, - ]: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_deploy_actions: - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), - ) - deploy_action_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, callee.caller_path - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(callee.caller_path), - ) - - # TODO: include in the justification multiple cases of external action usage - justification: list[str | dict[str, str]] = [ - { - "To deploy": deploy_action_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy action: {workflow_name}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - - return { - "certainty": check_certainty, - "justification": justification, - "deploy_command": workflow_name, - "trigger_link": trigger_link, - "deploy_action_source_link": deploy_action_source_link, - "html_url": html_url, - } - - return {"certainty": 0, "justification": []} - - -def deploy_command_subcheck( - ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool -) -> dict: - """Check for the use of deploy command to deploy.""" - check_certainty = 0.7 - for bash_cmd in ci_info["bash_commands"]: - deploy_cmd = has_deploy_command(bash_cmd["commands"], build_tool) - if deploy_cmd: - # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), - ) - # Get the permalink of the source file of the bash command. 
- bash_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(bash_cmd["CI_path"]), - ) - - justification: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy command: {deploy_cmd}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - return { - "certainty": check_certainty, - "justification": justification, - "deploy_cmd": deploy_cmd, - "trigger_link": trigger_link, - "bash_source_link": bash_source_link, - "html_url": html_url, - } - return {"certainty": 0, "justification": ""} - - -def deploy_kws_subcheck(ctx: AnalyzeContext, ci_service: BaseCIService, build_tool: BaseBuildTool) -> dict: - """Check for the use of deploy keywords to deploy.""" - check_certainty = 0.6 - # We currently don't parse these CI configuration files. - # We just look for a keyword for now. - for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): - if isinstance(ci_service, unparsed_ci): - if build_tool.ci_deploy_kws[ci_service.name]: - deploy_kw, config_name = ci_service.has_kws_in_config( - build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path - ) - if not config_name: - return {"certainty": 0, "justification": ""} - - justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] - - return { - "certainty": check_certainty, - "justification": justification, - "deploy_kw": deploy_kw, - "config_name": config_name, - } - return {"certainty": 0, "justification": []} - - class BuildAsCodeCheck(BaseCheck): """This class checks the build as code requirement. @@ -290,65 +92,8 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu if isinstance(ci_service, NoneCIService): continue - # Run subchecks - ci_parsed = ci_parsed_subcheck(ci_info) - deploy_action = deploy_action_subcheck( - ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool - ) - deploy_command = deploy_command_subcheck( - ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool - ) - deploy_kws = deploy_kws_subcheck(ctx=ctx, ci_service=ci_service, build_tool=build_tool) - - # Compile justifications from subchecks - for subcheck in [ci_parsed, deploy_action, deploy_command, deploy_kws]: - check_result["justification"].extend(subcheck["justification"]) - - deploy_source_link = deploy_cmd = html_url = trigger_link = "" - - # TODO: do we want to populate this information regardless of whether the check passes or not? - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - - # TODO: Change this. Need a better method for deciding which of the values to store. - # Could decide based on preliminary queries in the prolog string. 
- if deploy_action["certainty"]: - deploy_source_link = deploy_action["deploy_action_source_link"] - deploy_cmd = deploy_action["deploy_command"] - html_url = deploy_action["html_url"] - trigger_link = deploy_action["trigger_link"] - predicate["metadata"]["buildInvocationId"] = html_url - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["builder"]["id"] = deploy_source_link - elif deploy_command["certainty"]: - deploy_source_link = deploy_command["deploy_action_source_link"] - deploy_cmd = deploy_command["deploy_command"] - html_url = deploy_command["html_url"] - predicate["metadata"]["buildInvocationId"] = html_url - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["builder"]["id"] = deploy_source_link - elif deploy_kws["certainty"]: - deploy_cmd = deploy_kws["config_name"] - predicate["builder"]["id"] = deploy_command - predicate["invocation"]["configSource"]["entryPoint"] = deploy_command - - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. - # TODO: just decide on one deploy method to pass to the database. - # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. - bac_.build_as_code_subchecks = bac_.BuildAsCodeSubchecks( - ci_parsed=ci_parsed["certainty"], - deploy_action=deploy_action["certainty"], - deploy_command=deploy_command["certainty"], - deploy_kws=deploy_kws["certainty"], - ) + build_as_code_subchecks.build_as_code_subcheck_results = BuildAsCodeSubchecks(ctx=ctx, ci_info=ci_info) prolog_string = PrologString( """ @@ -377,10 +122,10 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu confidence_score = 0.0 result = get_evaluatable().create_from(prolog_string).evaluate() for key, value in result.items(): + print(key, value) if str(key) == "build_as_code_check": confidence_score = float(value) - # logger.info("%s : %s", key, value) - results = vars(bac_.build_as_code_subchecks) + results = vars(build_as_code_subchecks.build_as_code_subcheck_results) # TODO: Ideas: # - Query the intermediate checks to construct the check_result table for the highest @@ -394,17 +139,52 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu subcheck_results: list[str | dict[str, str]] = [results] check_result["justification"].extend(subcheck_results) + # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. + # TODO: determine a better way to save these values to the database. + + # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + + # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + # predicate = ci_info["provenances"][0]["predicate"] + # predicate["buildType"] = f"Custom {ci_service.name}" + # predicate["invocation"]["configSource"][ + # "uri" + # ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + # predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + # # TODO: Change this. Need a better method for deciding which of the values to store. + # # Could decide based on preliminary queries in the prolog string. 
+ # if deploy_action["certainty"]: + # deploy_source_link = deploy_action["deploy_action_source_link"] + # deploy_cmd = deploy_action["deploy_command"] + # html_url = deploy_action["html_url"] + # trigger_link = deploy_action["trigger_link"] + # predicate["metadata"]["buildInvocationId"] = html_url + # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + # predicate["builder"]["id"] = deploy_source_link + # elif deploy_command["certainty"]: + # deploy_source_link = deploy_command["deploy_action_source_link"] + # deploy_cmd = deploy_command["deploy_command"] + # html_url = deploy_command["html_url"] + # predicate["metadata"]["buildInvocationId"] = html_url + # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + # predicate["builder"]["id"] = deploy_source_link + # elif deploy_kws["certainty"]: + # deploy_cmd = deploy_kws["config_name"] + # predicate["builder"]["id"] = deploy_command + # predicate["invocation"]["configSource"]["entryPoint"] = deploy_command + # TODO: Return subcheck certainties - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=deploy_cmd, - build_status_url=html_url, - confidence_score=confidence_score, - ) - ] + # check_result["result_tables"] = [ + # BuildAsCodeTable( + # build_tool_name=build_tool.name, + # ci_service_name=ci_service.name, + # build_trigger=trigger_link, + # deploy_command=deploy_cmd, + # build_status_url=html_url, + # confidence_score=confidence_score, + # ) + # ] # Check whether the confidence score is greater than the minimum threshold for this check. if confidence_score >= self.confidence_score_threshold: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py new file mode 100644 index 000000000..1cb6a4f6c --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -0,0 +1,248 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" + +import logging +import os + +from macaron.config.defaults import defaults +from macaron.slsa_analyzer.analyze_context import AnalyzeContext +from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool +from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.ci_service.circleci import CircleCI +from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType +from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI +from macaron.slsa_analyzer.ci_service.jenkins import Jenkins +from macaron.slsa_analyzer.ci_service.travis import Travis +from macaron.slsa_analyzer.specs.ci_spec import CIInfo + +logger: logging.Logger = logging.getLogger(__name__) + + +def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: + """Check if the bash command is a build and deploy command.""" + # Account for Python projects having separate tools for packaging and publishing. + deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder + for com in commands: + + # Check for empty or invalid commands. + if not com or not com[0]: + continue + # The first argument in a bash command is the program name. 
+ # So first check that the program name is a supported build tool name. + # We need to handle cases where the first argument is a path to the program. + cmd_program_name = os.path.basename(com[0]) + if not cmd_program_name: + logger.debug("Found invalid program name %s.", com[0]) + continue + + check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) + + # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. + check_module_build_commands = any( + interpreter == cmd_program_name + and com[1] + and com[1] in build_tool.interpreter_flag + and com[2] + and com[2] in deploy_tool + for interpreter in build_tool.interpreter + ) + prog_name_index = 2 if check_module_build_commands else 0 + + if check_build_commands or check_module_build_commands: + # Check the arguments in the bash command for the deploy goals. + # If there are no deploy args for this build tool, accept as deploy command. + if not build_tool.deploy_arg: + logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) + return str(com) + + for word in com[(prog_name_index + 1) :]: + # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. + if word in build_tool.deploy_arg: + logger.info("Found deploy command %s.", str(com)) + return str(com) + return "" + + +class BuildAsCodeSubchecks: + """Class for storing the results from the BuildAsCodeCheck subchecks.""" + + # store analyze context + def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: + self.ctx = ctx + self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore + self.ci_services = ctx.dynamic_data["ci_services"] + self.check_results: dict = {} # Update this with each check. + self.ci_info = ci_info + self.ci_service = ci_info["service"] + self.failed_check = 0.0 + + # TODO: Make subcheck functions available to other checks. + + # TODO: Before each check is run, check whether a certainty result already exists in self.check_results + # to avoid re-running unecessarily. + + def ci_parsed(self) -> float: + """Check whether parsing is supported for this CI service's CI config files.""" + check_certainty = 1.0 + # If this check has already been run on this repo, return certainty. + + justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] + + if self.ci_info["bash_commands"]: + self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} + return check_certainty + return self.failed_check + + def deploy_command(self) -> float: + """Check for the use of deploy command to deploy.""" + check_certainty = 0.7 + depends_on = [self.ci_parsed() > 0] + if not all(depends_on): + return self.failed_check + + for bash_cmd in self.ci_info["bash_commands"]: + deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) + if deploy_cmd: + # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. + trigger_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, + self.ctx.commit_sha, + self.ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), + ) + # Get the permalink of the source file of the bash command. 
+ bash_source_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, self.ctx.commit_sha, bash_cmd["caller_path"] + ) + + html_url = self.ci_service.has_latest_run_passed( + self.ctx.repo_full_name, + self.ctx.branch_name, + self.ctx.commit_sha, + self.ctx.commit_date, + os.path.basename(bash_cmd["CI_path"]), + ) + + justification: list[str | dict[str, str]] = [ + { + f"The target repository uses build tool {self.build_tool.name} to deploy": bash_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy command: {deploy_cmd}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + self.check_results["deploy_command"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_cmd": deploy_cmd, + "trigger_link": trigger_link, + "bash_source_link": bash_source_link, + "html_url": html_url, + } + + return check_certainty + return self.failed_check + + def deploy_kws(self) -> float: + """Check for the use of deploy keywords to deploy.""" + check_certainty = 0.6 + depends_on = [self.ci_parsed() == 0.0] + # If this check has already been run on this repo, return certainty. + + if not all(depends_on): + return self.failed_check + + # We currently don't parse these CI configuration files. + # We just look for a keyword for now. + for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): + if isinstance(self.ci_service, unparsed_ci): + if self.build_tool.ci_deploy_kws[self.ci_service.name]: + deploy_kw, config_name = self.ci_service.has_kws_in_config( + self.build_tool.ci_deploy_kws[self.ci_service.name], repo_path=self.ctx.repo_path + ) + if not config_name: + return self.failed_check + + justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + + self.check_results["deploy_kws"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_kw": deploy_kw, + "config_name": config_name, + } + return check_certainty + + return self.failed_check + + def deploy_action(self) -> float: + """Check for use of a trusted Github Actions workflow to publish/deploy.""" + # TODO: verify that deployment is legitimate and not a test + check_certainty = 0.8 + depends_on = [self.ci_parsed() > 0] + # If this check has already been run on this repo, return certainty. + print("CI PARSED: ", self.ci_parsed()) + if not all(depends_on): + return self.failed_check + + if isinstance(self.build_tool, Pip): + trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) + + for callee in self.ci_info["callgraph"].bfs(): + workflow_name = callee.name.split("@")[0] + + if not workflow_name or callee.node_type not in [ + GHWorkflowType.EXTERNAL, + GHWorkflowType.REUSABLE, + ]: + logger.debug("Workflow %s is not relevant. 
Skipping...", callee.name) + continue + if workflow_name in trusted_deploy_actions: + trigger_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, + self.ctx.commit_sha, + self.ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), + ) + deploy_action_source_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, self.ctx.commit_sha, callee.caller_path + ) + + html_url = self.ci_service.has_latest_run_passed( + self.ctx.repo_full_name, + self.ctx.branch_name, + self.ctx.commit_sha, + self.ctx.commit_date, + os.path.basename(callee.caller_path), + ) + + # TODO: include in the justification multiple cases of external action usage + justification: list[str | dict[str, str]] = [ + { + "To deploy": deploy_action_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy action: {workflow_name}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + self.check_results["deploy_action"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_command": workflow_name, + "trigger_link": trigger_link, + "deploy_action_source_link": deploy_action_source_link, + "html_url": html_url, + } + + return check_certainty + + return self.failed_check + + +build_as_code_subcheck_results: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 3dcce24b9..89b1733b2 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -4,7 +4,7 @@ """Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" from problog.extern import problog_export -from macaron.slsa_analyzer.checks.bac_ import build_as_code_subchecks +from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results @problog_export("-int") # type: ignore @@ -16,7 +16,7 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.ci_parsed + return build_as_code_subcheck_results.ci_parsed() @problog_export("-int") # type: ignore @@ -28,7 +28,7 @@ def deploy_action_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.deploy_action + return build_as_code_subcheck_results.deploy_action() @problog_export("-int") # type: ignore @@ -40,7 +40,7 @@ def deploy_command_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.deploy_command + return build_as_code_subcheck_results.deploy_command() @problog_export("-int") # type: ignore @@ -52,4 +52,4 @@ def deploy_kws_check() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subchecks.deploy_kws + return build_as_code_subcheck_results.deploy_kws() From 41372fafc3eaca5f627ecfa153c6b4cbeef0508f Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 12:18:29 +1000 Subject: [PATCH 04/29] fix: update test_gha_workflow_deployment so that it passes the ci_parsed subcheck Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py | 4 +--- tests/slsa_analyzer/checks/test_build_as_code_check.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 1cb6a4f6c..4fc3fe49d 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -98,7 +98,7 @@ def ci_parsed(self) -> float: def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" check_certainty = 0.7 - depends_on = [self.ci_parsed() > 0] + depends_on = [self.ci_parsed() > 0.0] if not all(depends_on): return self.failed_check @@ -184,8 +184,6 @@ def deploy_action(self) -> float: # TODO: verify that deployment is legitimate and not a test check_certainty = 0.8 depends_on = [self.ci_parsed() > 0] - # If this check has already been run on this repo, return certainty. - print("CI PARSED: ", self.ci_parsed()) if not all(depends_on): return self.failed_check diff --git a/tests/slsa_analyzer/checks/test_build_as_code_check.py b/tests/slsa_analyzer/checks/test_build_as_code_check.py index b7a07b0f9..8f3252844 100644 --- a/tests/slsa_analyzer/checks/test_build_as_code_check.py +++ b/tests/slsa_analyzer/checks/test_build_as_code_check.py @@ -180,9 +180,10 @@ def test_gha_workflow_deployment( """Test the use of verified GitHub Actions to deploy.""" check = BuildAsCodeCheck() check_result = CheckResult(justification=[]) # type: ignore + bash_commands = BashCommands(caller_path="source_file", CI_path="ci_file", CI_type="github_actions", commands=[[]]) ci_info = CIInfo( service=github_actions_service, - bash_commands=[], + bash_commands=[bash_commands], callgraph=CallGraph(BaseNode(), ""), provenance_assets=[], latest_release={}, From ca0a3988988f02c2b9a0d2605c243a986a97945e Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 13:02:23 +1000 Subject: [PATCH 05/29] chore: convert problog result dictionary to use str keys Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index ad3c369e8..5312a3806 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -6,7 +6,7 @@ import logging from problog import get_evaluatable -from problog.program import PrologString +from problog.program import PrologString, Term from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import Float, String @@ -86,15 +86,16 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # Checking if a build tool is discovered for this repo. if build_tool and not isinstance(build_tool, NoneBuildTool): for ci_info in ci_services: - + confidence_score = 0.0 ci_service = ci_info["service"] # Checking if a CI service is discovered for this repo. 
if isinstance(ci_service, NoneCIService): continue - # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. + # Initialize the BuildAsCodeSubchecks object with the AnalyzeContext. build_as_code_subchecks.build_as_code_subcheck_results = BuildAsCodeSubchecks(ctx=ctx, ci_info=ci_info) + # ProbLog rules to be evaluated. prolog_string = PrologString( """ :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). @@ -114,18 +115,20 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. + query(deploy_command_certainty). + query(deploy_action_certainty). + query(deploy_kws_certainty). query(build_as_code_check). """ ) # TODO: query each of the methods, and take the values from the one with the highest confidence. - confidence_score = 0.0 - result = get_evaluatable().create_from(prolog_string).evaluate() - for key, value in result.items(): - print(key, value) - if str(key) == "build_as_code_check": - confidence_score = float(value) - results = vars(build_as_code_subchecks.build_as_code_subcheck_results) + + # Convert the result dictionary from Term:float to str:float + term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() + result: dict[str, float] = {str(k): v for k, v in term_result.items()} + + confidence_score = result["build_as_code_check"] # TODO: Ideas: # - Query the intermediate checks to construct the check_result table for the highest @@ -136,9 +139,6 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu check_result["confidence_score"] = confidence_score - subcheck_results: list[str | dict[str, str]] = [results] - check_result["justification"].extend(subcheck_results) - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. # TODO: determine a better way to save these values to the database. 
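
A quick way to see what the ProbLog string above computes: each "P :: head :- body."
rule fires independently, so two rules for the same head combine as a noisy-or. The
following standalone sketch uses hard-coded probabilistic facts in place of the
exported Python predicates, so it runs on its own against the problog package:

    from problog import get_evaluatable
    from problog.program import PrologString

    MODEL = PrologString(
        """
        1.0 :: ci_parsed.
        0.8 :: deploy_action.

        0.80 :: deploy_action_certainty :- deploy_action.
        0.15 :: deploy_action_certainty :- deploy_action, ci_parsed.

        query(deploy_action_certainty).
        """
    )

    # Both rules can fire, so their probabilities combine as a noisy-or:
    # 0.8 * (1 - (1 - 0.80) * (1 - 0.15)) = 0.8 * 0.83 = 0.664
    result = get_evaluatable().create_from(MODEL).evaluate()
    for term, probability in result.items():
        print(term, probability)  # deploy_action_certainty: 0.664
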
From 6fcfbd1cc0aca09d99ea34bf7fff592ad1b0455a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 15:46:23 +1000 Subject: [PATCH 06/29] feat: perform intermediate querying on deploy method subchecks to determine which results to store Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 106 ++++++++---------- .../checks/build_as_code_subchecks.py | 69 +++++++----- 2 files changed, 91 insertions(+), 84 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 5312a3806..78eac81ed 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -16,7 +16,7 @@ from macaron.slsa_analyzer.build_tool.base_build_tool import NoneBuildTool from macaron.slsa_analyzer.checks import build_as_code_subchecks from macaron.slsa_analyzer.checks.base_check import BaseCheck -from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks +from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks, DeploySubcheckResults from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService from macaron.slsa_analyzer.registry import registry @@ -122,69 +122,57 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu """ ) - # TODO: query each of the methods, and take the values from the one with the highest confidence. - # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() - result: dict[str, float] = {str(k): v for k, v in term_result.items()} - + result: dict[str, float] = {str(key): value for key, value in term_result.items()} + deploy_methods = { + "deploy_command": result["deploy_command_certainty"], + "deploy_action": result["deploy_action_certainty"], + "deploy_kws": result["deploy_kws_certainty"], + } + deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} confidence_score = result["build_as_code_check"] - - # TODO: Ideas: - # - Query the intermediate checks to construct the check_result table for the highest - # confidence score? - # - Can we find the evidence that contributes the most to this check to output the confidence - # scores for it, and populate the check_result table. - # - Print intermediate proofs? - check_result["confidence_score"] = confidence_score - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. - # TODO: determine a better way to save these values to the database. - - # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - - # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - # predicate = ci_info["provenances"][0]["predicate"] - # predicate["buildType"] = f"Custom {ci_service.name}" - # predicate["invocation"]["configSource"][ - # "uri" - # ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - # predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - - # # TODO: Change this. Need a better method for deciding which of the values to store. - # # Could decide based on preliminary queries in the prolog string. 
- # if deploy_action["certainty"]: - # deploy_source_link = deploy_action["deploy_action_source_link"] - # deploy_cmd = deploy_action["deploy_command"] - # html_url = deploy_action["html_url"] - # trigger_link = deploy_action["trigger_link"] - # predicate["metadata"]["buildInvocationId"] = html_url - # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - # predicate["builder"]["id"] = deploy_source_link - # elif deploy_command["certainty"]: - # deploy_source_link = deploy_command["deploy_action_source_link"] - # deploy_cmd = deploy_command["deploy_command"] - # html_url = deploy_command["html_url"] - # predicate["metadata"]["buildInvocationId"] = html_url - # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - # predicate["builder"]["id"] = deploy_source_link - # elif deploy_kws["certainty"]: - # deploy_cmd = deploy_kws["config_name"] - # predicate["builder"]["id"] = deploy_command - # predicate["invocation"]["configSource"]["entryPoint"] = deploy_command - - # TODO: Return subcheck certainties - # check_result["result_tables"] = [ - # BuildAsCodeTable( - # build_tool_name=build_tool.name, - # ci_service_name=ci_service.name, - # build_trigger=trigger_link, - # deploy_command=deploy_cmd, - # build_status_url=html_url, - # confidence_score=confidence_score, - # ) - # ] + if deploy_methods_valid.values(): + # Determine the deployment method with the highest certainty score. + highest_certainty = max(deploy_methods_valid, key=deploy_methods_valid.__getitem__) + deploy_method = build_as_code_subchecks.build_as_code_subcheck_results.get_subcheck_results( + highest_certainty + ) + + if isinstance(deploy_method, DeploySubcheckResults): + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + predicate = ci_info["provenances"][0]["predicate"] + predicate["buildType"] = f"Custom {ci_service.name}" + predicate["invocation"]["configSource"][ + "uri" + ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + predicate["metadata"]["buildInvocationId"] = deploy_method.html_url + predicate["builder"]["id"] = deploy_method.source_link + predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.trigger_link + + if highest_certainty == "deploy_kws": + predicate["builder"]["id"] = deploy_method.config_name + predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.config_name + + check_result["result_tables"] = [ + BuildAsCodeTable( + build_tool_name=build_tool.name, + ci_service_name=ci_service.name, + build_trigger=deploy_method.trigger_link, + deploy_command=deploy_method.deploy_cmd, + build_status_url=deploy_method.html_url, + confidence_score=confidence_score, + ) + ] + + # TODO: compile all justifications + # check_result["justification"].append() + + # TODO: Investigate using proofs # Check whether the confidence score is greater than the minimum threshold for this check. 
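+            # (The noisy-or combination of the rules above can only raise the overall
+            # confidence as more evidence fires; it never falls below the strongest
+            # single deploy method.)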
if confidence_score >= self.confidence_score_threshold: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 4fc3fe49d..9b2928d93 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -6,6 +6,8 @@ import logging import os +from attr import dataclass + from macaron.config.defaults import defaults from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool @@ -65,6 +67,19 @@ def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> return "" +@dataclass +class DeploySubcheckResults: + """DataClass containing information required from deploy command subchecks.""" + + certainty: float = 0.0 + justification: list[str | dict[str, str]] = [""] + deploy_cmd: str = "" + trigger_link: str = "" + source_link: str = "" + html_url: str = "" + config_name: str = "" + + class BuildAsCodeSubchecks: """Class for storing the results from the BuildAsCodeCheck subchecks.""" @@ -73,9 +88,10 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ctx = ctx self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore self.ci_services = ctx.dynamic_data["ci_services"] - self.check_results: dict = {} # Update this with each check. + self.check_results: dict[str, dict | DeploySubcheckResults] = {} # Update this with each check. self.ci_info = ci_info self.ci_service = ci_info["service"] + # Certainty value to be returned if a subcheck fails. self.failed_check = 0.0 # TODO: Make subcheck functions available to other checks. @@ -88,9 +104,8 @@ def ci_parsed(self) -> float: check_certainty = 1.0 # If this check has already been run on this repo, return certainty. 
- justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - if self.ci_info["bash_commands"]: + justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} return check_certainty return self.failed_check @@ -135,14 +150,14 @@ def deploy_command(self) -> float: else "However, could not find a passing workflow run.", ] - self.check_results["deploy_command"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_cmd": deploy_cmd, - "trigger_link": trigger_link, - "bash_source_link": bash_source_link, - "html_url": html_url, - } + self.check_results["deploy_command"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=deploy_cmd, + trigger_link=trigger_link, + source_link=bash_source_link, + html_url=html_url, + ) return check_certainty return self.failed_check @@ -169,12 +184,12 @@ def deploy_kws(self) -> float: justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] - self.check_results["deploy_kws"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_kw": deploy_kw, - "config_name": config_name, - } + self.check_results["deploy_kws"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=deploy_kw, + config_name=config_name, + ) return check_certainty return self.failed_check @@ -229,18 +244,22 @@ def deploy_action(self) -> float: else "However, could not find a passing workflow run.", ] - self.check_results["deploy_action"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_command": workflow_name, - "trigger_link": trigger_link, - "deploy_action_source_link": deploy_action_source_link, - "html_url": html_url, - } + self.check_results["deploy_action"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=workflow_name, + trigger_link=trigger_link, + source_link=deploy_action_source_link, + html_url=html_url, + ) return check_certainty return self.failed_check + def get_subcheck_results(self, subcheck_name: str) -> dict | DeploySubcheckResults: + """Return the results for a particular subcheck.""" + return self.check_results[subcheck_name] + build_as_code_subcheck_results: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name From 35ae23b7d0c9e0ee3bd0068b906a4661b6f368bf Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Fri, 9 Jun 2023 11:34:32 +1000 Subject: [PATCH 07/29] feat: add sub-check for workflow trigger event type Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/base_check.py | 2 +- .../checks/build_as_code_check.py | 3 + .../checks/build_as_code_subchecks.py | 56 ++++++++++++++++++- .../checks/problog_predicates.py | 24 ++++++++ 4 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/base_check.py b/src/macaron/slsa_analyzer/checks/base_check.py index 8b12d2005..3a43cf635 100644 --- a/src/macaron/slsa_analyzer/checks/base_check.py +++ b/src/macaron/slsa_analyzer/checks/base_check.py @@ -86,7 +86,7 @@ def run(self, target: AnalyzeContext, skipped_info: Optional[SkippedInfo] = None justification=[], result_type=CheckResultType.SKIPPED, result_tables=[], - confidence_score=0, + confidence_score=0.0, ) if skipped_info: diff --git 
a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 78eac81ed..89bed17fb 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -122,6 +122,8 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu """ ) + build_as_code_subchecks.build_as_code_subcheck_results.workflow_trigger("publish.yaml") + # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() result: dict[str, float] = {str(key): value for key, value in term_result.items()} @@ -143,6 +145,7 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu if isinstance(deploy_method, DeploySubcheckResults): if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + # Store the values for the inferred provenance representation. predicate = ci_info["provenances"][0]["predicate"] predicate["buildType"] = f"Custom {ci_service.name}" predicate["invocation"]["configSource"][ diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 9b2928d93..af6813464 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -78,6 +78,7 @@ class DeploySubcheckResults: source_link: str = "" html_url: str = "" config_name: str = "" + workflow_name: str = "" class BuildAsCodeSubchecks: @@ -88,7 +89,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ctx = ctx self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore self.ci_services = ctx.dynamic_data["ci_services"] - self.check_results: dict[str, dict | DeploySubcheckResults] = {} # Update this with each check. + self.check_results: dict[str, DeploySubcheckResults] = {} # Update this with each check. self.ci_info = ci_info self.ci_service = ci_info["service"] # Certainty value to be returned if a subcheck fails. 
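
The workflow_trigger sub-check introduced below keys on the events declared in a
workflow's "on:" section. As a rough standalone sketch of the same idea, using
PyYAML directly rather than Macaron's workflow parser (the helper name and event
set are illustrative assumptions, not part of the patch):

    import yaml

    # GitHub spells the manual trigger "workflow_dispatch"; a parser may surface
    # the event names differently, so treat this set as an assumption.
    VALID_TRIGGER_EVENTS = {"workflow_dispatch", "push", "release"}

    def workflow_trigger_events(workflow_path: str) -> set[str]:
        """Return the trigger events declared by a GitHub Actions workflow."""
        with open(workflow_path, encoding="utf-8") as handle:
            document = yaml.safe_load(handle)
        # YAML 1.1 reads a bare "on" key as the boolean True, so look up both.
        triggers = document.get("on", document.get(True, {}))
        if isinstance(triggers, str):  # on: push
            return {triggers}
        if isinstance(triggers, list):  # on: [push, release]
            return set(triggers)
        return set(triggers or {})  # on: {push: ..., release: ...}

    # The sub-check then reduces to a containment test, e.g.:
    # workflow_trigger_events("publish.yaml") & VALID_TRIGGER_EVENTS
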
@@ -106,7 +107,9 @@ def ci_parsed(self) -> float: if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} + self.check_results["ci_parsed"] = DeploySubcheckResults( + certainty=check_certainty, justification=justification + ) return check_certainty return self.failed_check @@ -139,6 +142,8 @@ def deploy_command(self) -> float: os.path.basename(bash_cmd["CI_path"]), ) + workflow_name = os.path.basename(html_url) + justification: list[str | dict[str, str]] = [ { f"The target repository uses build tool {self.build_tool.name} to deploy": bash_source_link, @@ -157,6 +162,7 @@ def deploy_command(self) -> float: trigger_link=trigger_link, source_link=bash_source_link, html_url=html_url, + workflow_name=workflow_name, ) return check_certainty @@ -251,13 +257,57 @@ def deploy_action(self) -> float: trigger_link=trigger_link, source_link=deploy_action_source_link, html_url=html_url, + workflow_name=workflow_name, ) return check_certainty return self.failed_check - def get_subcheck_results(self, subcheck_name: str) -> dict | DeploySubcheckResults: + def workflow_trigger(self, workflow_name: str) -> str: + """Check that the workflow is triggered by a valid event.""" + valid_trigger_events = ["workflow-dispatch", "push", "release"] + for callee in self.ci_info["callgraph"].bfs(): + if callee.name == workflow_name: + trigger_events = callee.parsed_obj.get("On", {}) + for event in trigger_events: + hook = event.get("Hook", {}) + trigger_type = str(hook.get("Value", "")) + if trigger_type in valid_trigger_events: + return trigger_type + return "" + + def workflow_trigger_deploy_command(self) -> float: + """Check the workflow trigger for the required deploy_command workflow file.""" + check_certainty = 0.9 + depends_on = [self.deploy_command() > 0.0] + if not all(depends_on): + return self.failed_check + + workflow_name = self.check_results["deploy_command"].workflow_name + if workflow_name: + trigger_type = self.workflow_trigger(workflow_name=workflow_name) + if trigger_type: + logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) + return check_certainty + return self.failed_check + + def workflow_trigger_deploy_action(self) -> float: + """Check the workflow trigger for the required deploy_action workflow file.""" + check_certainty = 0.9 + depends_on = [self.deploy_action() > 0.0] + if not all(depends_on): + return self.failed_check + + workflow_name = self.check_results["deploy_action"].workflow_name + if workflow_name: + trigger_type = self.workflow_trigger(workflow_name=workflow_name) + if trigger_type: + logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) + return check_certainty + return self.failed_check + + def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" return self.check_results[subcheck_name] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 89b1733b2..93aa7324e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -53,3 +53,27 @@ def deploy_kws_check() -> float: The certainty of the check. 
""" return build_as_code_subcheck_results.deploy_kws() + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_commmand() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subcheck_results.workflow_trigger_deploy_command() + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_action() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subcheck_results.workflow_trigger_deploy_action() From c7502c6d983bc32dac9eb94c9a871871a3b8206a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Sat, 10 Jun 2023 12:23:18 +1000 Subject: [PATCH 08/29] chore: store workflow info object for each node in GitHub Actions callgraph Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/ci_service/github_actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions.py b/src/macaron/slsa_analyzer/ci_service/github_actions.py index 7ae85dee9..7d44f5816 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -170,7 +170,7 @@ def build_call_graph_from_node(self, node: GitHubNode) -> None: name=step["Exec"]["Uses"]["Value"], node_type=GHWorkflowType.EXTERNAL, source_path="", - parsed_obj={}, + parsed_obj=step["Exec"], caller_path=node.source_path, ) ) From e48003fdabba779f78cc4c2f941001586fc19d17 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Sun, 11 Jun 2023 13:47:56 +1000 Subject: [PATCH 09/29] chore: check that deploy action doesn't have a repository url specified Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_subchecks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index af6813464..67398e470 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -221,6 +221,18 @@ def deploy_action(self) -> float: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue if workflow_name in trusted_deploy_actions: + workflow_info = callee.parsed_obj + inputs = workflow_info.get("Inputs", {}) + + # Deployment is to Pypi if there isn't a repository url + if inputs and inputs.get("repository_url"): + logger.debug( + "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", + callee.name, + ) + continue + + # TODO: all of this logic could be generalized in build_as_code body. 
trigger_link = self.ci_service.api_client.get_file_link( self.ctx.repo_full_name, self.ctx.commit_sha, From 0bc69bd0e2955401bc8f1e23f79a15d60927385f Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 00:51:55 +1000 Subject: [PATCH 10/29] feat: add sub-check for test publish to pypi Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 23 +++++++++++++++++++ .../checks/problog_predicates.py | 10 ++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 67398e470..e73afaf6e 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -200,6 +200,29 @@ def deploy_kws(self) -> float: return self.failed_check + def test_deploy_action(self, workflow_name: str) -> float: + """Check for the use of a test deploy to PyPi given a CI workflow.""" + check_certainty = 0.7 + for callee in self.ci_info["callgraph"].bfs(): + # TODO: figure out a way to generalize this implementation for other external GHAs. + # Currently just checks for the pypa/gh-action-pypi-publish action. + if not workflow_name or callee.node_type not in [ + GHWorkflowType.EXTERNAL, + GHWorkflowType.REUSABLE, + ]: + logger.debug("Workflow %s is not relevant. Skipping...", callee.name) + continue + callee_name = callee.name.split("@")[0] + + if callee_name == workflow_name == "pypa/gh-action-pypi-publish": + workflow_info = callee.parsed_obj + inputs = workflow_info.get("Inputs", {}) + repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") + # TODO: Use values that come from defaults.ini rather than hardcoded. + if repo_url == "https://test.pypi.org/legacy/": + return check_certainty + return self.failed_check + def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" # TODO: verify that deployment is legitimate and not a test diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 93aa7324e..e8c78f609 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -6,6 +6,8 @@ from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results +FAILED_CHECK = 0.0 + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -68,7 +70,7 @@ def workflow_trigger_deploy_commmand() -> float: @problog_export("-int") # type: ignore -def workflow_trigger_deploy_action() -> float: +def test_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -76,4 +78,8 @@ def workflow_trigger_deploy_action() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subcheck_results.workflow_trigger_deploy_action() + depends_on = [deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name + return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) From 8c6f80d47e07cb76c8b787833eccd14fb84df09c Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 00:57:02 +1000 Subject: [PATCH 11/29] chore: restructure problog predicate functions Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 77 ++++++++++++------- .../checks/problog_predicates.py | 26 ++++++- 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index e73afaf6e..6651161f9 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -104,6 +104,7 @@ def ci_parsed(self) -> float: """Check whether parsing is supported for this CI service's CI config files.""" check_certainty = 1.0 # If this check has already been run on this repo, return certainty. + logger.info("CI PARSED") if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] @@ -120,6 +121,8 @@ def deploy_command(self) -> float: if not all(depends_on): return self.failed_check + logger.info("DEPLOY COMMAND") + for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) if deploy_cmd: @@ -299,9 +302,14 @@ def deploy_action(self) -> float: return self.failed_check - def workflow_trigger(self, workflow_name: str) -> str: + def workflow_trigger(self, workflow_name: str = "") -> float: """Check that the workflow is triggered by a valid event.""" + check_certainty = 0.9 + if not workflow_name: + return self.failed_check + valid_trigger_events = ["workflow-dispatch", "push", "release"] + for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_name: trigger_events = callee.parsed_obj.get("On", {}) @@ -309,38 +317,49 @@ def workflow_trigger(self, workflow_name: str) -> str: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) if trigger_type in valid_trigger_events: - return trigger_type - return "" + logger.info( + "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name + ) + return check_certainty + return self.failed_check - def workflow_trigger_deploy_command(self) -> float: - """Check the workflow trigger for the required deploy_command workflow file.""" - check_certainty = 0.9 - depends_on = [self.deploy_command() > 0.0] - if not all(depends_on): - return self.failed_check + # def workflow_uses_secrets(self, ) -> float: + # # TODO: we just want for this specific workflow + # for callee in self.ci_info["callgraph"].bfs(): + # workflow_name = callee.name.split("@")[0] + # blah = callee.parsed_obj - workflow_name = self.check_results["deploy_command"].workflow_name - if workflow_name: - trigger_type = self.workflow_trigger(workflow_name=workflow_name) - if trigger_type: - logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) - return check_certainty - return self.failed_check + # logger.info("WORKFLOW NAME: %s", workflow_name) + # logger.info(blah) - def workflow_trigger_deploy_action(self) -> float: 
- """Check the workflow trigger for the required deploy_action workflow file.""" - check_certainty = 0.9 - depends_on = [self.deploy_action() > 0.0] - if not all(depends_on): - return self.failed_check + # if not workflow_name or callee.node_type not in [ + # GHWorkflowType.EXTERNAL, + # GHWorkflowType.REUSABLE, + # ]: + # logger.debug("Workflow %s is not relevant. Skipping...", callee.name) + # continue + # if workflow_name in trusted_deploy_actions: + # return 0.0 - workflow_name = self.check_results["deploy_action"].workflow_name - if workflow_name: - trigger_type = self.workflow_trigger(workflow_name=workflow_name) - if trigger_type: - logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) - return check_certainty - return self.failed_check + # def pypi_publishing_workflow(self, workflow_id): + # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] + + # # TODO: + # # 1. Figure out how to get the pypi name etc. + + # # 1. Get timestamp of the PyPi package + # # To do this, we need the url of the pypi package + + # # curl returns null if it doesn't exist + + # # 2. Get timestamp of github workflow run + # # Depends on has_latest_run_passed + + # # 2. If timestamp of the publishing workflow is close enough, then chance of the workflow + # # being the one to publish the package is high (rather than manual upload). + + # # http_request + # return def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index e8c78f609..fe9cebdd6 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -58,7 +58,7 @@ def deploy_kws_check() -> float: @problog_export("-int") # type: ignore -def workflow_trigger_deploy_commmand() -> float: +def workflow_trigger_deploy_command_check() -> float: """Get the value of the subcheck. Returns @@ -66,7 +66,29 @@ def workflow_trigger_deploy_commmand() -> float: Certainty The certainty of the check. """ - return build_as_code_subcheck_results.workflow_trigger_deploy_command() + depends_on = [deploy_command_check() > 0.0] + print(all(depends_on)) + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name + return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [deploy_action_check() > 0.0] + print(all(depends_on)) + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name + return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) @problog_export("-int") # type: ignore From 9e80d79dbf0ca61d6767c80ea4a48f629346227e Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 09:39:08 +1000 Subject: [PATCH 12/29] chore: verify sub-check dependencies in ProbLog predicates rather than sub-checks Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 35 +++---------------- .../checks/problog_predicates.py | 9 +++++ 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 6651161f9..12ca06cdb 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -117,11 +117,6 @@ def ci_parsed(self) -> float: def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" check_certainty = 0.7 - depends_on = [self.ci_parsed() > 0.0] - if not all(depends_on): - return self.failed_check - - logger.info("DEPLOY COMMAND") for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) @@ -174,11 +169,6 @@ def deploy_command(self) -> float: def deploy_kws(self) -> float: """Check for the use of deploy keywords to deploy.""" check_certainty = 0.6 - depends_on = [self.ci_parsed() == 0.0] - # If this check has already been run on this repo, return certainty. - - if not all(depends_on): - return self.failed_check # We currently don't parse these CI configuration files. # We just look for a keyword for now. @@ -228,11 +218,7 @@ def test_deploy_action(self, workflow_name: str) -> float: def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" - # TODO: verify that deployment is legitimate and not a test check_certainty = 0.8 - depends_on = [self.ci_parsed() > 0] - if not all(depends_on): - return self.failed_check if isinstance(self.build_tool, Pip): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) @@ -251,7 +237,7 @@ def deploy_action(self) -> float: inputs = workflow_info.get("Inputs", {}) # Deployment is to Pypi if there isn't a repository url - if inputs and inputs.get("repository_url"): + if inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -310,6 +296,8 @@ def workflow_trigger(self, workflow_name: str = "") -> float: valid_trigger_events = ["workflow-dispatch", "push", "release"] + # TODO: Consider activity types for release, i.e. 
prereleased + for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_name: trigger_events = callee.parsed_obj.get("On", {}) @@ -324,22 +312,7 @@ def workflow_trigger(self, workflow_name: str = "") -> float: return self.failed_check # def workflow_uses_secrets(self, ) -> float: - # # TODO: we just want for this specific workflow - # for callee in self.ci_info["callgraph"].bfs(): - # workflow_name = callee.name.split("@")[0] - # blah = callee.parsed_obj - - # logger.info("WORKFLOW NAME: %s", workflow_name) - # logger.info(blah) - - # if not workflow_name or callee.node_type not in [ - # GHWorkflowType.EXTERNAL, - # GHWorkflowType.REUSABLE, - # ]: - # logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - # continue - # if workflow_name in trusted_deploy_actions: - # return 0.0 + # return # def pypi_publishing_workflow(self, workflow_id): # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index fe9cebdd6..95a2ea3e1 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -30,6 +30,9 @@ def deploy_action_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() > 0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_action() @@ -42,6 +45,9 @@ def deploy_command_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_command() @@ -54,6 +60,9 @@ def deploy_kws_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() == 0.0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_kws() From 74da2c9f193016ec36ec9dea5de9700c96de8509 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 15:33:15 +1000 Subject: [PATCH 13/29] feat: add API client for PyPI Signed-off-by: sophie-bates --- .../registry_service/api_client.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 src/macaron/slsa_analyzer/registry_service/api_client.py diff --git a/src/macaron/slsa_analyzer/registry_service/api_client.py b/src/macaron/slsa_analyzer/registry_service/api_client.py new file mode 100644 index 000000000..bb610766d --- /dev/null +++ b/src/macaron/slsa_analyzer/registry_service/api_client.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides API clients for Registry services, such as PyPi.""" + +import logging + +from macaron.util import send_get_http + +logger: logging.Logger = logging.getLogger(__name__) + +# TODO: Create BaseAPIClient + + +class PyPIAPIClient: + """This class acts as a client to use PyPi API. + + See https://warehouse.pypa.io/api-reference/ for the PyPI API documentation. + """ + + _PYPI_API_URL = "https://pypi.org/pypi" + + def get_all_project_data(self, project_name: str) -> dict: + """Query PyPi JSON API for the information about an individual project at the latest version. 
+
+        The URL would be in the following form:
+        ``https://pypi.org/pypi/{project_name}/json``
+
+        Parameters
+        ----------
+        project_name : str
+            The full name of the project (case-insensitive).
+
+        Returns
+        -------
+        dict
+            The JSON query result or an empty dict if the query failed.
+
+        Examples
+        --------
+        The following call to this method will perform a query to ``https://pypi.org/pypi/flask/json``.
+
+        >>> pypi_client.get_all_project_data(
+            project_name="flask"
+        )
+        """
+        logger.debug("Query for project %s's data", project_name)
+        url = f"{PyPIAPIClient._PYPI_API_URL}/{project_name}/json"
+        response_data = send_get_http(url, {})
+        return response_data
+
+    def get_release_data(self, project_name: str, version: str) -> dict:
+        """Query the PyPI JSON API for the information about an individual release at a specific version.
+
+        The URL would be in the following form:
+        ``https://pypi.org/pypi/{project_name}/{version}/json``
+
+        Parameters
+        ----------
+        project_name : str
+            The full name of the project (case-insensitive).
+        version : str
+            The version of the project in the form ``*.*.*``.
+
+        Returns
+        -------
+        dict
+            The JSON query result or an empty dict if the query failed.
+
+        Examples
+        --------
+        The following call to this method will perform a query to ``https://pypi.org/pypi/flask/1.0.0/json``.
+
+        >>> pypi_client.get_release_data(
+            project_name="flask",
+            version="1.0.0"
+        )
+        """
+        logger.debug("Query for project %s's data at version %s", project_name, version)
+        url = f"{PyPIAPIClient._PYPI_API_URL}/{project_name}/{version}/json"
+        response_data = send_get_http(url, {})
+        return response_data

From d9203072a5397756c4a489fc9da9463338f5b46c Mon Sep 17 00:00:00 2001
From: sophie-bates
Date: Mon, 12 Jun 2023 17:49:58 +1000
Subject: [PATCH 14/29] chore: get project name from poetry config file

Signed-off-by: sophie-bates
---
 src/macaron/slsa_analyzer/build_tool/base_build_tool.py | 1 +
 src/macaron/slsa_analyzer/build_tool/poetry.py          | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
index 2235b8e5d..64e93d76f 100644
--- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
+++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
@@ -79,6 +79,7 @@ def __init__(self, name: str) -> None:
         }
         self.build_log: list[str] = []
         self.wrapper_files: list[str] = []
+        self.project_name: str = ""
 
     def __str__(self) -> str:
         return self.name
diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py
index c101a368d..dd7d276b9 100644
--- a/src/macaron/slsa_analyzer/build_tool/poetry.py
+++ b/src/macaron/slsa_analyzer/build_tool/poetry.py
@@ -64,8 +64,9 @@ def is_detected(self, repo_path: str) -> bool:
 
         if files_detected:
             # If a package_lock file exists, and a config file is present, Poetry build tool is detected.
+            # TODO: package_lock_exists check removed for now so the poetry tool name is stored.
             if package_lock_exists:
-                return True
+                logger.info("Lock file found.")  # return True
             # TODO: this implementation assumes one build type, so when multiple build types are supported, this
             # needs to be updated.
             # Take the highest level file, if there are two at the same level, take the first in the list.
@@ -76,7 +77,10 @@
                 try:
                     data = tomllib.load(toml_file)
                     # Check for the existence of a [tool.poetry] section.
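+                    # ("name" is a required key inside [tool.poetry], so it should be
+                    # present whenever the section itself exists.)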
- if ("tool" in data) and ("poetry" in data["tool"]): + poetry_tool = data.get("tool", {}).get("poetry", {}) + if poetry_tool: + # Store the project name + self.project_name = poetry_tool.get("name") return True except tomllib.TOMLDecodeError: logger.error("Failed to read the %s file: invalid toml file.", conf) From ee392e35cdb192a33e16dfed3ef9e0edd44d2b35 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 02:44:50 +1000 Subject: [PATCH 15/29] chore: extract project name from pip config files Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/pip.py | 56 ++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 4abdbd09b..dede70047 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -6,7 +6,11 @@ This module is used to work with repositories that use pip for dependency management. """ +import ast +import configparser import logging +import os +import tomllib from macaron.config.defaults import defaults from macaron.dependency_analyzer import DependencyAnalyzer, NoneDependencyAnalyzer @@ -49,7 +53,57 @@ def is_detected(self, repo_path: str) -> bool: """ for file in self.build_configs: if file_exists(repo_path, file): - return True + # Find project name value from the config file. + # TODO: improve this approach. + file_path = os.path.join(repo_path, file) + file_found = "" + if file == "pyproject.toml": + try: + with open(file_path, "rb") as toml_file: + try: + data = tomllib.load(toml_file) + poetry_tool = data.get("tool", {}).get("poetry", {}) + if poetry_tool: + # Store the project name + self.project_name = poetry_tool.get("name") + file_found = file + except tomllib.TOMLDecodeError: + logger.error("Failed to read the %s file: invalid toml file.", file) + except FileNotFoundError: + logger.error("Failed to read the %s file.", file) + + if file == "setup.cfg": + config = configparser.ConfigParser() + try: + config.read(file_path, encoding="utf8") + if "metadata" in config and "name" in config["metadata"]: + self.project_name = config["metadata"]["name"] + file_found = file + except (configparser.Error, ValueError) as error: + logger.error("Failed to read the %s file.", file) + logger.error(error) + + if file == "setup.py": + try: + with open(file_path, "rb") as config_file: + content = config_file.read() + tree = ast.parse(content) + for node in ast.walk(tree): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == "setup" + ): + for keyword in node.keywords: + if keyword.arg == "name": + self.project_name = str(keyword.value) + file_found = file + except FileNotFoundError: + logger.info("Failed to read the %s file.", file) + if self.project_name: + return True + if file_found: + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: From 067a2c048d60b0b6aadb13ec0bb0601f6a683ff1 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 02:45:58 +1000 Subject: [PATCH 16/29] chore: setup PyPI registry_service with the PyPI API client Signed-off-by: sophie-bates --- .../slsa_analyzer/registry_service/pypi.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/macaron/slsa_analyzer/registry_service/pypi.py diff --git a/src/macaron/slsa_analyzer/registry_service/pypi.py b/src/macaron/slsa_analyzer/registry_service/pypi.py new file mode 100644 index 
000000000..8e822d07b
--- /dev/null
+++ b/src/macaron/slsa_analyzer/registry_service/pypi.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains the spec for the PyPI service."""
+
+from macaron.slsa_analyzer.registry_service.api_client import PyPIAPIClient
+
+
+class PyPI:
+    """This class contains the spec of the PyPI service."""
+
+    def __init__(self) -> None:
+        """Initialize instance."""
+        self._api_client: PyPIAPIClient = None  # type: ignore
+
+    @property
+    def api_client(self) -> PyPIAPIClient:
+        """Return the API client used for querying the PyPI API.
+
+        This client is used to query the PyPI JSON API for package metadata.
+        """
+        if not self._api_client:
+            self._api_client = PyPIAPIClient()
+
+        return self._api_client

From 6a04c4393c9db4454621d691b56d2640604a5127 Mon Sep 17 00:00:00 2001
From: sophie-bates
Date: Tue, 13 Jun 2023 10:27:14 +1000
Subject: [PATCH 17/29] feat: implement sub-check to compare PyPI project timestamp with GHA workflow timestamp

Signed-off-by: sophie-bates
---
 src/macaron/slsa_analyzer/build_tool/pip.py   |  4 +-
 .../checks/build_as_code_subchecks.py         | 61 +++++++++++--------
 .../checks/problog_predicates.py              | 25 +++++++-
 3 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py
index dede70047..a23746dd7 100644
--- a/src/macaron/slsa_analyzer/build_tool/pip.py
+++ b/src/macaron/slsa_analyzer/build_tool/pip.py
@@ -77,7 +77,7 @@ def is_detected(self, repo_path: str) -> bool:
                 try:
                     config.read(file_path, encoding="utf8")
                     if "metadata" in config and "name" in config["metadata"]:
-                        self.project_name = config["metadata"]["name"]
+                        self.project_name = str(config["metadata"]["name"])
                         file_found = file
                 except (configparser.Error, ValueError) as error:
                     logger.error("Failed to read the %s file.", file)
@@ -96,7 +96,7 @@ def is_detected(self, repo_path: str) -> bool:
                             ):
                                 for keyword in node.keywords:
                                     if keyword.arg == "name":
-                                        self.project_name = str(keyword.value)
+                                        self.project_name = ast.literal_eval(keyword.value)
                                         file_found = file
                     except FileNotFoundError:
                         logger.info("Failed to read the %s file.", file)
diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
index 12ca06cdb..5aff490ac 100644
--- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
+++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
@@ -17,6 +17,7 @@
 from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI
 from macaron.slsa_analyzer.ci_service.jenkins import Jenkins
 from macaron.slsa_analyzer.ci_service.travis import Travis
+from macaron.slsa_analyzer.registry_service.api_client import PyPIAPIClient
 from macaron.slsa_analyzer.specs.ci_spec import CIInfo
 
 logger: logging.Logger = logging.getLogger(__name__)
@@ -103,9 +104,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None:
     def ci_parsed(self) -> float:
         """Check whether parsing is supported for this CI service's CI config files."""
         check_certainty = 1.0
-        # If this check has already been run on this repo, return certainty.
-        logger.info("CI PARSED")
-
+        # TODO: If this check has already been run on this repo, return certainty.
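+        # (For instance, memoise the certainty keyed on the repository's remote path so
+        # repeated ProbLog queries do not re-run the sub-check.)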
if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] self.check_results["ci_parsed"] = DeploySubcheckResults( @@ -193,9 +192,10 @@ def deploy_kws(self) -> float: return self.failed_check - def test_deploy_action(self, workflow_name: str) -> float: + def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: """Check for the use of a test deploy to PyPi given a CI workflow.""" check_certainty = 0.7 + logger.info("File name: %s", workflow_file) for callee in self.ci_info["callgraph"].bfs(): # TODO: figure out a way to generalize this implementation for other external GHAs. # Currently just checks for the pypa/gh-action-pypi-publish action. @@ -288,10 +288,12 @@ def deploy_action(self) -> float: return self.failed_check - def workflow_trigger(self, workflow_name: str = "") -> float: + # TODO: workflow_name isn't used as a file in some places! + + def workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 - if not workflow_name: + if not workflow_file: return self.failed_check valid_trigger_events = ["workflow-dispatch", "push", "release"] @@ -299,40 +301,45 @@ def workflow_trigger(self, workflow_name: str = "") -> float: # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): - if callee.name == workflow_name: + if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) for event in trigger_events: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) if trigger_type in valid_trigger_events: logger.info( - "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name + "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file ) return check_certainty return self.failed_check - # def workflow_uses_secrets(self, ) -> float: - # return - - # def pypi_publishing_workflow(self, workflow_id): - # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] - - # # TODO: - # # 1. Figure out how to get the pypi name etc. - - # # 1. Get timestamp of the PyPi package - # # To do this, we need the url of the pypi package - - # # curl returns null if it doesn't exist + def pypi_publishing_workflow(self) -> float: + """Compare PyPI release timestamp with GHA publishing workflow timestamps.""" + check_certainty = 0.5 + project_name = self.build_tool.project_name + pypi_timestamp = "" + # Query PyPI API for the timestamp of the latest release. + if project_name: + api_client = PyPIAPIClient() + response = api_client.get_all_project_data(project_name=project_name) + latest = response.get("urls", [""])[0] + if latest: + pypi_timestamp = latest.get("upload_time") + if not pypi_timestamp: + return self.failed_check - # # 2. Get timestamp of github workflow run - # # Depends on has_latest_run_passed + # TODO: Collect 10 (?) of the most recent successful workflow runs + workflow_data: dict = {} - # # 2. If timestamp of the publishing workflow is close enough, then chance of the workflow - # # being the one to publish the package is high (rather than manual upload). + workflow_created_timestamp = workflow_data.get("created_at", "") + workflow_updated_timestamp = workflow_data.get("updated_at", "") - # # http_request - # return + # Compare timestamp of most recent PyPI release with several GHAs workflow runs. 
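+        # Both sides are ISO 8601 strings with different shapes: PyPI's upload_time
+        # looks like "2023-06-13T00:27:14" while GitHub's API returns
+        # "2023-06-13T00:27:14Z", so parsing them with datetime before comparing
+        # would be safer than raw string comparison.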
+ if workflow_created_timestamp and workflow_updated_timestamp: + # TODO: convert into datetime object to compare + if workflow_created_timestamp <= pypi_timestamp <= workflow_updated_timestamp: + return check_certainty + return self.failed_check def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 95a2ea3e1..88d10c890 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -2,12 +2,16 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" +import logging + from problog.extern import problog_export from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results FAILED_CHECK = 0.0 +logger: logging.Logger = logging.getLogger(__name__) + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -76,11 +80,10 @@ def workflow_trigger_deploy_command_check() -> float: The certainty of the check. """ depends_on = [deploy_command_check() > 0.0] - print(all(depends_on)) if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -97,7 +100,7 @@ def workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -114,3 +117,19 @@ def test_deploy_action_check() -> float: return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) + + +@problog_export("-int") # type: ignore +def publishing_workflow_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [workflow_trigger_deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] + return build_as_code_subcheck_results.pypi_publishing_workflow() From ddb2d0c6fb88fe7752f4e605b40a60a1eaf8e2ec Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 10:40:02 +1000 Subject: [PATCH 18/29] chore: fix poetry is_detected logic to pass snapshots Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/poetry.py | 6 ++---- .../slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index dd7d276b9..8177af1fa 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -84,12 +84,10 @@ def is_detected(self, repo_path: str) -> bool: return True except tomllib.TOMLDecodeError: logger.error("Failed to read the %s file: invalid toml file.", conf) - return False - return False except FileNotFoundError: logger.error("Failed to read the %s file.", conf) - return False - + if package_lock_exists: + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: diff --git a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr index 24a7fc494..51b7a8431 100644 --- a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr +++ b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr @@ -1,7 +1,6 @@ # serializer version: 1 # name: test_get_build_dirs[mock_repo0] list([ - PosixPath('.'), ]) # --- # name: test_get_build_dirs[mock_repo1] From 7d9a3aa7e57e819c0360d3e506d2fc6682dba335 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 10:40:50 +1000 Subject: [PATCH 19/29] chore: update poetry snapshot Signed-off-by: sophie-bates --- tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr index 51b7a8431..24a7fc494 100644 --- a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr +++ b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr @@ -1,6 +1,7 @@ # serializer version: 1 # name: test_get_build_dirs[mock_repo0] list([ + PosixPath('.'), ]) # --- # name: test_get_build_dirs[mock_repo1] From 08e58bde39fc3ed8663e31c5323b942b22cd9f66 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 09:21:11 +1000 Subject: [PATCH 20/29] chore: remove setup.py file parsing from pip build tool detection Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/pip.py | 51 ++++----------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index a23746dd7..0c9865338 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -6,8 +6,6 @@ This module is used to work with repositories that use pip for dependency management. 
""" -import ast -import configparser import logging import os import tomllib @@ -54,56 +52,23 @@ def is_detected(self, repo_path: str) -> bool: for file in self.build_configs: if file_exists(repo_path, file): # Find project name value from the config file. - # TODO: improve this approach. + # TODO: improve this approach, support setup.py file_path = os.path.join(repo_path, file) - file_found = "" if file == "pyproject.toml": try: with open(file_path, "rb") as toml_file: try: data = tomllib.load(toml_file) - poetry_tool = data.get("tool", {}).get("poetry", {}) - if poetry_tool: + project = data.get("project", {}) + if project: # Store the project name - self.project_name = poetry_tool.get("name") - file_found = file + self.project_name = project.get("name", "") + logger.info("Package name: %s", self.project_name) except tomllib.TOMLDecodeError: - logger.error("Failed to read the %s file: invalid toml file.", file) + logger.debug("Failed to read the %s file: invalid toml file.", file) except FileNotFoundError: - logger.error("Failed to read the %s file.", file) - - if file == "setup.cfg": - config = configparser.ConfigParser() - try: - config.read(file_path, encoding="utf8") - if "metadata" in config and "name" in config["metadata"]: - self.project_name = str(config["metadata"]["name"]) - file_found = file - except (configparser.Error, ValueError) as error: - logger.error("Failed to read the %s file.", file) - logger.error(error) - - if file == "setup.py": - try: - with open(file_path, "rb") as config_file: - content = config_file.read() - tree = ast.parse(content) - for node in ast.walk(tree): - if ( - isinstance(node, ast.Call) - and isinstance(node.func, ast.Name) - and node.func.id == "setup" - ): - for keyword in node.keywords: - if keyword.arg == "name": - self.project_name = ast.literal_eval(keyword.value) - file_found = file - except FileNotFoundError: - logger.info("Failed to read the %s file.", file) - if self.project_name: - return True - if file_found: - return True + logger.debug("Failed to read the %s file.", file) + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: From 03621ae182c076d58b936f2a91006f8cd5b7f373 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 09:44:10 +1000 Subject: [PATCH 21/29] chore: add evidence to BuildAsCodeTable and update ProbLog rules Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 70 ++++++++++++------- .../checks/build_as_code_subchecks.py | 48 ++++++++++--- .../checks/problog_predicates.py | 70 +++++++++++++++---- 3 files changed, 138 insertions(+), 50 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 89bed17fb..23a577751 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -35,6 +35,7 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): deploy_command: Mapped[str] = mapped_column(String, nullable=True) build_status_url: Mapped[str] = mapped_column(String, nullable=True) confidence_score: Mapped[float] = mapped_column(Float, nullable=True) + evidence: Mapped[str] = mapped_column(String, nullable=True) class BuildAsCodeCheck(BaseCheck): @@ -98,31 +99,36 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # ProbLog rules to be evaluated. prolog_string = PrologString( """ - :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). 
- - A :: ci_parsed :- ci_parsed_check(A). - B :: deploy_action :- deploy_action_check(B). - C :: deploy_command :- deploy_command_check(C). - D :: deploy_kws :- deploy_kws_check(D). - - 0.80 :: deploy_action_certainty :- deploy_action. - 0.15 :: deploy_action_certainty :- deploy_action, ci_parsed. - - 0.70 :: deploy_command_certainty :- deploy_command. - 0.15 :: deploy_command_certainty :- deploy_command, ci_parsed. - - 0.60 :: deploy_kws_certainty :- deploy_kws. - - build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. - - query(deploy_command_certainty). - query(deploy_action_certainty). - query(deploy_kws_certainty). - query(build_as_code_check). - """ + :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). + + A :: ci_parsed :- ci_parsed_check(A). + B :: deploy_action :- deploy_action_check(B). + C :: deploy_command :- deploy_command_check(C). + D :: deploy_kws :- deploy_kws_check(D). + E :: release_workflow_trigger_deploy_command :- release_workflow_trigger_deploy_command_check(E). + F :: release_workflow_trigger_deploy_action :- release_workflow_trigger_deploy_action_check(F). + G :: tested_deploy_action :- tested_deploy_action_check(G). + H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). + I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). + + 0.6 :: deploy_action_certainty :- deploy_action. + %0.10 :: deploy_action_certainty :- tested_deploy_action. + %0.80 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + %0.90 :: deploy_action_certainty :- publishing_workflow_deploy_action. + + 0.45 :: deploy_command_certainty :- deploy_command. + %0.80 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + %0.90 :: deploy_command_certainty :- publishing_workflow_deploy_command. + + 0.60 :: deploy_kws_certainty :- deploy_kws. + + query(deploy_command_certainty). + query(deploy_action_certainty). + query(deploy_kws_certainty). + """ ) - - build_as_code_subchecks.build_as_code_subcheck_results.workflow_trigger("publish.yaml") + # TODO: we want all the logic to be happening inside the rules, + # can we make decisions in here instead of intermediate querying? # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() @@ -133,12 +139,11 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu "deploy_kws": result["deploy_kws_certainty"], } deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} - confidence_score = result["build_as_code_check"] - check_result["confidence_score"] = confidence_score if deploy_methods_valid.values(): # Determine the deployment method with the highest certainty score. 
highest_certainty = max(deploy_methods_valid, key=deploy_methods_valid.__getitem__) + highest_certainty_score = deploy_methods[highest_certainty] deploy_method = build_as_code_subchecks.build_as_code_subcheck_results.get_subcheck_results( highest_certainty ) @@ -161,6 +166,15 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu predicate["builder"]["id"] = deploy_method.config_name predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.config_name + logger.info(build_as_code_subchecks.build_as_code_subcheck_results.check_results.values()) + + all_evidence = build_as_code_subchecks.build_as_code_subcheck_results.evidence + + distinct_evidence = [*set(all_evidence)] + ev_string = ", ".join(distinct_evidence) + logger.info("Evidence vals %s", ev_string) + + confidence_score = round(highest_certainty_score, 4) check_result["result_tables"] = [ BuildAsCodeTable( build_tool_name=build_tool.name, @@ -169,17 +183,19 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu deploy_command=deploy_method.deploy_cmd, build_status_url=deploy_method.html_url, confidence_score=confidence_score, + evidence=ev_string, ) ] + check_result["confidence_score"] = confidence_score # TODO: compile all justifications # check_result["justification"].append() # TODO: Investigate using proofs + logger.info("The certainty of this check passing is: %s", confidence_score) # Check whether the confidence score is greater than the minimum threshold for this check. if confidence_score >= self.confidence_score_threshold: - logger.info("The certainty of this check passing is: %s", confidence_score) return CheckResultType.PASSED pass_msg = f"The target repository does not use {build_tool.name} to deploy." diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 5aff490ac..e22ce4119 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -95,6 +95,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ci_service = ci_info["service"] # Certainty value to be returned if a subcheck fails. self.failed_check = 0.0 + self.evidence: list[str] = [] # TODO: Make subcheck functions available to other checks. @@ -110,12 +111,13 @@ def ci_parsed(self) -> float: self.check_results["ci_parsed"] = DeploySubcheckResults( certainty=check_certainty, justification=justification ) + self.evidence.append("ci_parsed") return check_certainty return self.failed_check def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" - check_certainty = 0.7 + check_certainty = 0.8 for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) @@ -151,6 +153,7 @@ def deploy_command(self) -> float: if html_url else "However, could not find a passing workflow run.", ] + self.evidence.append("deploy_command") self.check_results["deploy_command"] = DeploySubcheckResults( certainty=check_certainty, @@ -167,7 +170,7 @@ def deploy_command(self) -> float: def deploy_kws(self) -> float: """Check for the use of deploy keywords to deploy.""" - check_certainty = 0.6 + check_certainty = 0.4 # We currently don't parse these CI configuration files. # We just look for a keyword for now. 
@@ -181,6 +184,7 @@ def deploy_kws(self) -> float: return self.failed_check justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + self.evidence.append("deploy_kws") self.check_results["deploy_kws"] = DeploySubcheckResults( certainty=check_certainty, @@ -192,9 +196,9 @@ def deploy_kws(self) -> float: return self.failed_check - def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: + def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: """Check for the use of a test deploy to PyPi given a CI workflow.""" - check_certainty = 0.7 + check_certainty = 0.9 logger.info("File name: %s", workflow_file) for callee in self.ci_info["callgraph"].bfs(): # TODO: figure out a way to generalize this implementation for other external GHAs. @@ -213,12 +217,13 @@ def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") - repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": + self.evidence.append("tested_deploy_action") return check_certainty return self.failed_check def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" - check_certainty = 0.8 + check_certainty = 0.95 if isinstance(self.build_tool, Pip): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) @@ -232,11 +237,15 @@ def deploy_action(self) -> float: ]: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue + + # TODO if workflow_name in trusted_deploy_actions: workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) # Deployment is to Pypi if there isn't a repository url + # https://packaging.python.org/en/latest/guides/ + # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ if inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", @@ -274,6 +283,8 @@ def deploy_action(self) -> float: else "However, could not find a passing workflow run.", ] + self.evidence.append("deploy_action") + self.check_results["deploy_action"] = DeploySubcheckResults( certainty=check_certainty, justification=justification, @@ -290,7 +301,7 @@ def deploy_action(self) -> float: # TODO: workflow_name isn't used as a file in some places! - def workflow_trigger(self, workflow_file: str = "") -> float: + def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 if not workflow_file: @@ -299,7 +310,6 @@ def workflow_trigger(self, workflow_file: str = "") -> float: valid_trigger_events = ["workflow-dispatch", "push", "release"] # TODO: Consider activity types for release, i.e. 
prereleased - for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) @@ -310,12 +320,19 @@ def workflow_trigger(self, workflow_file: str = "") -> float: logger.info( "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file ) + self.evidence.append("release_workflow_trigger") + justification: list[str | dict[str, str]] = [ + f"Valid trigger event type {trigger_type} used in workflow: {workflow_file}" + ] + self.check_results["release_workflow_trigger"] = DeploySubcheckResults( + justification=justification + ) return check_certainty return self.failed_check - def pypi_publishing_workflow(self) -> float: + def pypi_publishing_workflow_timestamp(self) -> float: """Compare PyPI release timestamp with GHA publishing workflow timestamps.""" - check_certainty = 0.5 + check_certainty = 0.9 project_name = self.build_tool.project_name pypi_timestamp = "" # Query PyPI API for the timestamp of the latest release. @@ -328,8 +345,9 @@ def pypi_publishing_workflow(self) -> float: if not pypi_timestamp: return self.failed_check - # TODO: Collect 10 (?) of the most recent successful workflow runs + # TODO: Collect 5 of the most recent successful workflow runs workflow_data: dict = {} + workflow_name = "" workflow_created_timestamp = workflow_data.get("created_at", "") workflow_updated_timestamp = workflow_data.get("updated_at", "") @@ -338,9 +356,19 @@ def pypi_publishing_workflow(self) -> float: if workflow_created_timestamp and workflow_updated_timestamp: # TODO: convert into datetime object to compare if workflow_created_timestamp <= pypi_timestamp <= workflow_updated_timestamp: + self.evidence.append("publish_timestamp") + justification: list[str | dict[str, str]] = [ + f"The timestamp of workflow {workflow_name} matches with the PyPI package release time." + ] + self.check_results["publish_timestamp"] = DeploySubcheckResults(justification=justification) return check_certainty return self.failed_check + def step_uses_secrets(self) -> float: + """Identify whether a workflow step uses secrets.""" + check_certainty = 0.85 + return check_certainty + def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" return self.check_results[subcheck_name] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 88d10c890..b952c758e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -22,7 +22,10 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subcheck_results.ci_parsed() + subtask = build_as_code_subcheck_results.ci_parsed() + if subtask > 0: + logger.info("Evidence found: ci_parsed -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -37,7 +40,10 @@ def deploy_action_check() -> float: depends_on = [ci_parsed_check() > 0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_action() + subtask = build_as_code_subcheck_results.deploy_action() + if subtask > 0: + logger.info("Evidence found: deploy_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -52,7 +58,10 @@ def deploy_command_check() -> float: depends_on = [ci_parsed_check() > 0.0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_command() + subtask = build_as_code_subcheck_results.deploy_command() + if subtask > 0: + logger.info("Evidence found: deploy_command -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -67,11 +76,14 @@ def deploy_kws_check() -> float: depends_on = [ci_parsed_check() == 0.0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_kws() + subtask = build_as_code_subcheck_results.deploy_kws() + if subtask > 0: + logger.info("Evidence found: deploy_kws -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def workflow_trigger_deploy_command_check() -> float: +def release_workflow_trigger_deploy_command_check() -> float: """Get the value of the subcheck. Returns @@ -83,11 +95,15 @@ def workflow_trigger_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) + subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + if subtask > 0: + logger.info("Evidence found: release_workflow_trigger_command -> %s", subtask) + # build_as_code_subcheck_results.check_results["deploy_command"].sub_tasks["release_workflow_trigger"] = subtask + return subtask @problog_export("-int") # type: ignore -def workflow_trigger_deploy_action_check() -> float: +def release_workflow_trigger_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -100,11 +116,14 @@ def workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) + subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + if subtask > 0: + logger.info("Evidence found: release_workflow_trigger_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def test_deploy_action_check() -> float: +def tested_deploy_action_check() -> float: """Get the value of the subcheck. 
Returns @@ -116,11 +135,14 @@ def test_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) + subtask = build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) + if subtask > 0: + logger.info("Evidence found: test_deploy_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def publishing_workflow_check() -> float: +def publishing_workflow_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -128,8 +150,30 @@ def publishing_workflow_check() -> float: Certainty The certainty of the check. """ - depends_on = [workflow_trigger_deploy_action_check() > 0.0] + depends_on = [release_workflow_trigger_deploy_action_check()] if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - return build_as_code_subcheck_results.pypi_publishing_workflow() + subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + if subtask > 0: + logger.info("Evidence found: publishing_workflow_check -> %s", subtask) + return subtask + + +@problog_export("-int") # type: ignore +def publishing_workflow_deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + depends_on = [release_workflow_trigger_deploy_command_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] + subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + if subtask > 0: + logger.info("Evidence found: publishing_workflow_check -> %s", subtask) + return subtask From ca00db49eadf81b27de9e5d42a277e48aaf2a080 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:48:54 +1000 Subject: [PATCH 22/29] chore: fix repository_url check Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index e22ce4119..18aa6111c 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -246,7 +246,7 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - if inputs.get("repository_url"): + if inputs and inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. 
Skipping...", callee.name, From c04d887d90f4b5ebe4d4f07092082edeb88bca18 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:50:21 +1000 Subject: [PATCH 23/29] chore: fix logging of sub-task results Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 17 +++++-- .../checks/problog_predicates.py | 48 +++++-------------- 2 files changed, 25 insertions(+), 40 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 18aa6111c..874b37eed 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -112,6 +112,7 @@ def ci_parsed(self) -> float: certainty=check_certainty, justification=justification ) self.evidence.append("ci_parsed") + logger.info("Evidence found: ci_parsed -> %s", check_certainty) return check_certainty return self.failed_check @@ -154,7 +155,7 @@ def deploy_command(self) -> float: else "However, could not find a passing workflow run.", ] self.evidence.append("deploy_command") - + logger.info("Evidence found: deploy_command -> %s", check_certainty) self.check_results["deploy_command"] = DeploySubcheckResults( certainty=check_certainty, justification=justification, @@ -192,6 +193,7 @@ def deploy_kws(self) -> float: deploy_cmd=deploy_kw, config_name=config_name, ) + logger.info("Evidence found: deploy_kws -> %s", check_certainty) return check_certainty return self.failed_check @@ -218,6 +220,7 @@ def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": self.evidence.append("tested_deploy_action") + logger.info("Evidence found: tested_deploy_action -> %s", check_certainty) return check_certainty return self.failed_check @@ -246,7 +249,8 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - if inputs and inputs.get("repository_url"): + logger.info("inputs") + if inputs and inputs.get("repository_url", ""): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -284,6 +288,7 @@ def deploy_action(self) -> float: ] self.evidence.append("deploy_action") + logger.info("Evidence found: deploy_action -> %s", check_certainty) self.check_results["deploy_action"] = DeploySubcheckResults( certainty=check_certainty, @@ -327,6 +332,8 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: self.check_results["release_workflow_trigger"] = DeploySubcheckResults( justification=justification ) + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + return check_certainty return self.failed_check @@ -361,12 +368,16 @@ def pypi_publishing_workflow_timestamp(self) -> float: f"The timestamp of workflow {workflow_name} matches with the PyPI package release time." 
] self.check_results["publish_timestamp"] = DeploySubcheckResults(justification=justification) + logger.info("Evidence found: publishing_workflow_timestamp -> %s", check_certainty) return check_certainty + return self.failed_check def step_uses_secrets(self) -> float: """Identify whether a workflow step uses secrets.""" - check_certainty = 0.85 + check_certainty = 0 # 0.85 + logger.info("Evidence found: step_secrets -> %s", check_certainty) + return check_certainty def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index b952c758e..1dfde62ca 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -12,6 +12,8 @@ logger: logging.Logger = logging.getLogger(__name__) +# TODO: check that a result doesn't already exist before running the check. + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -22,10 +24,7 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ - subtask = build_as_code_subcheck_results.ci_parsed() - if subtask > 0: - logger.info("Evidence found: ci_parsed -> %s", subtask) - return subtask + return build_as_code_subcheck_results.ci_parsed() @problog_export("-int") # type: ignore @@ -40,10 +39,7 @@ def deploy_action_check() -> float: depends_on = [ci_parsed_check() > 0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_action() - if subtask > 0: - logger.info("Evidence found: deploy_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_action() @problog_export("-int") # type: ignore @@ -58,10 +54,7 @@ def deploy_command_check() -> float: depends_on = [ci_parsed_check() > 0.0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_command() - if subtask > 0: - logger.info("Evidence found: deploy_command -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_command() @problog_export("-int") # type: ignore @@ -76,10 +69,7 @@ def deploy_kws_check() -> float: depends_on = [ci_parsed_check() == 0.0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_kws() - if subtask > 0: - logger.info("Evidence found: deploy_kws -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_kws() @problog_export("-int") # type: ignore @@ -95,11 +85,7 @@ def release_workflow_trigger_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) - if subtask > 0: - logger.info("Evidence found: release_workflow_trigger_command -> %s", subtask) - # build_as_code_subcheck_results.check_results["deploy_command"].sub_tasks["release_workflow_trigger"] = subtask - return subtask + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -116,10 +102,7 @@ def release_workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) - if subtask > 0: - 
logger.info("Evidence found: release_workflow_trigger_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -135,10 +118,7 @@ def tested_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - subtask = build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) - if subtask > 0: - logger.info("Evidence found: test_deploy_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) @problog_export("-int") # type: ignore @@ -154,10 +134,7 @@ def publishing_workflow_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() - if subtask > 0: - logger.info("Evidence found: publishing_workflow_check -> %s", subtask) - return subtask + return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() @problog_export("-int") # type: ignore @@ -173,7 +150,4 @@ def publishing_workflow_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() - if subtask > 0: - logger.info("Evidence found: publishing_workflow_check -> %s", subtask) - return subtask + return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() From 24be921b7ac33ea8dc07ec608ec861c0a5106f46 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:58:10 +1000 Subject: [PATCH 24/29] chore: update ProbLog rules likelihood values Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_check.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 23a577751..56cb9c4f1 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -111,16 +111,16 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). - 0.6 :: deploy_action_certainty :- deploy_action. + 0.8 :: deploy_action_certainty :- deploy_action. %0.10 :: deploy_action_certainty :- tested_deploy_action. - %0.80 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. - %0.90 :: deploy_action_certainty :- publishing_workflow_deploy_action. + %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + %0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. - 0.45 :: deploy_command_certainty :- deploy_command. - %0.80 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. - %0.90 :: deploy_command_certainty :- publishing_workflow_deploy_command. + 0.75 :: deploy_command_certainty :- deploy_command. + %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. 
- 0.60 :: deploy_kws_certainty :- deploy_kws. + 0.70 :: deploy_kws_certainty :- deploy_kws. query(deploy_command_certainty). query(deploy_action_certainty). From 5c641b82fcac86c6d8ecfbc2d090e33af424ac59 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 00:10:07 +1000 Subject: [PATCH 25/29] feat: add sub-task to check for secrets used in same workflow step as deployment method Signed-off-by: sophie-bates --- src/macaron/parsers/bashparser.py | 9 +++- .../checks/build_as_code_check.py | 7 ++- .../checks/build_as_code_subchecks.py | 47 ++++++++++++++++--- .../checks/problog_predicates.py | 46 ++++++++++++++++++ .../ci_service/github_actions.py | 1 + 5 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index f7b03d9f5..7f44d52be 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -33,6 +33,7 @@ class BashCommands(TypedDict): """CI service type.""" commands: list[list[str]] """Parsed bash commands.""" + workflow_info: dict def parse_file(file_path: str, macaron_path: str = "") -> dict: @@ -115,6 +116,7 @@ def extract_bash_from_ci( bash_content: str, ci_file: str, ci_type: str, + workflow_info: dict, macaron_path: str = "", recursive: bool = False, repo_path: str = "", @@ -152,7 +154,9 @@ def extract_bash_from_ci( parsed_parent = parse(bash_content) caller_commands = parsed_parent.get("commands", []) if caller_commands: - yield BashCommands(caller_path=ci_file, CI_path=ci_file, CI_type=ci_type, commands=caller_commands) + yield BashCommands( + caller_path=ci_file, CI_path=ci_file, CI_type=ci_type, commands=caller_commands, workflow_info=workflow_info + ) # Parse the bash script files called from the current script. if recursive and repo_path: @@ -171,4 +175,5 @@ def extract_bash_from_ci( CI_path=ci_file, CI_type=ci_type, commands=callee_commands, + workflow_info=workflow_info, ) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 56cb9c4f1..4a081b753 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -55,7 +55,7 @@ def __init__(self) -> None: ("mcn_trusted_builder_level_three_1", CheckResultType.FAILED), ] eval_reqs = [ReqName.BUILD_AS_CODE] - self.confidence_score_threshold = 0.3 + self.confidence_score_threshold = 0.7 super().__init__( check_id="mcn_build_as_code_1", @@ -110,15 +110,19 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu G :: tested_deploy_action :- tested_deploy_action_check(G). H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). + J :: step_uses_secrets_deploy_action :- step_uses_secrets_deploy_action_check(J). + K :: step_uses_secrets_deploy_command :- step_uses_secrets_deploy_command_check(K). 0.8 :: deploy_action_certainty :- deploy_action. %0.10 :: deploy_action_certainty :- tested_deploy_action. %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. 
%0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. + 0.65 :: deploy_action_certainty :- step_uses_secrets_deploy_action. 0.75 :: deploy_command_certainty :- deploy_command. %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. + 0.65 :: deploy_command_certainty :- step_uses_secrets_deploy_command. 0.70 :: deploy_kws_certainty :- deploy_kws. @@ -138,6 +142,7 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu "deploy_action": result["deploy_action_certainty"], "deploy_kws": result["deploy_kws_certainty"], } + deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} if deploy_methods_valid.values(): diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 874b37eed..1f6597695 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -5,6 +5,7 @@ import logging import os +import re from attr import dataclass @@ -80,6 +81,7 @@ class DeploySubcheckResults: html_url: str = "" config_name: str = "" workflow_name: str = "" + workflow_info: dict = {} class BuildAsCodeSubchecks: @@ -143,6 +145,7 @@ def deploy_command(self) -> float: ) workflow_name = os.path.basename(html_url) + workflow_info = bash_cmd["workflow_info"] justification: list[str | dict[str, str]] = [ { @@ -164,6 +167,7 @@ def deploy_command(self) -> float: source_link=bash_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_info=workflow_info, ) return check_certainty @@ -249,8 +253,7 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - logger.info("inputs") - if inputs and inputs.get("repository_url", ""): + if inputs and inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -298,6 +301,7 @@ def deploy_action(self) -> float: source_link=deploy_action_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_info=workflow_info, ) return check_certainty @@ -373,12 +377,43 @@ def pypi_publishing_workflow_timestamp(self) -> float: return self.failed_check - def step_uses_secrets(self) -> float: + def step_uses_secrets(self, step_info: dict) -> float: """Identify whether a workflow step uses secrets.""" - check_certainty = 0 # 0.85 - logger.info("Evidence found: step_secrets -> %s", check_certainty) + check_certainty = 0.9 + + logger.info("STEP") + logger.info(step_info) + + # inputs = step_info.get("Inputs", {}) + logger.info("inputs: %s", step_info) + if self._step_uses_secrets(step_info): + self.evidence.append("deploy_step_uses_secrets") + logger.info("Evidence found: step_secrets -> %s", check_certainty) + justification: list[str | dict[str, str]] = [ + "The workflow step that contains the deployment method uses secrets." 
+ ] + self.check_results["step_secrets"] = DeploySubcheckResults(justification=justification) + return check_certainty + return self.failed_check - return check_certainty + def _step_uses_secrets(self, inputs: dict) -> bool: + """Recurse through GitHub Actions syntax tree to find the use of secrets.""" + for value in inputs.values(): + if isinstance(value, str): + # Match the pattern '${{ content }}' + pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE) + match = pattern.match(value) + if match is not None: + content = match.group(1).strip() + contents = content.split(".") + # Note that we only support the case: ${{ secrets.TOKEN }} for now. + # Exclude 'secrets.GITHUB_TOKEN'.. + if len(contents) == 2 and (contents[0] == "secrets") and (contents[1] != "GITHUB_TOKEN"): + return True + elif isinstance(value, dict): + if self._step_uses_secrets(value): + return True + return False def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 1dfde62ca..7140ada1e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -37,8 +37,12 @@ def deploy_action_check() -> float: The certainty of the check. """ depends_on = [ci_parsed_check() > 0] + # Verify dependencies and that this check hasn't already been run. if not all(depends_on): return FAILED_CHECK + check = build_as_code_subcheck_results.check_results.get("deploy_action") + if check: + return check.certainty return build_as_code_subcheck_results.deploy_action() @@ -52,8 +56,12 @@ def deploy_command_check() -> float: The certainty of the check. """ depends_on = [ci_parsed_check() > 0.0] + # Verify dependencies and that this check hasn't already been run. + check = build_as_code_subcheck_results.check_results.get("deploy_command") if not all(depends_on): return FAILED_CHECK + if check: + return check.certainty return build_as_code_subcheck_results.deploy_command() @@ -151,3 +159,41 @@ def publishing_workflow_deploy_command_check() -> float: return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + + +@problog_export("-int") # type: ignore +def step_uses_secrets_deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + # TODO: currently we don't store the GHA object during deploy_command_check so + # can't perform this sub-task (no workflow_info available). + depends_on = [deploy_command_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + step_info = build_as_code_subcheck_results.check_results["deploy_command"].workflow_info + if step_info: + return build_as_code_subcheck_results.step_uses_secrets(step_info=step_info) + return FAILED_CHECK + + +@problog_export("-int") # type: ignore +def step_uses_secrets_deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + step_info = build_as_code_subcheck_results.check_results["deploy_action"].workflow_info + if step_info: + return build_as_code_subcheck_results.step_uses_secrets(step_info=step_info) + return FAILED_CHECK diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions.py b/src/macaron/slsa_analyzer/ci_service/github_actions.py index 7d44f5816..836b4d0f8 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions.py @@ -278,6 +278,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter step["Exec"]["Run"]["Value"], ci_file=self.api_client.get_relative_path_of_workflow(callee.name), ci_type="github_actions", + workflow_info=step, recursive=True, repo_path=callgraph.repo_path, working_dir=step["Exec"]["WorkingDirectory"] or "", From 2ebdea2b19efd521d869a8214da8b479e7b7e7ca Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 10:19:24 +1000 Subject: [PATCH 26/29] chore: store workflow_file in deploy_action and deploy_command checks for trigger event type sub-task Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 6 +++--- .../checks/build_as_code_subchecks.py | 20 ++++++++++++------- .../checks/problog_predicates.py | 12 ++++++----- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 4a081b753..40d5b7401 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -114,13 +114,13 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu K :: step_uses_secrets_deploy_command :- step_uses_secrets_deploy_command_check(K). 0.8 :: deploy_action_certainty :- deploy_action. - %0.10 :: deploy_action_certainty :- tested_deploy_action. - %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + 0.10 :: deploy_action_certainty :- tested_deploy_action. + 0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. %0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. 0.65 :: deploy_action_certainty :- step_uses_secrets_deploy_action. 0.75 :: deploy_command_certainty :- deploy_command. - %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + 0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. 0.65 :: deploy_command_certainty :- step_uses_secrets_deploy_command. 
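
[The ProbLog rule block in the hunk above combines several probabilistic rules that share one head. Under ProbLog's semantics this is a noisy-or: the head's probability is 1 minus the product of the complements of the weights of every rule whose body holds. A minimal, self-contained sketch of that combination follows — an illustration only, not part of the patch series; the two facts are stand-ins for the exported predicate results, assumed here to have succeeded with certainty 1.0. It uses the same problog API the check already imports:

from problog import get_evaluatable
from problog.program import PrologString

# Stand-in facts: both sub-checks are assumed to have fired with certainty 1.0.
# In the real check these probabilities come from the exported predicates.
MODEL = PrologString(
    """
    deploy_command.
    release_workflow_trigger_deploy_command.

    0.75 :: deploy_command_certainty :- deploy_command.
    0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command.

    query(deploy_command_certainty).
    """
)

# Noisy-or combination: 1 - (1 - 0.75) * (1 - 0.85) = 0.9625.
for term, probability in get_evaluatable().create_from(MODEL).evaluate().items():
    print(term, probability)

With both rules firing, the aggregated certainty (0.9625) comfortably clears the 0.7 confidence_score_threshold set earlier in the series, whereas the deploy command on its own (0.75) only narrowly does.]
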
diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 1f6597695..7d0c982a6 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -81,6 +81,7 @@ class DeploySubcheckResults: html_url: str = "" config_name: str = "" workflow_name: str = "" + workflow_file: str = "" workflow_info: dict = {} @@ -144,7 +145,7 @@ def deploy_command(self) -> float: os.path.basename(bash_cmd["CI_path"]), ) - workflow_name = os.path.basename(html_url) + workflow_file = os.path.basename(trigger_link) workflow_info = bash_cmd["workflow_info"] justification: list[str | dict[str, str]] = [ @@ -166,7 +167,7 @@ def deploy_command(self) -> float: trigger_link=trigger_link, source_link=bash_source_link, html_url=html_url, - workflow_name=workflow_name, + workflow_file=workflow_file, workflow_info=workflow_info, ) @@ -220,7 +221,9 @@ def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") if callee_name == workflow_name == "pypa/gh-action-pypi-publish": workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) - repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") + repo_url = "" + if inputs: + repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": self.evidence.append("tested_deploy_action") @@ -245,7 +248,6 @@ def deploy_action(self) -> float: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue - # TODO if workflow_name in trusted_deploy_actions: workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) @@ -278,6 +280,8 @@ def deploy_action(self) -> float: os.path.basename(callee.caller_path), ) + workflow_file = os.path.basename(trigger_link) + # TODO: include in the justification multiple cases of external action usage justification: list[str | dict[str, str]] = [ { @@ -301,6 +305,7 @@ def deploy_action(self) -> float: source_link=deploy_action_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_file=workflow_file, workflow_info=workflow_info, ) @@ -313,6 +318,7 @@ def deploy_action(self) -> float: def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 + if not workflow_file: return self.failed_check @@ -320,11 +326,14 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): + # Find the workflow file that the deployment method was used in and + # extract the trigger event types. if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) for event in trigger_events: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) + # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: logger.info( "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file @@ -381,9 +390,6 @@ def step_uses_secrets(self, step_info: dict) -> float: """Identify whether a workflow step uses secrets.""" check_certainty = 0.9 - logger.info("STEP") - logger.info(step_info) - # inputs = step_info.get("Inputs", {}) logger.info("inputs: %s", step_info) if self._step_uses_secrets(step_info): diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 7140ada1e..0cbbdff51 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -24,6 +24,9 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ + check = build_as_code_subcheck_results.check_results.get("ci_parsed") + if check: + return check.certainty return build_as_code_subcheck_results.ci_parsed() @@ -92,8 +95,8 @@ def release_workflow_trigger_deploy_command_check() -> float: depends_on = [deploy_command_check() > 0.0] if not all(depends_on): return FAILED_CHECK - workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + workflow_file = build_as_code_subcheck_results.check_results["deploy_command"].workflow_file + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_file) @problog_export("-int") # type: ignore @@ -106,11 +109,10 @@ def release_workflow_trigger_deploy_action_check() -> float: The certainty of the check. """ depends_on = [deploy_action_check() > 0.0] - print(all(depends_on)) if not all(depends_on): return FAILED_CHECK - workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + workflow_file = build_as_code_subcheck_results.check_results["deploy_action"].workflow_file + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_file) @problog_export("-int") # type: ignore From a84c2e4b8e61138fbe2c38dbae276316d1cb947c Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 17:50:47 +1000 Subject: [PATCH 27/29] chore: add tox -e release as supported deploy tool Signed-off-by: sophie-bates --- src/macaron/config/defaults.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 355dfea56..80c818be8 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -237,6 +237,7 @@ publisher = twine flit conda + tox # These are the Python interpreters that may be used to load modules. 
interpreter = python @@ -250,6 +251,7 @@ build_arg = deploy_arg = publish upload + release [builder.pip.ci.deploy] github_actions = pypa/gh-action-pypi-publish From 48ca217864aef58dc3b2463a9916d55de843aa2a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 17:53:02 +1000 Subject: [PATCH 28/29] chore: include Poetry projects for deploy_action check Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_subchecks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 7d0c982a6..925432a09 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -13,6 +13,7 @@ from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.build_tool.poetry import Poetry from macaron.slsa_analyzer.ci_service.circleci import CircleCI from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI @@ -235,7 +236,7 @@ def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" check_certainty = 0.95 - if isinstance(self.build_tool, Pip): + if isinstance(self.build_tool, (Pip, Poetry)): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) for callee in self.ci_info["callgraph"].bfs(): @@ -336,11 +337,11 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: logger.info( - "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file + "Valid trigger event '%s' found for the workflow file: %s.", trigger_type, workflow_file ) self.evidence.append("release_workflow_trigger") justification: list[str | dict[str, str]] = [ - f"Valid trigger event type {trigger_type} used in workflow: {workflow_file}" + f"Valid trigger event type '{trigger_type}' used in workflow file: {workflow_file}" ] self.check_results["release_workflow_trigger"] = DeploySubcheckResults( justification=justification From 44b36c87d59b1a83cddc4560d0eec7b28762c1bc Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Fri, 30 Jun 2023 17:16:13 +1000 Subject: [PATCH 29/29] chore: update release workflow trigger sub-task to penalize certainty for particular event types Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 925432a09..f257e8cc2 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -319,11 +319,15 @@ def deploy_action(self) -> float: def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 + check_certainty_lowered = 0.75 if not workflow_file: return self.failed_check - valid_trigger_events = ["workflow-dispatch", "push", "release"] + valid_trigger_events = ["workflow_dispatch", "push", "release", "create"] + invalid_trigger_events = ["pull_request"] + valid_trigger = [""] + invalid_trigger = "" # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): @@ -336,19 +340,25 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: trigger_type = str(hook.get("Value", "")) # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: - logger.info( - "Valid trigger event '%s' found for the workflow file: %s.", trigger_type, workflow_file - ) - self.evidence.append("release_workflow_trigger") - justification: list[str | dict[str, str]] = [ - f"Valid trigger event type '{trigger_type}' used in workflow file: {workflow_file}" - ] - self.check_results["release_workflow_trigger"] = DeploySubcheckResults( - justification=justification - ) - logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + valid_trigger.append(trigger_type) + if trigger_type in invalid_trigger_events: + invalid_trigger = trigger_type - return check_certainty + if valid_trigger: + logger.info( + "Valid trigger event '%s' found for the workflow file: %s.", valid_trigger[0], workflow_file + ) + self.evidence.append("release_workflow_trigger") + justification: list[str | dict[str, str]] = [ + f"Valid trigger event type '{valid_trigger[0]}' used in workflow file: {workflow_file}" + ] + self.check_results["release_workflow_trigger"] = DeploySubcheckResults(justification=justification) + if invalid_trigger: + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty_lowered) + return check_certainty_lowered + + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + return check_certainty return self.failed_check def pypi_publishing_workflow_timestamp(self) -> float: @@ -409,7 +419,7 @@ def _step_uses_secrets(self, inputs: dict) -> bool: if isinstance(value, str): # Match the pattern '${{ content }}' pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE) - match = pattern.match(value) + match = pattern.search(value) if match is not None: content = match.group(1).strip() contents = content.split(".")
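
[The final hunk swaps re.match for re.search in _step_uses_secrets. re.match only succeeds when the pattern matches at the very start of the string, so a secret reference embedded later in a step value was silently missed; re.search scans the whole value. A small standalone sketch of the difference — the command string is a made-up example, not taken from the patches:

import re

# The same pattern used by _step_uses_secrets: capture the body of '${{ ... }}'.
pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE)

value = "twine upload --password ${{ secrets.PYPI_API_TOKEN }} dist/*"

print(pattern.match(value))   # None: the expression does not start the string.
print(pattern.search(value))  # Match object: the expression occurs mid-string.

match = pattern.search(value)
if match is not None:
    contents = match.group(1).strip().split(".")
    # Mirrors the sub-check: a '${{ secrets.X }}' reference other than the
    # automatically provided secrets.GITHUB_TOKEN counts as evidence.
    print(len(contents) == 2 and contents[0] == "secrets" and contents[1] != "GITHUB_TOKEN")  # True
]
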