From 9415acc1845f87c50af48ac5b1e0343d9cdf9376 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 30 May 2023 09:44:57 +1000 Subject: [PATCH 01/29] chore(deps): add problog dependency to pyproject.toml Signed-off-by: sophie-bates --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 1d697f32e..14735b4bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,7 @@ dependencies = [ "jinja2 >=3.1.2,<4.0.0", "SQLAlchemy >=2.0.0,<3.0.0", "defusedxml >=0.7.1,<1.0.0", + "problog >=2.2.4,<3.0.0" ] keywords = [] # https://pypi.org/classifiers/ From 8cc866d1733745f05aed0a4facfd70bb8a559ec7 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 31 May 2023 16:56:33 +1000 Subject: [PATCH 02/29] feat: split build_as_code_check into subchecks and aggregate certainty values using ProbLog. Signed-off-by: sophie-bates --- pyproject.toml | 1 + src/macaron/slsa_analyzer/checks/bac_.py | 18 + .../slsa_analyzer/checks/base_check.py | 1 + .../checks/build_as_code_check.py | 516 +++++++++++------- .../slsa_analyzer/checks/check_result.py | 1 + .../checks/problog_predicates.py | 55 ++ src/macaron/slsa_analyzer/registry.py | 1 + 7 files changed, 396 insertions(+), 197 deletions(-) create mode 100644 src/macaron/slsa_analyzer/checks/bac_.py create mode 100644 src/macaron/slsa_analyzer/checks/problog_predicates.py diff --git a/pyproject.toml b/pyproject.toml index 14735b4bc..529587b4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -188,6 +188,7 @@ module = [ "gitdb.*", "yamale.*", "defusedxml.*", + "problog.*" ] ignore_missing_imports = true diff --git a/src/macaron/slsa_analyzer/checks/bac_.py b/src/macaron/slsa_analyzer/checks/bac_.py new file mode 100644 index 000000000..6a993dcf0 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/bac_.py @@ -0,0 +1,18 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" +from attr import dataclass + + +@dataclass +class BuildAsCodeSubchecks: + """Dataclass for storing the results from the BuildAsCodeCheck subchecks.""" + + ci_parsed: float + deploy_action: float + deploy_command: float + deploy_kws: float + + +build_as_code_subchecks: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/base_check.py b/src/macaron/slsa_analyzer/checks/base_check.py index 432a61158..8b12d2005 100644 --- a/src/macaron/slsa_analyzer/checks/base_check.py +++ b/src/macaron/slsa_analyzer/checks/base_check.py @@ -86,6 +86,7 @@ def run(self, target: AnalyzeContext, skipped_info: Optional[SkippedInfo] = None justification=[], result_type=CheckResultType.SKIPPED, result_tables=[], + confidence_score=0, ) if skipped_info: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index e25768049..7a00c27ee 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -6,17 +6,21 @@ import logging import os +from problog import get_evaluatable +from problog.program import PrologString from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy.sql.sqltypes import String +from sqlalchemy.sql.sqltypes import Float, String from macaron.config.defaults import defaults from macaron.database.database_manager import ORMBase from macaron.database.table_definitions import CheckFactsTable from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, NoneBuildTool +from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.checks import bac_ from macaron.slsa_analyzer.checks.base_check import BaseCheck from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType -from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService +from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService, NoneCIService from macaron.slsa_analyzer.ci_service.circleci import CircleCI from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI @@ -24,6 +28,7 @@ from macaron.slsa_analyzer.ci_service.travis import Travis from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName +from macaron.slsa_analyzer.specs.ci_spec import CIInfo logger: logging.Logger = logging.getLogger(__name__) @@ -37,6 +42,197 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): build_trigger: Mapped[str] = mapped_column(String, nullable=True) deploy_command: Mapped[str] = mapped_column(String, nullable=True) build_status_url: Mapped[str] = mapped_column(String, nullable=True) + confidence_score: Mapped[float] = mapped_column(Float, nullable=True) + + +def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: + """Check if the bash command is a build and deploy command.""" + # Account for Python projects having separate tools for packaging and publishing. + deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder + for com in commands: + + # Check for empty or invalid commands. + if not com or not com[0]: + continue + # The first argument in a bash command is the program name. 
+        # So first check that the program name is a supported build tool name.
+        # We need to handle cases where the first argument is a path to the program.
+        cmd_program_name = os.path.basename(com[0])
+        if not cmd_program_name:
+            logger.debug("Found invalid program name %s.", com[0])
+            continue
+
+        check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name)
+
+        # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'.
+        check_module_build_commands = any(
+            interpreter == cmd_program_name
+            and com[1]
+            and com[1] in build_tool.interpreter_flag
+            and com[2]
+            and com[2] in deploy_tool
+            for interpreter in build_tool.interpreter
+        )
+        prog_name_index = 2 if check_module_build_commands else 0
+
+        if check_build_commands or check_module_build_commands:
+            # Check the arguments in the bash command for the deploy goals.
+            # If there are no deploy args for this build tool, accept as deploy command.
+            if not build_tool.deploy_arg:
+                logger.info("No deploy arguments required. Accept %s as deploy command.", str(com))
+                return str(com)
+
+            for word in com[(prog_name_index + 1) :]:
+                # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy.
+                if word in build_tool.deploy_arg:
+                    logger.info("Found deploy command %s.", str(com))
+                    return str(com)
+    return ""
+
+
+def ci_parsed_subcheck(ci_info: CIInfo) -> dict:
+    """Check whether parsing is supported for this CI service's CI config files."""
+    check_certainty = 1
+
+    justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."]
+
+    if ci_info["bash_commands"]:
+        return {"certainty": check_certainty, "justification": justification}
+    return {"certainty": 0, "justification": ["The CI workflow files for this CI service aren't parsed."]}
+
+
+def deploy_action_subcheck(
+    ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool
+) -> dict:
+    """Check for use of a trusted GitHub Actions workflow to publish/deploy."""
+    # TODO: verify that deployment is legitimate and not a test
+    check_certainty = 0.8
+
+    if isinstance(build_tool, Pip):
+        trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[])
+
+        for callee in ci_info["callgraph"].bfs():
+            workflow_name = callee.name.split("@")[0]
+
+            if not workflow_name or callee.node_type not in [
+                GHWorkflowType.EXTERNAL,
+                GHWorkflowType.REUSABLE,
+            ]:
+                logger.debug("Workflow %s is not relevant. 
Skipping...", callee.name) + continue + if workflow_name in trusted_deploy_actions: + trigger_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, + ctx.commit_sha, + ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), + ) + deploy_action_source_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, ctx.commit_sha, callee.caller_path + ) + + html_url = ci_service.has_latest_run_passed( + ctx.repo_full_name, + ctx.branch_name, + ctx.commit_sha, + ctx.commit_date, + os.path.basename(callee.caller_path), + ) + + # TODO: include in the justification multiple cases of external action usage + justification: list[str | dict[str, str]] = [ + { + "To deploy": deploy_action_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy action: {workflow_name}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + return { + "certainty": check_certainty, + "justification": justification, + "deploy_command": workflow_name, + "trigger_link": trigger_link, + "deploy_action_source_link": deploy_action_source_link, + "html_url": html_url, + } + + return {"certainty": 0, "justification": []} + + +def deploy_command_subcheck( + ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool +) -> dict: + """Check for the use of deploy command to deploy.""" + check_certainty = 0.7 + for bash_cmd in ci_info["bash_commands"]: + deploy_cmd = has_deploy_command(bash_cmd["commands"], build_tool) + if deploy_cmd: + # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. + trigger_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, + ctx.commit_sha, + ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), + ) + # Get the permalink of the source file of the bash command. + bash_source_link = ci_service.api_client.get_file_link( + ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] + ) + + html_url = ci_service.has_latest_run_passed( + ctx.repo_full_name, + ctx.branch_name, + ctx.commit_sha, + ctx.commit_date, + os.path.basename(bash_cmd["CI_path"]), + ) + + justification: list[str | dict[str, str]] = [ + { + f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy command: {deploy_cmd}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + return { + "certainty": check_certainty, + "justification": justification, + "deploy_cmd": deploy_cmd, + "trigger_link": trigger_link, + "bash_source_link": bash_source_link, + "html_url": html_url, + } + return {"certainty": 0, "justification": ""} + + +def deploy_kws_subcheck(ctx: AnalyzeContext, ci_service: BaseCIService, build_tool: BaseBuildTool) -> dict: + """Check for the use of deploy keywords to deploy.""" + check_certainty = 0.6 + # We currently don't parse these CI configuration files. + # We just look for a keyword for now. 
+ for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): + if isinstance(ci_service, unparsed_ci): + if build_tool.ci_deploy_kws[ci_service.name]: + deploy_kw, config_name = ci_service.has_kws_in_config( + build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path + ) + if not config_name: + return {"certainty": 0, "justification": ""} + + justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + + return { + "certainty": check_certainty, + "justification": justification, + "deploy_kw": deploy_kw, + "config_name": config_name, + } + return {"certainty": 0, "justification": []} class BuildAsCodeCheck(BaseCheck): @@ -56,6 +252,8 @@ def __init__(self) -> None: ("mcn_trusted_builder_level_three_1", CheckResultType.FAILED), ] eval_reqs = [ReqName.BUILD_AS_CODE] + self.confidence_score_threshold = 0.3 + super().__init__( check_id="mcn_build_as_code_1", description=description, @@ -64,50 +262,6 @@ def __init__(self) -> None: result_on_skip=CheckResultType.PASSED, ) - def _has_deploy_command(self, commands: list[list[str]], build_tool: BaseBuildTool) -> str: - """Check if the bash command is a build and deploy command.""" - # Account for Python projects having separate tools for packaging and publishing. - deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder - for com in commands: - - # Check for empty or invalid commands. - if not com or not com[0]: - continue - # The first argument in a bash command is the program name. - # So first check that the program name is a supported build tool name. - # We need to handle cases where the first argument is a path to the program. - cmd_program_name = os.path.basename(com[0]) - if not cmd_program_name: - logger.debug("Found invalid program name %s.", com[0]) - continue - - check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) - - # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. - check_module_build_commands = any( - interpreter == cmd_program_name - and com[1] - and com[1] in build_tool.interpreter_flag - and com[2] - and com[2] in deploy_tool - for interpreter in build_tool.interpreter - ) - prog_name_index = 2 if check_module_build_commands else 0 - - if check_build_commands or check_module_build_commands: - # Check the arguments in the bash command for the deploy goals. - # If there are no deploy args for this build tool, accept as deploy command. - if not build_tool.deploy_arg: - logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) - return str(com) - - for word in com[(prog_name_index + 1) :]: - # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. - if word in build_tool.deploy_arg: - logger.info("Found deploy command %s.", str(com)) - return str(com) - return "" - def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResultType: """Implement the check in this method. @@ -130,164 +284,132 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # Checking if a build tool is discovered for this repo. if build_tool and not isinstance(build_tool, NoneBuildTool): for ci_info in ci_services: + ci_service = ci_info["service"] # Checking if a CI service is discovered for this repo. 
if isinstance(ci_service, NoneCIService): continue - trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) - - # Check for use of a trusted Github Actions workflow to publish/deploy. - # TODO: verify that deployment is legitimate and not a test - if trusted_deploy_actions: - for callee in ci_info["callgraph"].bfs(): - workflow_name = callee.name.split("@")[0] - - if not workflow_name or callee.node_type not in [ - GHWorkflowType.EXTERNAL, - GHWorkflowType.REUSABLE, - ]: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_deploy_actions: - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow( - os.path.basename(callee.caller_path) - ), - ) - deploy_action_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, callee.caller_path - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(callee.caller_path), - ) - - # TODO: include in the justification multiple cases of external action usage - justification: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name}" - " to deploy": deploy_action_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy action: {workflow_name}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - check_result["justification"].extend(justification) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = deploy_action_source_link - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["metadata"]["buildInvocationId"] = html_url - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=workflow_name, - build_status_url=html_url, - ) - ] - return CheckResultType.PASSED - - for bash_cmd in ci_info["bash_commands"]: - deploy_cmd = self._has_deploy_command(bash_cmd["commands"], build_tool) - if deploy_cmd: - # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), - ) - # Get the permalink of the source file of the bash command. 
- bash_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(bash_cmd["CI_path"]), - ) - - justification_cmd: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy command: {deploy_cmd}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - check_result["justification"].extend(justification_cmd) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = bash_source_link - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + # Run subchecks + ci_parsed = ci_parsed_subcheck(ci_info) + deploy_action = deploy_action_subcheck( + ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool + ) + deploy_command = deploy_command_subcheck( + ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool + ) + deploy_kws = deploy_kws_subcheck(ctx=ctx, ci_service=ci_service, build_tool=build_tool) + + # Compile justifications from subchecks + for subcheck in [ci_parsed, deploy_action, deploy_command, deploy_kws]: + check_result["justification"].extend(subcheck["justification"]) + + deploy_source_link = deploy_cmd = html_url = trigger_link = "" + + # TODO: do we want to populate this information regardless of whether the check passes or not? + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + predicate = ci_info["provenances"][0]["predicate"] + predicate["buildType"] = f"Custom {ci_service.name}" + predicate["invocation"]["configSource"][ + "uri" + ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + # TODO: Change this. Need a better method for deciding which of the values to store. + # Could decide based on preliminary queries in the prolog string. + if deploy_action["certainty"]: + deploy_source_link = deploy_action["deploy_action_source_link"] + deploy_cmd = deploy_action["deploy_command"] + html_url = deploy_action["html_url"] + trigger_link = deploy_action["trigger_link"] + predicate["metadata"]["buildInvocationId"] = html_url predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + predicate["builder"]["id"] = deploy_source_link + elif deploy_command["certainty"]: + deploy_source_link = deploy_command["deploy_action_source_link"] + deploy_cmd = deploy_command["deploy_command"] + html_url = deploy_command["html_url"] predicate["metadata"]["buildInvocationId"] = html_url - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=deploy_cmd, - build_status_url=html_url, - ) - ] - return CheckResultType.PASSED - - # We currently don't parse these CI configuration files. - # We just look for a keyword for now. 
- for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): - if isinstance(ci_service, unparsed_ci): - if build_tool.ci_deploy_kws[ci_service.name]: - deploy_kw, config_name = ci_service.has_kws_in_config( - build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path - ) - if not config_name: - break - check_result["justification"].append( - f"The target repository uses build tool {build_tool.name}" - + f" in {ci_service.name} using {deploy_kw} to deploy." - ) - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["builder"]["id"] = config_name - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - predicate["invocation"]["configSource"]["entryPoint"] = config_name - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - deploy_command=deploy_kw, - ) - ] - return CheckResultType.PASSED + predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + predicate["builder"]["id"] = deploy_source_link + elif deploy_kws["certainty"]: + deploy_cmd = deploy_kws["config_name"] + predicate["builder"]["id"] = deploy_command + predicate["invocation"]["configSource"]["entryPoint"] = deploy_command + + # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. + # TODO: just decide on one deploy method to pass to the database. + + # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. + bac_.build_as_code_subchecks = bac_.BuildAsCodeSubchecks( + ci_parsed=ci_parsed["certainty"], + deploy_action=deploy_action["certainty"], + deploy_command=deploy_command["certainty"], + deploy_kws=deploy_kws["certainty"], + ) + + prolog_string = PrologString( + """ + :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). + + A :: ci_parsed :- ci_parsed_check(A). + B :: deploy_action :- deploy_action_check(B). + C :: deploy_command :- deploy_command_check(C). + D :: deploy_kws :- deploy_kws_check(D). + + 0.80 :: deploy_action_certainty :- deploy_action. + 0.15 :: deploy_action_certainty :- deploy_action, ci_parsed. + + 0.70 :: deploy_command_certainty :- deploy_command. + 0.15 :: deploy_command_certainty :- deploy_command, ci_parsed. + + 0.60 :: deploy_kws_certainty :- deploy_kws. + + build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. + + query(build_as_code_check). + """ + ) + + # TODO: query each of the methods, and take the values from the one with the highest confidence. + confidence_score = 0.0 + result = get_evaluatable().create_from(prolog_string).evaluate() + for key, value in result.items(): + if str(key) == "build_as_code_check": + confidence_score = float(value) + # logger.info("%s : %s", key, value) + results = vars(bac_.build_as_code_subchecks) + + # TODO: Ideas: + # - Query the intermediate checks to construct the check_result table for the highest + # confidence score? + # - Can we find the evidence that contributes the most to this check to output the confidence + # scores for it, and populate the check_result table. + # - Print intermediate proofs? 
+ + check_result["confidence_score"] = confidence_score + + subcheck_results: list[str | dict[str, str]] = [results] + check_result["justification"].extend(subcheck_results) + + # TODO: Return subcheck certainties + check_result["result_tables"] = [ + BuildAsCodeTable( + build_tool_name=build_tool.name, + ci_service_name=ci_service.name, + build_trigger=trigger_link, + deploy_command=deploy_cmd, + build_status_url=html_url, + confidence_score=confidence_score, + ) + ] + + # Check whether the confidence score is greater than the minimum threshold for this check. + if confidence_score >= self.confidence_score_threshold: + logger.info("The certainty of this check passing is: %s", confidence_score) + return CheckResultType.PASSED pass_msg = f"The target repository does not use {build_tool.name} to deploy." check_result["justification"].append(pass_msg) diff --git a/src/macaron/slsa_analyzer/checks/check_result.py b/src/macaron/slsa_analyzer/checks/check_result.py index ab5531e01..3397dedb3 100644 --- a/src/macaron/slsa_analyzer/checks/check_result.py +++ b/src/macaron/slsa_analyzer/checks/check_result.py @@ -40,6 +40,7 @@ class CheckResult(TypedDict): result_tables: list[DeclarativeBase | Table] # recommendation: str result_type: CheckResultType + confidence_score: float class SkippedInfo(TypedDict): diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py new file mode 100644 index 000000000..3dcce24b9 --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -0,0 +1,55 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" +from problog.extern import problog_export + +from macaron.slsa_analyzer.checks.bac_ import build_as_code_subchecks + + +@problog_export("-int") # type: ignore +def ci_parsed_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.ci_parsed + + +@problog_export("-int") # type: ignore +def deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.deploy_action + + +@problog_export("-int") # type: ignore +def deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subchecks.deploy_command + + +@problog_export("-int") # type: ignore +def deploy_kws_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + return build_as_code_subchecks.deploy_kws diff --git a/src/macaron/slsa_analyzer/registry.py b/src/macaron/slsa_analyzer/registry.py index 9fd3b487e..b98de6136 100644 --- a/src/macaron/slsa_analyzer/registry.py +++ b/src/macaron/slsa_analyzer/registry.py @@ -375,6 +375,7 @@ def scan(self, target: AnalyzeContext, skipped_checks: list[SkippedInfo]) -> dic justification=[message], result_type=CheckResultType.UNKNOWN, result_tables=[], + confidence_score=0, ) graph.done(check_id) else: From 6bfaa0c817a95857a1184483890bbbdc2484f99b Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 12:08:07 +1000 Subject: [PATCH 03/29] refactor: specify build as code subcheck dependencies and invoke through problog inference Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/bac_.py | 18 - .../checks/build_as_code_check.py | 324 +++--------------- .../checks/build_as_code_subchecks.py | 248 ++++++++++++++ .../checks/problog_predicates.py | 10 +- 4 files changed, 305 insertions(+), 295 deletions(-) delete mode 100644 src/macaron/slsa_analyzer/checks/bac_.py create mode 100644 src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py diff --git a/src/macaron/slsa_analyzer/checks/bac_.py b/src/macaron/slsa_analyzer/checks/bac_.py deleted file mode 100644 index 6a993dcf0..000000000 --- a/src/macaron/slsa_analyzer/checks/bac_.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. -# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. - -"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" -from attr import dataclass - - -@dataclass -class BuildAsCodeSubchecks: - """Dataclass for storing the results from the BuildAsCodeCheck subchecks.""" - - ci_parsed: float - deploy_action: float - deploy_command: float - deploy_kws: float - - -build_as_code_subchecks: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 7a00c27ee..ad3c369e8 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -4,31 +4,23 @@ """This module contains the BuildAsCodeCheck class.""" import logging -import os from problog import get_evaluatable from problog.program import PrologString from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import Float, String -from macaron.config.defaults import defaults from macaron.database.database_manager import ORMBase from macaron.database.table_definitions import CheckFactsTable from macaron.slsa_analyzer.analyze_context import AnalyzeContext -from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool, NoneBuildTool -from macaron.slsa_analyzer.build_tool.pip import Pip -from macaron.slsa_analyzer.checks import bac_ +from macaron.slsa_analyzer.build_tool.base_build_tool import NoneBuildTool +from macaron.slsa_analyzer.checks import build_as_code_subchecks from macaron.slsa_analyzer.checks.base_check import BaseCheck +from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType -from macaron.slsa_analyzer.ci_service.base_ci_service import BaseCIService, NoneCIService -from macaron.slsa_analyzer.ci_service.circleci import CircleCI -from 
macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType -from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI -from macaron.slsa_analyzer.ci_service.jenkins import Jenkins -from macaron.slsa_analyzer.ci_service.travis import Travis +from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService from macaron.slsa_analyzer.registry import registry from macaron.slsa_analyzer.slsa_req import ReqName -from macaron.slsa_analyzer.specs.ci_spec import CIInfo logger: logging.Logger = logging.getLogger(__name__) @@ -45,196 +37,6 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): confidence_score: Mapped[float] = mapped_column(Float, nullable=True) -def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: - """Check if the bash command is a build and deploy command.""" - # Account for Python projects having separate tools for packaging and publishing. - deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder - for com in commands: - - # Check for empty or invalid commands. - if not com or not com[0]: - continue - # The first argument in a bash command is the program name. - # So first check that the program name is a supported build tool name. - # We need to handle cases where the first argument is a path to the program. - cmd_program_name = os.path.basename(com[0]) - if not cmd_program_name: - logger.debug("Found invalid program name %s.", com[0]) - continue - - check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) - - # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. - check_module_build_commands = any( - interpreter == cmd_program_name - and com[1] - and com[1] in build_tool.interpreter_flag - and com[2] - and com[2] in deploy_tool - for interpreter in build_tool.interpreter - ) - prog_name_index = 2 if check_module_build_commands else 0 - - if check_build_commands or check_module_build_commands: - # Check the arguments in the bash command for the deploy goals. - # If there are no deploy args for this build tool, accept as deploy command. - if not build_tool.deploy_arg: - logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) - return str(com) - - for word in com[(prog_name_index + 1) :]: - # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. 
- if word in build_tool.deploy_arg: - logger.info("Found deploy command %s.", str(com)) - return str(com) - return "" - - -def ci_parsed_subcheck(ci_info: CIInfo) -> dict: - """Check whether parsing is supported for this CI service's CI config files.""" - check_certainty = 1 - - justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - - if ci_info["bash_commands"]: - return {"certainty": check_certainty, "justification": justification} - return {"certainty": 0, "justification": [{"The CI workflow files for this CI service aren't parsed."}]} - - -def deploy_action_subcheck( - ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool -) -> dict: - """Check for use of a trusted Github Actions workflow to publish/deploy.""" - # TODO: verify that deployment is legitimate and not a test - check_certainty = 0.8 - - if isinstance(build_tool, Pip): - trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) - - for callee in ci_info["callgraph"].bfs(): - workflow_name = callee.name.split("@")[0] - - if not workflow_name or callee.node_type not in [ - GHWorkflowType.EXTERNAL, - GHWorkflowType.REUSABLE, - ]: - logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - continue - if workflow_name in trusted_deploy_actions: - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), - ) - deploy_action_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, callee.caller_path - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(callee.caller_path), - ) - - # TODO: include in the justification multiple cases of external action usage - justification: list[str | dict[str, str]] = [ - { - "To deploy": deploy_action_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy action: {workflow_name}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - - return { - "certainty": check_certainty, - "justification": justification, - "deploy_command": workflow_name, - "trigger_link": trigger_link, - "deploy_action_source_link": deploy_action_source_link, - "html_url": html_url, - } - - return {"certainty": 0, "justification": []} - - -def deploy_command_subcheck( - ctx: AnalyzeContext, ci_info: CIInfo, ci_service: BaseCIService, build_tool: BaseBuildTool -) -> dict: - """Check for the use of deploy command to deploy.""" - check_certainty = 0.7 - for bash_cmd in ci_info["bash_commands"]: - deploy_cmd = has_deploy_command(bash_cmd["commands"], build_tool) - if deploy_cmd: - # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. - trigger_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, - ctx.commit_sha, - ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), - ) - # Get the permalink of the source file of the bash command. 
- bash_source_link = ci_service.api_client.get_file_link( - ctx.repo_full_name, ctx.commit_sha, bash_cmd["caller_path"] - ) - - html_url = ci_service.has_latest_run_passed( - ctx.repo_full_name, - ctx.branch_name, - ctx.commit_sha, - ctx.commit_date, - os.path.basename(bash_cmd["CI_path"]), - ) - - justification: list[str | dict[str, str]] = [ - { - f"The target repository uses build tool {build_tool.name} to deploy": bash_source_link, - "The build is triggered by": trigger_link, - }, - f"Deploy command: {deploy_cmd}", - {"The status of the build can be seen at": html_url} - if html_url - else "However, could not find a passing workflow run.", - ] - return { - "certainty": check_certainty, - "justification": justification, - "deploy_cmd": deploy_cmd, - "trigger_link": trigger_link, - "bash_source_link": bash_source_link, - "html_url": html_url, - } - return {"certainty": 0, "justification": ""} - - -def deploy_kws_subcheck(ctx: AnalyzeContext, ci_service: BaseCIService, build_tool: BaseBuildTool) -> dict: - """Check for the use of deploy keywords to deploy.""" - check_certainty = 0.6 - # We currently don't parse these CI configuration files. - # We just look for a keyword for now. - for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): - if isinstance(ci_service, unparsed_ci): - if build_tool.ci_deploy_kws[ci_service.name]: - deploy_kw, config_name = ci_service.has_kws_in_config( - build_tool.ci_deploy_kws[ci_service.name], repo_path=ctx.repo_path - ) - if not config_name: - return {"certainty": 0, "justification": ""} - - justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] - - return { - "certainty": check_certainty, - "justification": justification, - "deploy_kw": deploy_kw, - "config_name": config_name, - } - return {"certainty": 0, "justification": []} - - class BuildAsCodeCheck(BaseCheck): """This class checks the build as code requirement. @@ -290,65 +92,8 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu if isinstance(ci_service, NoneCIService): continue - # Run subchecks - ci_parsed = ci_parsed_subcheck(ci_info) - deploy_action = deploy_action_subcheck( - ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool - ) - deploy_command = deploy_command_subcheck( - ctx=ctx, ci_info=ci_info, ci_service=ci_service, build_tool=build_tool - ) - deploy_kws = deploy_kws_subcheck(ctx=ctx, ci_service=ci_service, build_tool=build_tool) - - # Compile justifications from subchecks - for subcheck in [ci_parsed, deploy_action, deploy_command, deploy_kws]: - check_result["justification"].extend(subcheck["justification"]) - - deploy_source_link = deploy_cmd = html_url = trigger_link = "" - - # TODO: do we want to populate this information regardless of whether the check passes or not? - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - - if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - predicate = ci_info["provenances"][0]["predicate"] - predicate["buildType"] = f"Custom {ci_service.name}" - predicate["invocation"]["configSource"][ - "uri" - ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - - # TODO: Change this. Need a better method for deciding which of the values to store. - # Could decide based on preliminary queries in the prolog string. 
- if deploy_action["certainty"]: - deploy_source_link = deploy_action["deploy_action_source_link"] - deploy_cmd = deploy_action["deploy_command"] - html_url = deploy_action["html_url"] - trigger_link = deploy_action["trigger_link"] - predicate["metadata"]["buildInvocationId"] = html_url - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["builder"]["id"] = deploy_source_link - elif deploy_command["certainty"]: - deploy_source_link = deploy_command["deploy_action_source_link"] - deploy_cmd = deploy_command["deploy_command"] - html_url = deploy_command["html_url"] - predicate["metadata"]["buildInvocationId"] = html_url - predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - predicate["builder"]["id"] = deploy_source_link - elif deploy_kws["certainty"]: - deploy_cmd = deploy_kws["config_name"] - predicate["builder"]["id"] = deploy_command - predicate["invocation"]["configSource"]["entryPoint"] = deploy_command - - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. - # TODO: just decide on one deploy method to pass to the database. - # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. - bac_.build_as_code_subchecks = bac_.BuildAsCodeSubchecks( - ci_parsed=ci_parsed["certainty"], - deploy_action=deploy_action["certainty"], - deploy_command=deploy_command["certainty"], - deploy_kws=deploy_kws["certainty"], - ) + build_as_code_subchecks.build_as_code_subcheck_results = BuildAsCodeSubchecks(ctx=ctx, ci_info=ci_info) prolog_string = PrologString( """ @@ -377,10 +122,10 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu confidence_score = 0.0 result = get_evaluatable().create_from(prolog_string).evaluate() for key, value in result.items(): + print(key, value) if str(key) == "build_as_code_check": confidence_score = float(value) - # logger.info("%s : %s", key, value) - results = vars(bac_.build_as_code_subchecks) + results = vars(build_as_code_subchecks.build_as_code_subcheck_results) # TODO: Ideas: # - Query the intermediate checks to construct the check_result table for the highest @@ -394,17 +139,52 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu subcheck_results: list[str | dict[str, str]] = [results] check_result["justification"].extend(subcheck_results) + # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. + # TODO: determine a better way to save these values to the database. + + # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + + # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + # predicate = ci_info["provenances"][0]["predicate"] + # predicate["buildType"] = f"Custom {ci_service.name}" + # predicate["invocation"]["configSource"][ + # "uri" + # ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + # predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + # # TODO: Change this. Need a better method for deciding which of the values to store. + # # Could decide based on preliminary queries in the prolog string. 
+ # if deploy_action["certainty"]: + # deploy_source_link = deploy_action["deploy_action_source_link"] + # deploy_cmd = deploy_action["deploy_command"] + # html_url = deploy_action["html_url"] + # trigger_link = deploy_action["trigger_link"] + # predicate["metadata"]["buildInvocationId"] = html_url + # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + # predicate["builder"]["id"] = deploy_source_link + # elif deploy_command["certainty"]: + # deploy_source_link = deploy_command["deploy_action_source_link"] + # deploy_cmd = deploy_command["deploy_command"] + # html_url = deploy_command["html_url"] + # predicate["metadata"]["buildInvocationId"] = html_url + # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link + # predicate["builder"]["id"] = deploy_source_link + # elif deploy_kws["certainty"]: + # deploy_cmd = deploy_kws["config_name"] + # predicate["builder"]["id"] = deploy_command + # predicate["invocation"]["configSource"]["entryPoint"] = deploy_command + # TODO: Return subcheck certainties - check_result["result_tables"] = [ - BuildAsCodeTable( - build_tool_name=build_tool.name, - ci_service_name=ci_service.name, - build_trigger=trigger_link, - deploy_command=deploy_cmd, - build_status_url=html_url, - confidence_score=confidence_score, - ) - ] + # check_result["result_tables"] = [ + # BuildAsCodeTable( + # build_tool_name=build_tool.name, + # ci_service_name=ci_service.name, + # build_trigger=trigger_link, + # deploy_command=deploy_cmd, + # build_status_url=html_url, + # confidence_score=confidence_score, + # ) + # ] # Check whether the confidence score is greater than the minimum threshold for this check. if confidence_score >= self.confidence_score_threshold: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py new file mode 100644 index 000000000..1cb6a4f6c --- /dev/null +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -0,0 +1,248 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module contains a class to store results from the BuildAsCodeCheck subchecks.""" + +import logging +import os + +from macaron.config.defaults import defaults +from macaron.slsa_analyzer.analyze_context import AnalyzeContext +from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool +from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.ci_service.circleci import CircleCI +from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType +from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI +from macaron.slsa_analyzer.ci_service.jenkins import Jenkins +from macaron.slsa_analyzer.ci_service.travis import Travis +from macaron.slsa_analyzer.specs.ci_spec import CIInfo + +logger: logging.Logger = logging.getLogger(__name__) + + +def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> str: + """Check if the bash command is a build and deploy command.""" + # Account for Python projects having separate tools for packaging and publishing. + deploy_tool = build_tool.publisher if build_tool.publisher else build_tool.builder + for com in commands: + + # Check for empty or invalid commands. + if not com or not com[0]: + continue + # The first argument in a bash command is the program name. 
+ # So first check that the program name is a supported build tool name. + # We need to handle cases where the first argument is a path to the program. + cmd_program_name = os.path.basename(com[0]) + if not cmd_program_name: + logger.debug("Found invalid program name %s.", com[0]) + continue + + check_build_commands = any(build_cmd for build_cmd in deploy_tool if build_cmd == cmd_program_name) + + # Support the use of interpreters like Python that load modules, i.e., 'python -m pip install'. + check_module_build_commands = any( + interpreter == cmd_program_name + and com[1] + and com[1] in build_tool.interpreter_flag + and com[2] + and com[2] in deploy_tool + for interpreter in build_tool.interpreter + ) + prog_name_index = 2 if check_module_build_commands else 0 + + if check_build_commands or check_module_build_commands: + # Check the arguments in the bash command for the deploy goals. + # If there are no deploy args for this build tool, accept as deploy command. + if not build_tool.deploy_arg: + logger.info("No deploy arguments required. Accept %s as deploy command.", str(com)) + return str(com) + + for word in com[(prog_name_index + 1) :]: + # TODO: allow plugin versions in arguments, e.g., maven-plugin:1.6.8:deploy. + if word in build_tool.deploy_arg: + logger.info("Found deploy command %s.", str(com)) + return str(com) + return "" + + +class BuildAsCodeSubchecks: + """Class for storing the results from the BuildAsCodeCheck subchecks.""" + + # store analyze context + def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: + self.ctx = ctx + self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore + self.ci_services = ctx.dynamic_data["ci_services"] + self.check_results: dict = {} # Update this with each check. + self.ci_info = ci_info + self.ci_service = ci_info["service"] + self.failed_check = 0.0 + + # TODO: Make subcheck functions available to other checks. + + # TODO: Before each check is run, check whether a certainty result already exists in self.check_results + # to avoid re-running unecessarily. + + def ci_parsed(self) -> float: + """Check whether parsing is supported for this CI service's CI config files.""" + check_certainty = 1.0 + # If this check has already been run on this repo, return certainty. + + justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] + + if self.ci_info["bash_commands"]: + self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} + return check_certainty + return self.failed_check + + def deploy_command(self) -> float: + """Check for the use of deploy command to deploy.""" + check_certainty = 0.7 + depends_on = [self.ci_parsed() > 0] + if not all(depends_on): + return self.failed_check + + for bash_cmd in self.ci_info["bash_commands"]: + deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) + if deploy_cmd: + # Get the permalink and HTML hyperlink tag of the CI file that triggered the bash command. + trigger_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, + self.ctx.commit_sha, + self.ci_service.api_client.get_relative_path_of_workflow(os.path.basename(bash_cmd["CI_path"])), + ) + # Get the permalink of the source file of the bash command. 
+ bash_source_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, self.ctx.commit_sha, bash_cmd["caller_path"] + ) + + html_url = self.ci_service.has_latest_run_passed( + self.ctx.repo_full_name, + self.ctx.branch_name, + self.ctx.commit_sha, + self.ctx.commit_date, + os.path.basename(bash_cmd["CI_path"]), + ) + + justification: list[str | dict[str, str]] = [ + { + f"The target repository uses build tool {self.build_tool.name} to deploy": bash_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy command: {deploy_cmd}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + self.check_results["deploy_command"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_cmd": deploy_cmd, + "trigger_link": trigger_link, + "bash_source_link": bash_source_link, + "html_url": html_url, + } + + return check_certainty + return self.failed_check + + def deploy_kws(self) -> float: + """Check for the use of deploy keywords to deploy.""" + check_certainty = 0.6 + depends_on = [self.ci_parsed() == 0.0] + # If this check has already been run on this repo, return certainty. + + if not all(depends_on): + return self.failed_check + + # We currently don't parse these CI configuration files. + # We just look for a keyword for now. + for unparsed_ci in (Jenkins, Travis, CircleCI, GitLabCI): + if isinstance(self.ci_service, unparsed_ci): + if self.build_tool.ci_deploy_kws[self.ci_service.name]: + deploy_kw, config_name = self.ci_service.has_kws_in_config( + self.build_tool.ci_deploy_kws[self.ci_service.name], repo_path=self.ctx.repo_path + ) + if not config_name: + return self.failed_check + + justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + + self.check_results["deploy_kws"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_kw": deploy_kw, + "config_name": config_name, + } + return check_certainty + + return self.failed_check + + def deploy_action(self) -> float: + """Check for use of a trusted Github Actions workflow to publish/deploy.""" + # TODO: verify that deployment is legitimate and not a test + check_certainty = 0.8 + depends_on = [self.ci_parsed() > 0] + # If this check has already been run on this repo, return certainty. + print("CI PARSED: ", self.ci_parsed()) + if not all(depends_on): + return self.failed_check + + if isinstance(self.build_tool, Pip): + trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) + + for callee in self.ci_info["callgraph"].bfs(): + workflow_name = callee.name.split("@")[0] + + if not workflow_name or callee.node_type not in [ + GHWorkflowType.EXTERNAL, + GHWorkflowType.REUSABLE, + ]: + logger.debug("Workflow %s is not relevant. 
Skipping...", callee.name) + continue + if workflow_name in trusted_deploy_actions: + trigger_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, + self.ctx.commit_sha, + self.ci_service.api_client.get_relative_path_of_workflow(os.path.basename(callee.caller_path)), + ) + deploy_action_source_link = self.ci_service.api_client.get_file_link( + self.ctx.repo_full_name, self.ctx.commit_sha, callee.caller_path + ) + + html_url = self.ci_service.has_latest_run_passed( + self.ctx.repo_full_name, + self.ctx.branch_name, + self.ctx.commit_sha, + self.ctx.commit_date, + os.path.basename(callee.caller_path), + ) + + # TODO: include in the justification multiple cases of external action usage + justification: list[str | dict[str, str]] = [ + { + "To deploy": deploy_action_source_link, + "The build is triggered by": trigger_link, + }, + f"Deploy action: {workflow_name}", + {"The status of the build can be seen at": html_url} + if html_url + else "However, could not find a passing workflow run.", + ] + + self.check_results["deploy_action"] = { + "certainty": check_certainty, + "justification": justification, + "deploy_command": workflow_name, + "trigger_link": trigger_link, + "deploy_action_source_link": deploy_action_source_link, + "html_url": html_url, + } + + return check_certainty + + return self.failed_check + + +build_as_code_subcheck_results: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 3dcce24b9..89b1733b2 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -4,7 +4,7 @@ """Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" from problog.extern import problog_export -from macaron.slsa_analyzer.checks.bac_ import build_as_code_subchecks +from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results @problog_export("-int") # type: ignore @@ -16,7 +16,7 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.ci_parsed + return build_as_code_subcheck_results.ci_parsed() @problog_export("-int") # type: ignore @@ -28,7 +28,7 @@ def deploy_action_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.deploy_action + return build_as_code_subcheck_results.deploy_action() @problog_export("-int") # type: ignore @@ -40,7 +40,7 @@ def deploy_command_check() -> float: Certainty The certainty of the check. """ - return build_as_code_subchecks.deploy_command + return build_as_code_subcheck_results.deploy_command() @problog_export("-int") # type: ignore @@ -52,4 +52,4 @@ def deploy_kws_check() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subchecks.deploy_kws + return build_as_code_subcheck_results.deploy_kws() From 41372fafc3eaca5f627ecfa153c6b4cbeef0508f Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 12:18:29 +1000 Subject: [PATCH 04/29] fix: update test_gha_workflow_deployment so that it passes the ci_parsed subcheck Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py | 4 +--- tests/slsa_analyzer/checks/test_build_as_code_check.py | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 1cb6a4f6c..4fc3fe49d 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -98,7 +98,7 @@ def ci_parsed(self) -> float: def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" check_certainty = 0.7 - depends_on = [self.ci_parsed() > 0] + depends_on = [self.ci_parsed() > 0.0] if not all(depends_on): return self.failed_check @@ -184,8 +184,6 @@ def deploy_action(self) -> float: # TODO: verify that deployment is legitimate and not a test check_certainty = 0.8 depends_on = [self.ci_parsed() > 0] - # If this check has already been run on this repo, return certainty. - print("CI PARSED: ", self.ci_parsed()) if not all(depends_on): return self.failed_check diff --git a/tests/slsa_analyzer/checks/test_build_as_code_check.py b/tests/slsa_analyzer/checks/test_build_as_code_check.py index b7a07b0f9..8f3252844 100644 --- a/tests/slsa_analyzer/checks/test_build_as_code_check.py +++ b/tests/slsa_analyzer/checks/test_build_as_code_check.py @@ -180,9 +180,10 @@ def test_gha_workflow_deployment( """Test the use of verified GitHub Actions to deploy.""" check = BuildAsCodeCheck() check_result = CheckResult(justification=[]) # type: ignore + bash_commands = BashCommands(caller_path="source_file", CI_path="ci_file", CI_type="github_actions", commands=[[]]) ci_info = CIInfo( service=github_actions_service, - bash_commands=[], + bash_commands=[bash_commands], callgraph=CallGraph(BaseNode(), ""), provenance_assets=[], latest_release={}, From ca0a3988988f02c2b9a0d2605c243a986a97945e Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 13:02:23 +1000 Subject: [PATCH 05/29] chore: convert problog result dictionary to use str keys Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index ad3c369e8..5312a3806 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -6,7 +6,7 @@ import logging from problog import get_evaluatable -from problog.program import PrologString +from problog.program import PrologString, Term from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql.sqltypes import Float, String @@ -86,15 +86,16 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # Checking if a build tool is discovered for this repo. if build_tool and not isinstance(build_tool, NoneBuildTool): for ci_info in ci_services: - + confidence_score = 0.0 ci_service = ci_info["service"] # Checking if a CI service is discovered for this repo. 
if isinstance(ci_service, NoneCIService): continue - # Populate the BuildAsCodeSubchecks object with the certainty results from subchecks. + # Initialize the BuildAsCodeSubchecks object with the AnalyzeContext. build_as_code_subchecks.build_as_code_subcheck_results = BuildAsCodeSubchecks(ctx=ctx, ci_info=ci_info) + # ProbLog rules to be evaluated. prolog_string = PrologString( """ :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). @@ -114,18 +115,20 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. + query(deploy_command_certainty). + query(deploy_action_certainty). + query(deploy_kws_certainty). query(build_as_code_check). """ ) # TODO: query each of the methods, and take the values from the one with the highest confidence. - confidence_score = 0.0 - result = get_evaluatable().create_from(prolog_string).evaluate() - for key, value in result.items(): - print(key, value) - if str(key) == "build_as_code_check": - confidence_score = float(value) - results = vars(build_as_code_subchecks.build_as_code_subcheck_results) + + # Convert the result dictionary from Term:float to str:float + term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() + result: dict[str, float] = {str(k): v for k, v in term_result.items()} + + confidence_score = result["build_as_code_check"] # TODO: Ideas: # - Query the intermediate checks to construct the check_result table for the highest @@ -136,9 +139,6 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu check_result["confidence_score"] = confidence_score - subcheck_results: list[str | dict[str, str]] = [results] - check_result["justification"].extend(subcheck_results) - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. # TODO: determine a better way to save these values to the database. 
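
A quick way to see what the ProbLog string above computes: each "P :: head :- body."
rule fires independently, so two rules for the same head combine as a noisy-or. The
following standalone sketch uses hard-coded probabilistic facts in place of the
exported Python predicates, so it runs on its own against the problog package:

    from problog import get_evaluatable
    from problog.program import PrologString

    MODEL = PrologString(
        """
        1.0 :: ci_parsed.
        0.8 :: deploy_action.

        0.80 :: deploy_action_certainty :- deploy_action.
        0.15 :: deploy_action_certainty :- deploy_action, ci_parsed.

        query(deploy_action_certainty).
        """
    )

    # Both rules can fire, so their probabilities combine as a noisy-or:
    # 0.8 * (1 - (1 - 0.80) * (1 - 0.15)) = 0.8 * 0.83 = 0.664
    result = get_evaluatable().create_from(MODEL).evaluate()
    for term, probability in result.items():
        print(term, probability)  # deploy_action_certainty: 0.664
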
From 6fcfbd1cc0aca09d99ea34bf7fff592ad1b0455a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 7 Jun 2023 15:46:23 +1000 Subject: [PATCH 06/29] feat: perform intermediate querying on deploy method subchecks to determine which results to store Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 106 ++++++++---------- .../checks/build_as_code_subchecks.py | 69 +++++++----- 2 files changed, 91 insertions(+), 84 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 5312a3806..78eac81ed 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -16,7 +16,7 @@ from macaron.slsa_analyzer.build_tool.base_build_tool import NoneBuildTool from macaron.slsa_analyzer.checks import build_as_code_subchecks from macaron.slsa_analyzer.checks.base_check import BaseCheck -from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks +from macaron.slsa_analyzer.checks.build_as_code_subchecks import BuildAsCodeSubchecks, DeploySubcheckResults from macaron.slsa_analyzer.checks.check_result import CheckResult, CheckResultType from macaron.slsa_analyzer.ci_service.base_ci_service import NoneCIService from macaron.slsa_analyzer.registry import registry @@ -122,69 +122,57 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu """ ) - # TODO: query each of the methods, and take the values from the one with the highest confidence. - # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() - result: dict[str, float] = {str(k): v for k, v in term_result.items()} - + result: dict[str, float] = {str(key): value for key, value in term_result.items()} + deploy_methods = { + "deploy_command": result["deploy_command_certainty"], + "deploy_action": result["deploy_action_certainty"], + "deploy_kws": result["deploy_kws_certainty"], + } + deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} confidence_score = result["build_as_code_check"] - - # TODO: Ideas: - # - Query the intermediate checks to construct the check_result table for the highest - # confidence score? - # - Can we find the evidence that contributes the most to this check to output the confidence - # scores for it, and populate the check_result table. - # - Print intermediate proofs? - check_result["confidence_score"] = confidence_score - # TODO: BuildAsCodeTable should contain the results from subchecks and the confidence scores. - # TODO: determine a better way to save these values to the database. - - # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - - # if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: - # predicate = ci_info["provenances"][0]["predicate"] - # predicate["buildType"] = f"Custom {ci_service.name}" - # predicate["invocation"]["configSource"][ - # "uri" - # ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" - # predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha - - # # TODO: Change this. Need a better method for deciding which of the values to store. - # # Could decide based on preliminary queries in the prolog string. 
- # if deploy_action["certainty"]: - # deploy_source_link = deploy_action["deploy_action_source_link"] - # deploy_cmd = deploy_action["deploy_command"] - # html_url = deploy_action["html_url"] - # trigger_link = deploy_action["trigger_link"] - # predicate["metadata"]["buildInvocationId"] = html_url - # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - # predicate["builder"]["id"] = deploy_source_link - # elif deploy_command["certainty"]: - # deploy_source_link = deploy_command["deploy_action_source_link"] - # deploy_cmd = deploy_command["deploy_command"] - # html_url = deploy_command["html_url"] - # predicate["metadata"]["buildInvocationId"] = html_url - # predicate["invocation"]["configSource"]["entryPoint"] = trigger_link - # predicate["builder"]["id"] = deploy_source_link - # elif deploy_kws["certainty"]: - # deploy_cmd = deploy_kws["config_name"] - # predicate["builder"]["id"] = deploy_command - # predicate["invocation"]["configSource"]["entryPoint"] = deploy_command - - # TODO: Return subcheck certainties - # check_result["result_tables"] = [ - # BuildAsCodeTable( - # build_tool_name=build_tool.name, - # ci_service_name=ci_service.name, - # build_trigger=trigger_link, - # deploy_command=deploy_cmd, - # build_status_url=html_url, - # confidence_score=confidence_score, - # ) - # ] + if deploy_methods_valid.values(): + # Determine the deployment method with the highest certainty score. + highest_certainty = max(deploy_methods_valid, key=deploy_methods_valid.__getitem__) + deploy_method = build_as_code_subchecks.build_as_code_subcheck_results.get_subcheck_results( + highest_certainty + ) + + if isinstance(deploy_method, DeploySubcheckResults): + if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + predicate = ci_info["provenances"][0]["predicate"] + predicate["buildType"] = f"Custom {ci_service.name}" + predicate["invocation"]["configSource"][ + "uri" + ] = f"{ctx.remote_path}@refs/heads/{ctx.branch_name}" + predicate["invocation"]["configSource"]["digest"]["sha1"] = ctx.commit_sha + + predicate["metadata"]["buildInvocationId"] = deploy_method.html_url + predicate["builder"]["id"] = deploy_method.source_link + predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.trigger_link + + if highest_certainty == "deploy_kws": + predicate["builder"]["id"] = deploy_method.config_name + predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.config_name + + check_result["result_tables"] = [ + BuildAsCodeTable( + build_tool_name=build_tool.name, + ci_service_name=ci_service.name, + build_trigger=deploy_method.trigger_link, + deploy_command=deploy_method.deploy_cmd, + build_status_url=deploy_method.html_url, + confidence_score=confidence_score, + ) + ] + + # TODO: compile all justifications + # check_result["justification"].append() + + # TODO: Investigate using proofs # Check whether the confidence score is greater than the minimum threshold for this check. 
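+            # (The noisy-or combination of the rules above can only raise the overall
+            # confidence as more evidence fires; it never falls below the strongest
+            # single deploy method.)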
if confidence_score >= self.confidence_score_threshold: diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 4fc3fe49d..9b2928d93 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -6,6 +6,8 @@ import logging import os +from attr import dataclass + from macaron.config.defaults import defaults from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool @@ -65,6 +67,19 @@ def has_deploy_command(commands: list[list[str]], build_tool: BaseBuildTool) -> return "" +@dataclass +class DeploySubcheckResults: + """DataClass containing information required from deploy command subchecks.""" + + certainty: float = 0.0 + justification: list[str | dict[str, str]] = [""] + deploy_cmd: str = "" + trigger_link: str = "" + source_link: str = "" + html_url: str = "" + config_name: str = "" + + class BuildAsCodeSubchecks: """Class for storing the results from the BuildAsCodeCheck subchecks.""" @@ -73,9 +88,10 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ctx = ctx self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore self.ci_services = ctx.dynamic_data["ci_services"] - self.check_results: dict = {} # Update this with each check. + self.check_results: dict[str, dict | DeploySubcheckResults] = {} # Update this with each check. self.ci_info = ci_info self.ci_service = ci_info["service"] + # Certainty value to be returned if a subcheck fails. self.failed_check = 0.0 # TODO: Make subcheck functions available to other checks. @@ -88,9 +104,8 @@ def ci_parsed(self) -> float: check_certainty = 1.0 # If this check has already been run on this repo, return certainty. 
- justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - if self.ci_info["bash_commands"]: + justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} return check_certainty return self.failed_check @@ -135,14 +150,14 @@ def deploy_command(self) -> float: else "However, could not find a passing workflow run.", ] - self.check_results["deploy_command"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_cmd": deploy_cmd, - "trigger_link": trigger_link, - "bash_source_link": bash_source_link, - "html_url": html_url, - } + self.check_results["deploy_command"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=deploy_cmd, + trigger_link=trigger_link, + source_link=bash_source_link, + html_url=html_url, + ) return check_certainty return self.failed_check @@ -169,12 +184,12 @@ def deploy_kws(self) -> float: justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] - self.check_results["deploy_kws"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_kw": deploy_kw, - "config_name": config_name, - } + self.check_results["deploy_kws"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=deploy_kw, + config_name=config_name, + ) return check_certainty return self.failed_check @@ -229,18 +244,22 @@ def deploy_action(self) -> float: else "However, could not find a passing workflow run.", ] - self.check_results["deploy_action"] = { - "certainty": check_certainty, - "justification": justification, - "deploy_command": workflow_name, - "trigger_link": trigger_link, - "deploy_action_source_link": deploy_action_source_link, - "html_url": html_url, - } + self.check_results["deploy_action"] = DeploySubcheckResults( + certainty=check_certainty, + justification=justification, + deploy_cmd=workflow_name, + trigger_link=trigger_link, + source_link=deploy_action_source_link, + html_url=html_url, + ) return check_certainty return self.failed_check + def get_subcheck_results(self, subcheck_name: str) -> dict | DeploySubcheckResults: + """Return the results for a particular subcheck.""" + return self.check_results[subcheck_name] + build_as_code_subcheck_results: BuildAsCodeSubchecks = None # type: ignore # pylint: disable=invalid-name From 35ae23b7d0c9e0ee3bd0068b906a4661b6f368bf Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Fri, 9 Jun 2023 11:34:32 +1000 Subject: [PATCH 07/29] feat: add sub-check for workflow trigger event type Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/base_check.py | 2 +- .../checks/build_as_code_check.py | 3 + .../checks/build_as_code_subchecks.py | 56 ++++++++++++++++++- .../checks/problog_predicates.py | 24 ++++++++ 4 files changed, 81 insertions(+), 4 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/base_check.py b/src/macaron/slsa_analyzer/checks/base_check.py index 8b12d2005..3a43cf635 100644 --- a/src/macaron/slsa_analyzer/checks/base_check.py +++ b/src/macaron/slsa_analyzer/checks/base_check.py @@ -86,7 +86,7 @@ def run(self, target: AnalyzeContext, skipped_info: Optional[SkippedInfo] = None justification=[], result_type=CheckResultType.SKIPPED, result_tables=[], - confidence_score=0, + confidence_score=0.0, ) if skipped_info: diff --git 
a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 78eac81ed..89bed17fb 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -122,6 +122,8 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu """ ) + build_as_code_subchecks.build_as_code_subcheck_results.workflow_trigger("publish.yaml") + # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() result: dict[str, float] = {str(key): value for key, value in term_result.items()} @@ -143,6 +145,7 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu if isinstance(deploy_method, DeploySubcheckResults): if ctx.dynamic_data["is_inferred_prov"] and ci_info["provenances"]: + # Store the values for the inferred provenance representation. predicate = ci_info["provenances"][0]["predicate"] predicate["buildType"] = f"Custom {ci_service.name}" predicate["invocation"]["configSource"][ diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 9b2928d93..af6813464 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -78,6 +78,7 @@ class DeploySubcheckResults: source_link: str = "" html_url: str = "" config_name: str = "" + workflow_name: str = "" class BuildAsCodeSubchecks: @@ -88,7 +89,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ctx = ctx self.build_tool: BaseBuildTool = ctx.dynamic_data["build_spec"].get("tool") # type: ignore self.ci_services = ctx.dynamic_data["ci_services"] - self.check_results: dict[str, dict | DeploySubcheckResults] = {} # Update this with each check. + self.check_results: dict[str, DeploySubcheckResults] = {} # Update this with each check. self.ci_info = ci_info self.ci_service = ci_info["service"] # Certainty value to be returned if a subcheck fails. 
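
The workflow_trigger sub-check introduced below keys on the events declared in a
workflow's "on:" section. As a rough standalone sketch of the same idea, using
PyYAML directly rather than Macaron's workflow parser (the helper name and event
set are illustrative assumptions, not part of the patch):

    import yaml

    # GitHub spells the manual trigger "workflow_dispatch"; a parser may surface
    # the event names differently, so treat this set as an assumption.
    VALID_TRIGGER_EVENTS = {"workflow_dispatch", "push", "release"}

    def workflow_trigger_events(workflow_path: str) -> set[str]:
        """Return the trigger events declared by a GitHub Actions workflow."""
        with open(workflow_path, encoding="utf-8") as handle:
            document = yaml.safe_load(handle)
        # YAML 1.1 reads a bare "on" key as the boolean True, so look up both.
        triggers = document.get("on", document.get(True, {}))
        if isinstance(triggers, str):  # on: push
            return {triggers}
        if isinstance(triggers, list):  # on: [push, release]
            return set(triggers)
        return set(triggers or {})  # on: {push: ..., release: ...}

    # The sub-check then reduces to a containment test, e.g.:
    # workflow_trigger_events("publish.yaml") & VALID_TRIGGER_EVENTS
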
@@ -106,7 +107,9 @@ def ci_parsed(self) -> float: if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] - self.check_results["ci_parsed"] = {"certainty": check_certainty, "justification": justification} + self.check_results["ci_parsed"] = DeploySubcheckResults( + certainty=check_certainty, justification=justification + ) return check_certainty return self.failed_check @@ -139,6 +142,8 @@ def deploy_command(self) -> float: os.path.basename(bash_cmd["CI_path"]), ) + workflow_name = os.path.basename(html_url) + justification: list[str | dict[str, str]] = [ { f"The target repository uses build tool {self.build_tool.name} to deploy": bash_source_link, @@ -157,6 +162,7 @@ def deploy_command(self) -> float: trigger_link=trigger_link, source_link=bash_source_link, html_url=html_url, + workflow_name=workflow_name, ) return check_certainty @@ -251,13 +257,57 @@ def deploy_action(self) -> float: trigger_link=trigger_link, source_link=deploy_action_source_link, html_url=html_url, + workflow_name=workflow_name, ) return check_certainty return self.failed_check - def get_subcheck_results(self, subcheck_name: str) -> dict | DeploySubcheckResults: + def workflow_trigger(self, workflow_name: str) -> str: + """Check that the workflow is triggered by a valid event.""" + valid_trigger_events = ["workflow-dispatch", "push", "release"] + for callee in self.ci_info["callgraph"].bfs(): + if callee.name == workflow_name: + trigger_events = callee.parsed_obj.get("On", {}) + for event in trigger_events: + hook = event.get("Hook", {}) + trigger_type = str(hook.get("Value", "")) + if trigger_type in valid_trigger_events: + return trigger_type + return "" + + def workflow_trigger_deploy_command(self) -> float: + """Check the workflow trigger for the required deploy_command workflow file.""" + check_certainty = 0.9 + depends_on = [self.deploy_command() > 0.0] + if not all(depends_on): + return self.failed_check + + workflow_name = self.check_results["deploy_command"].workflow_name + if workflow_name: + trigger_type = self.workflow_trigger(workflow_name=workflow_name) + if trigger_type: + logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) + return check_certainty + return self.failed_check + + def workflow_trigger_deploy_action(self) -> float: + """Check the workflow trigger for the required deploy_action workflow file.""" + check_certainty = 0.9 + depends_on = [self.deploy_action() > 0.0] + if not all(depends_on): + return self.failed_check + + workflow_name = self.check_results["deploy_action"].workflow_name + if workflow_name: + trigger_type = self.workflow_trigger(workflow_name=workflow_name) + if trigger_type: + logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) + return check_certainty + return self.failed_check + + def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" return self.check_results[subcheck_name] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 89b1733b2..93aa7324e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -53,3 +53,27 @@ def deploy_kws_check() -> float: The certainty of the check. 
""" return build_as_code_subcheck_results.deploy_kws() + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_commmand() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subcheck_results.workflow_trigger_deploy_command() + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_action() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + return build_as_code_subcheck_results.workflow_trigger_deploy_action() From c7502c6d983bc32dac9eb94c9a871871a3b8206a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Sat, 10 Jun 2023 12:23:18 +1000 Subject: [PATCH 08/29] chore: store workflow info object for each node in GitHub Actions callgraph Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/ci_service/github_actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions.py b/src/macaron/slsa_analyzer/ci_service/github_actions.py index 7ae85dee9..7d44f5816 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module analyzes GitHub Actions CI.""" @@ -170,7 +170,7 @@ def build_call_graph_from_node(self, node: GitHubNode) -> None: name=step["Exec"]["Uses"]["Value"], node_type=GHWorkflowType.EXTERNAL, source_path="", - parsed_obj={}, + parsed_obj=step["Exec"], caller_path=node.source_path, ) ) From e48003fdabba779f78cc4c2f941001586fc19d17 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Sun, 11 Jun 2023 13:47:56 +1000 Subject: [PATCH 09/29] chore: check that deploy action doesn't have a repository url specified Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_subchecks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index af6813464..67398e470 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -221,6 +221,18 @@ def deploy_action(self) -> float: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue if workflow_name in trusted_deploy_actions: + workflow_info = callee.parsed_obj + inputs = workflow_info.get("Inputs", {}) + + # Deployment is to Pypi if there isn't a repository url + if inputs and inputs.get("repository_url"): + logger.debug( + "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", + callee.name, + ) + continue + + # TODO: all of this logic could be generalized in build_as_code body. 
trigger_link = self.ci_service.api_client.get_file_link( self.ctx.repo_full_name, self.ctx.commit_sha, From 0bc69bd0e2955401bc8f1e23f79a15d60927385f Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 00:51:55 +1000 Subject: [PATCH 10/29] feat: add sub-check for test publish to pypi Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 23 +++++++++++++++++++ .../checks/problog_predicates.py | 10 ++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 67398e470..e73afaf6e 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -200,6 +200,29 @@ def deploy_kws(self) -> float: return self.failed_check + def test_deploy_action(self, workflow_name: str) -> float: + """Check for the use of a test deploy to PyPi given a CI workflow.""" + check_certainty = 0.7 + for callee in self.ci_info["callgraph"].bfs(): + # TODO: figure out a way to generalize this implementation for other external GHAs. + # Currently just checks for the pypa/gh-action-pypi-publish action. + if not workflow_name or callee.node_type not in [ + GHWorkflowType.EXTERNAL, + GHWorkflowType.REUSABLE, + ]: + logger.debug("Workflow %s is not relevant. Skipping...", callee.name) + continue + callee_name = callee.name.split("@")[0] + + if callee_name == workflow_name == "pypa/gh-action-pypi-publish": + workflow_info = callee.parsed_obj + inputs = workflow_info.get("Inputs", {}) + repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") + # TODO: Use values that come from defaults.ini rather than hardcoded. + if repo_url == "https://test.pypi.org/legacy/": + return check_certainty + return self.failed_check + def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" # TODO: verify that deployment is legitimate and not a test diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 93aa7324e..e8c78f609 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -6,6 +6,8 @@ from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results +FAILED_CHECK = 0.0 + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -68,7 +70,7 @@ def workflow_trigger_deploy_commmand() -> float: @problog_export("-int") # type: ignore -def workflow_trigger_deploy_action() -> float: +def test_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -76,4 +78,8 @@ def workflow_trigger_deploy_action() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subcheck_results.workflow_trigger_deploy_action() + depends_on = [deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name + return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) From 8c6f80d47e07cb76c8b787833eccd14fb84df09c Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 00:57:02 +1000 Subject: [PATCH 11/29] chore: restructure problog predicate functions Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 77 ++++++++++++------- .../checks/problog_predicates.py | 26 ++++++- 2 files changed, 72 insertions(+), 31 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index e73afaf6e..6651161f9 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -104,6 +104,7 @@ def ci_parsed(self) -> float: """Check whether parsing is supported for this CI service's CI config files.""" check_certainty = 1.0 # If this check has already been run on this repo, return certainty. + logger.info("CI PARSED") if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] @@ -120,6 +121,8 @@ def deploy_command(self) -> float: if not all(depends_on): return self.failed_check + logger.info("DEPLOY COMMAND") + for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) if deploy_cmd: @@ -299,9 +302,14 @@ def deploy_action(self) -> float: return self.failed_check - def workflow_trigger(self, workflow_name: str) -> str: + def workflow_trigger(self, workflow_name: str = "") -> float: """Check that the workflow is triggered by a valid event.""" + check_certainty = 0.9 + if not workflow_name: + return self.failed_check + valid_trigger_events = ["workflow-dispatch", "push", "release"] + for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_name: trigger_events = callee.parsed_obj.get("On", {}) @@ -309,38 +317,49 @@ def workflow_trigger(self, workflow_name: str) -> str: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) if trigger_type in valid_trigger_events: - return trigger_type - return "" + logger.info( + "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name + ) + return check_certainty + return self.failed_check - def workflow_trigger_deploy_command(self) -> float: - """Check the workflow trigger for the required deploy_command workflow file.""" - check_certainty = 0.9 - depends_on = [self.deploy_command() > 0.0] - if not all(depends_on): - return self.failed_check + # def workflow_uses_secrets(self, ) -> float: + # # TODO: we just want for this specific workflow + # for callee in self.ci_info["callgraph"].bfs(): + # workflow_name = callee.name.split("@")[0] + # blah = callee.parsed_obj - workflow_name = self.check_results["deploy_command"].workflow_name - if workflow_name: - trigger_type = self.workflow_trigger(workflow_name=workflow_name) - if trigger_type: - logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) - return check_certainty - return self.failed_check + # logger.info("WORKFLOW NAME: %s", workflow_name) + # logger.info(blah) - def workflow_trigger_deploy_action(self) -> float: 
- """Check the workflow trigger for the required deploy_action workflow file.""" - check_certainty = 0.9 - depends_on = [self.deploy_action() > 0.0] - if not all(depends_on): - return self.failed_check + # if not workflow_name or callee.node_type not in [ + # GHWorkflowType.EXTERNAL, + # GHWorkflowType.REUSABLE, + # ]: + # logger.debug("Workflow %s is not relevant. Skipping...", callee.name) + # continue + # if workflow_name in trusted_deploy_actions: + # return 0.0 - workflow_name = self.check_results["deploy_action"].workflow_name - if workflow_name: - trigger_type = self.workflow_trigger(workflow_name=workflow_name) - if trigger_type: - logger.info("Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name) - return check_certainty - return self.failed_check + # def pypi_publishing_workflow(self, workflow_id): + # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] + + # # TODO: + # # 1. Figure out how to get the pypi name etc. + + # # 1. Get timestamp of the PyPi package + # # To do this, we need the url of the pypi package + + # # curl returns null if it doesn't exist + + # # 2. Get timestamp of github workflow run + # # Depends on has_latest_run_passed + + # # 2. If timestamp of the publishing workflow is close enough, then chance of the workflow + # # being the one to publish the package is high (rather than manual upload). + + # # http_request + # return def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index e8c78f609..fe9cebdd6 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -58,7 +58,7 @@ def deploy_kws_check() -> float: @problog_export("-int") # type: ignore -def workflow_trigger_deploy_commmand() -> float: +def workflow_trigger_deploy_command_check() -> float: """Get the value of the subcheck. Returns @@ -66,7 +66,29 @@ def workflow_trigger_deploy_commmand() -> float: Certainty The certainty of the check. """ - return build_as_code_subcheck_results.workflow_trigger_deploy_command() + depends_on = [deploy_command_check() > 0.0] + print(all(depends_on)) + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name + return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + + +@problog_export("-int") # type: ignore +def workflow_trigger_deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [deploy_action_check() > 0.0] + print(all(depends_on)) + if not all(depends_on): + return FAILED_CHECK + workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name + return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) @problog_export("-int") # type: ignore From 9e80d79dbf0ca61d6767c80ea4a48f629346227e Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 09:39:08 +1000 Subject: [PATCH 12/29] chore: verify sub-check dependencies in ProbLog predicates rather than sub-checks Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 35 +++---------------- .../checks/problog_predicates.py | 9 +++++ 2 files changed, 13 insertions(+), 31 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 6651161f9..12ca06cdb 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -117,11 +117,6 @@ def ci_parsed(self) -> float: def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" check_certainty = 0.7 - depends_on = [self.ci_parsed() > 0.0] - if not all(depends_on): - return self.failed_check - - logger.info("DEPLOY COMMAND") for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) @@ -174,11 +169,6 @@ def deploy_command(self) -> float: def deploy_kws(self) -> float: """Check for the use of deploy keywords to deploy.""" check_certainty = 0.6 - depends_on = [self.ci_parsed() == 0.0] - # If this check has already been run on this repo, return certainty. - - if not all(depends_on): - return self.failed_check # We currently don't parse these CI configuration files. # We just look for a keyword for now. @@ -228,11 +218,7 @@ def test_deploy_action(self, workflow_name: str) -> float: def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" - # TODO: verify that deployment is legitimate and not a test check_certainty = 0.8 - depends_on = [self.ci_parsed() > 0] - if not all(depends_on): - return self.failed_check if isinstance(self.build_tool, Pip): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) @@ -251,7 +237,7 @@ def deploy_action(self) -> float: inputs = workflow_info.get("Inputs", {}) # Deployment is to Pypi if there isn't a repository url - if inputs and inputs.get("repository_url"): + if inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -310,6 +296,8 @@ def workflow_trigger(self, workflow_name: str = "") -> float: valid_trigger_events = ["workflow-dispatch", "push", "release"] + # TODO: Consider activity types for release, i.e. 
prereleased + for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_name: trigger_events = callee.parsed_obj.get("On", {}) @@ -324,22 +312,7 @@ def workflow_trigger(self, workflow_name: str = "") -> float: return self.failed_check # def workflow_uses_secrets(self, ) -> float: - # # TODO: we just want for this specific workflow - # for callee in self.ci_info["callgraph"].bfs(): - # workflow_name = callee.name.split("@")[0] - # blah = callee.parsed_obj - - # logger.info("WORKFLOW NAME: %s", workflow_name) - # logger.info(blah) - - # if not workflow_name or callee.node_type not in [ - # GHWorkflowType.EXTERNAL, - # GHWorkflowType.REUSABLE, - # ]: - # logger.debug("Workflow %s is not relevant. Skipping...", callee.name) - # continue - # if workflow_name in trusted_deploy_actions: - # return 0.0 + # return # def pypi_publishing_workflow(self, workflow_id): # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index fe9cebdd6..95a2ea3e1 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -30,6 +30,9 @@ def deploy_action_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() > 0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_action() @@ -42,6 +45,9 @@ def deploy_command_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_command() @@ -54,6 +60,9 @@ def deploy_kws_check() -> float: Certainty The certainty of the check. """ + depends_on = [ci_parsed_check() == 0.0] + if not all(depends_on): + return FAILED_CHECK return build_as_code_subcheck_results.deploy_kws() From 74da2c9f193016ec36ec9dea5de9700c96de8509 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Mon, 12 Jun 2023 15:33:15 +1000 Subject: [PATCH 13/29] feat: add API client for PyPI Signed-off-by: sophie-bates --- .../registry_service/api_client.py | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 src/macaron/slsa_analyzer/registry_service/api_client.py diff --git a/src/macaron/slsa_analyzer/registry_service/api_client.py b/src/macaron/slsa_analyzer/registry_service/api_client.py new file mode 100644 index 000000000..bb610766d --- /dev/null +++ b/src/macaron/slsa_analyzer/registry_service/api_client.py @@ -0,0 +1,82 @@ +# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +"""This module provides API clients for Registry services, such as PyPi.""" + +import logging + +from macaron.util import send_get_http + +logger: logging.Logger = logging.getLogger(__name__) + +# TODO: Create BaseAPIClient + + +class PyPIAPIClient: + """This class acts as a client to use PyPi API. + + See https://warehouse.pypa.io/api-reference/ for the PyPI API documentation. + """ + + _PYPI_API_URL = "https://pypi.org/pypi" + + def get_all_project_data(self, project_name: str) -> dict: + """Query PyPi JSON API for the information about an individual project at the latest version. 
+
+        The URL would be in the following form:
+        ``https://pypi.org/pypi/{project_name}/json``
+
+        Parameters
+        ----------
+        project_name : str
+            The full name of the project (case-insensitive).
+
+        Returns
+        -------
+        dict
+            The JSON query result or an empty dict if the query failed.
+
+        Examples
+        --------
+        The following call to this method will perform a query to ``https://pypi.org/pypi/flask/json``.
+
+        >>> pypi_client.get_all_project_data(
+            project_name="flask"
+        )
+        """
+        logger.debug("Query for project %s's data", project_name)
+        url = f"{PyPIAPIClient._PYPI_API_URL}/{project_name}/json"
+        response_data = send_get_http(url, {})
+        return response_data
+
+    def get_release_data(self, project_name: str, version: str) -> dict:
+        """Query the PyPI JSON API for the information about an individual release at a specific version.
+
+        The URL would be in the following form:
+        ``https://pypi.org/pypi/{project_name}/{version}/json``
+
+        Parameters
+        ----------
+        project_name : str
+            The full name of the project (case-insensitive).
+        version : str
+            The version of the project in the form ``*.*.*``.
+
+        Returns
+        -------
+        dict
+            The JSON query result or an empty dict if the query failed.
+
+        Examples
+        --------
+        The following call to this method will perform a query to ``https://pypi.org/pypi/flask/1.0.0/json``.
+
+        >>> pypi_client.get_release_data(
+            project_name="flask",
+            version="1.0.0"
+        )
+        """
+        logger.debug("Query for project %s's data at version %s", project_name, version)
+        url = f"{PyPIAPIClient._PYPI_API_URL}/{project_name}/{version}/json"
+        response_data = send_get_http(url, {})
+        return response_data

From d9203072a5397756c4a489fc9da9463338f5b46c Mon Sep 17 00:00:00 2001
From: sophie-bates
Date: Mon, 12 Jun 2023 17:49:58 +1000
Subject: [PATCH 14/29] chore: get project name from poetry config file

Signed-off-by: sophie-bates
---
 src/macaron/slsa_analyzer/build_tool/base_build_tool.py | 1 +
 src/macaron/slsa_analyzer/build_tool/poetry.py          | 8 ++++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
index 2235b8e5d..64e93d76f 100644
--- a/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
+++ b/src/macaron/slsa_analyzer/build_tool/base_build_tool.py
@@ -79,6 +79,7 @@ def __init__(self, name: str) -> None:
         }
         self.build_log: list[str] = []
         self.wrapper_files: list[str] = []
+        self.project_name: str = ""
 
     def __str__(self) -> str:
         return self.name
diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py
index c101a368d..dd7d276b9 100644
--- a/src/macaron/slsa_analyzer/build_tool/poetry.py
+++ b/src/macaron/slsa_analyzer/build_tool/poetry.py
@@ -64,8 +64,9 @@ def is_detected(self, repo_path: str) -> bool:
 
         if files_detected:
             # If a package_lock file exists, and a config file is present, Poetry build tool is detected.
+            # TODO: package_lock_exists check removed for now so the poetry tool name is stored.
             if package_lock_exists:
-                return True
+                logger.info("Lock file found.")  # return True
             # TODO: this implementation assumes one build type, so when multiple build types are supported, this
             # needs to be updated.
             # Take the highest level file, if there are two at the same level, take the first in the list.
@@ -76,7 +77,10 @@
                 try:
                     data = tomllib.load(toml_file)
                     # Check for the existence of a [tool.poetry] section.
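+                    # ("name" is a required key inside [tool.poetry], so it should be
+                    # present whenever the section itself exists.)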
- if ("tool" in data) and ("poetry" in data["tool"]): + poetry_tool = data.get("tool", {}).get("poetry", {}) + if poetry_tool: + # Store the project name + self.project_name = poetry_tool.get("name") return True except tomllib.TOMLDecodeError: logger.error("Failed to read the %s file: invalid toml file.", conf) From ee392e35cdb192a33e16dfed3ef9e0edd44d2b35 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 02:44:50 +1000 Subject: [PATCH 15/29] chore: extract project name from pip config files Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/pip.py | 56 ++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index 4abdbd09b..dede70047 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -6,7 +6,11 @@ This module is used to work with repositories that use pip for dependency management. """ +import ast +import configparser import logging +import os +import tomllib from macaron.config.defaults import defaults from macaron.dependency_analyzer import DependencyAnalyzer, NoneDependencyAnalyzer @@ -49,7 +53,57 @@ def is_detected(self, repo_path: str) -> bool: """ for file in self.build_configs: if file_exists(repo_path, file): - return True + # Find project name value from the config file. + # TODO: improve this approach. + file_path = os.path.join(repo_path, file) + file_found = "" + if file == "pyproject.toml": + try: + with open(file_path, "rb") as toml_file: + try: + data = tomllib.load(toml_file) + poetry_tool = data.get("tool", {}).get("poetry", {}) + if poetry_tool: + # Store the project name + self.project_name = poetry_tool.get("name") + file_found = file + except tomllib.TOMLDecodeError: + logger.error("Failed to read the %s file: invalid toml file.", file) + except FileNotFoundError: + logger.error("Failed to read the %s file.", file) + + if file == "setup.cfg": + config = configparser.ConfigParser() + try: + config.read(file_path, encoding="utf8") + if "metadata" in config and "name" in config["metadata"]: + self.project_name = config["metadata"]["name"] + file_found = file + except (configparser.Error, ValueError) as error: + logger.error("Failed to read the %s file.", file) + logger.error(error) + + if file == "setup.py": + try: + with open(file_path, "rb") as config_file: + content = config_file.read() + tree = ast.parse(content) + for node in ast.walk(tree): + if ( + isinstance(node, ast.Call) + and isinstance(node.func, ast.Name) + and node.func.id == "setup" + ): + for keyword in node.keywords: + if keyword.arg == "name": + self.project_name = str(keyword.value) + file_found = file + except FileNotFoundError: + logger.info("Failed to read the %s file.", file) + if self.project_name: + return True + if file_found: + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: From 067a2c048d60b0b6aadb13ec0bb0601f6a683ff1 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 02:45:58 +1000 Subject: [PATCH 16/29] chore: setup PyPI registry_service with the PyPI API client Signed-off-by: sophie-bates --- .../slsa_analyzer/registry_service/pypi.py | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/macaron/slsa_analyzer/registry_service/pypi.py diff --git a/src/macaron/slsa_analyzer/registry_service/pypi.py b/src/macaron/slsa_analyzer/registry_service/pypi.py new file mode 100644 index 
000000000..8e822d07b
--- /dev/null
+++ b/src/macaron/slsa_analyzer/registry_service/pypi.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+"""This module contains the spec for the PyPI service."""
+
+from macaron.slsa_analyzer.registry_service.api_client import PyPIAPIClient
+
+
+class PyPI:
+    """This class contains the spec of the PyPI service."""
+
+    def __init__(self) -> None:
+        """Initialize instance."""
+        self._api_client: PyPIAPIClient = None  # type: ignore
+
+    @property
+    def api_client(self) -> PyPIAPIClient:
+        """Return the API client used for querying the PyPI API.
+
+        This client is used to query the PyPI JSON API for package metadata.
+        """
+        if not self._api_client:
+            self._api_client = PyPIAPIClient()
+
+        return self._api_client

From 6a04c4393c9db4454621d691b56d2640604a5127 Mon Sep 17 00:00:00 2001
From: sophie-bates
Date: Tue, 13 Jun 2023 10:27:14 +1000
Subject: [PATCH 17/29] feat: implement sub-check to compare PyPI project timestamp with GHA workflow timestamp

Signed-off-by: sophie-bates
---
 src/macaron/slsa_analyzer/build_tool/pip.py   |  4 +-
 .../checks/build_as_code_subchecks.py         | 61 +++++++++++--------
 .../checks/problog_predicates.py              | 25 +++++++-
 3 files changed, 58 insertions(+), 32 deletions(-)

diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py
index dede70047..a23746dd7 100644
--- a/src/macaron/slsa_analyzer/build_tool/pip.py
+++ b/src/macaron/slsa_analyzer/build_tool/pip.py
@@ -77,7 +77,7 @@ def is_detected(self, repo_path: str) -> bool:
                 try:
                     config.read(file_path, encoding="utf8")
                     if "metadata" in config and "name" in config["metadata"]:
-                        self.project_name = config["metadata"]["name"]
+                        self.project_name = str(config["metadata"]["name"])
                         file_found = file
                 except (configparser.Error, ValueError) as error:
                     logger.error("Failed to read the %s file.", file)
@@ -96,7 +96,7 @@ def is_detected(self, repo_path: str) -> bool:
                             ):
                                 for keyword in node.keywords:
                                     if keyword.arg == "name":
-                                        self.project_name = str(keyword.value)
+                                        self.project_name = ast.literal_eval(keyword.value)
                                         file_found = file
                     except FileNotFoundError:
                         logger.info("Failed to read the %s file.", file)
diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
index 12ca06cdb..5aff490ac 100644
--- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
+++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py
@@ -17,6 +17,7 @@
 from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI
 from macaron.slsa_analyzer.ci_service.jenkins import Jenkins
 from macaron.slsa_analyzer.ci_service.travis import Travis
+from macaron.slsa_analyzer.registry_service.api_client import PyPIAPIClient
 from macaron.slsa_analyzer.specs.ci_spec import CIInfo
 
 logger: logging.Logger = logging.getLogger(__name__)
@@ -103,9 +104,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None:
     def ci_parsed(self) -> float:
         """Check whether parsing is supported for this CI service's CI config files."""
         check_certainty = 1.0
-        # If this check has already been run on this repo, return certainty.
-        logger.info("CI PARSED")
-
+        # TODO: If this check has already been run on this repo, return certainty.
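+        # (For instance, memoise the certainty keyed on the repository's remote path so
+        # repeated ProbLog queries do not re-run the sub-check.)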
if self.ci_info["bash_commands"]: justification: list[str | dict[str, str]] = ["The CI workflow files for this CI service are parsed."] self.check_results["ci_parsed"] = DeploySubcheckResults( @@ -193,9 +192,10 @@ def deploy_kws(self) -> float: return self.failed_check - def test_deploy_action(self, workflow_name: str) -> float: + def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: """Check for the use of a test deploy to PyPi given a CI workflow.""" check_certainty = 0.7 + logger.info("File name: %s", workflow_file) for callee in self.ci_info["callgraph"].bfs(): # TODO: figure out a way to generalize this implementation for other external GHAs. # Currently just checks for the pypa/gh-action-pypi-publish action. @@ -288,10 +288,12 @@ def deploy_action(self) -> float: return self.failed_check - def workflow_trigger(self, workflow_name: str = "") -> float: + # TODO: workflow_name isn't used as a file in some places! + + def workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 - if not workflow_name: + if not workflow_file: return self.failed_check valid_trigger_events = ["workflow-dispatch", "push", "release"] @@ -299,40 +301,45 @@ def workflow_trigger(self, workflow_name: str = "") -> float: # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): - if callee.name == workflow_name: + if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) for event in trigger_events: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) if trigger_type in valid_trigger_events: logger.info( - "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_name + "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file ) return check_certainty return self.failed_check - # def workflow_uses_secrets(self, ) -> float: - # return - - # def pypi_publishing_workflow(self, workflow_id): - # depends_on = [self.workflow_trigger_deploy_command() > 0.0 or self.workflow_trigger_deploy_action() > 0.0] - - # # TODO: - # # 1. Figure out how to get the pypi name etc. - - # # 1. Get timestamp of the PyPi package - # # To do this, we need the url of the pypi package - - # # curl returns null if it doesn't exist + def pypi_publishing_workflow(self) -> float: + """Compare PyPI release timestamp with GHA publishing workflow timestamps.""" + check_certainty = 0.5 + project_name = self.build_tool.project_name + pypi_timestamp = "" + # Query PyPI API for the timestamp of the latest release. + if project_name: + api_client = PyPIAPIClient() + response = api_client.get_all_project_data(project_name=project_name) + latest = response.get("urls", [""])[0] + if latest: + pypi_timestamp = latest.get("upload_time") + if not pypi_timestamp: + return self.failed_check - # # 2. Get timestamp of github workflow run - # # Depends on has_latest_run_passed + # TODO: Collect 10 (?) of the most recent successful workflow runs + workflow_data: dict = {} - # # 2. If timestamp of the publishing workflow is close enough, then chance of the workflow - # # being the one to publish the package is high (rather than manual upload). + workflow_created_timestamp = workflow_data.get("created_at", "") + workflow_updated_timestamp = workflow_data.get("updated_at", "") - # # http_request - # return + # Compare timestamp of most recent PyPI release with several GHAs workflow runs. 
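+        # Both sides are ISO 8601 strings with different shapes: PyPI's upload_time
+        # looks like "2023-06-13T00:27:14" while GitHub's API returns
+        # "2023-06-13T00:27:14Z", so parsing them with datetime before comparing
+        # would be safer than raw string comparison.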
+ if workflow_created_timestamp and workflow_updated_timestamp: + # TODO: convert into datetime object to compare + if workflow_created_timestamp <= pypi_timestamp <= workflow_updated_timestamp: + return check_certainty + return self.failed_check def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 95a2ea3e1..88d10c890 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -2,12 +2,16 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Contains ProbLog predicates that return the results stored in the BuildAsCodeSubchecks dataclass.""" +import logging + from problog.extern import problog_export from macaron.slsa_analyzer.checks.build_as_code_subchecks import build_as_code_subcheck_results FAILED_CHECK = 0.0 +logger: logging.Logger = logging.getLogger(__name__) + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -76,11 +80,10 @@ def workflow_trigger_deploy_command_check() -> float: The certainty of the check. """ depends_on = [deploy_command_check() > 0.0] - print(all(depends_on)) if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -97,7 +100,7 @@ def workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_name=workflow_name) + return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -114,3 +117,19 @@ def test_deploy_action_check() -> float: return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) + + +@problog_export("-int") # type: ignore +def publishing_workflow_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [workflow_trigger_deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] + return build_as_code_subcheck_results.pypi_publishing_workflow() From ddb2d0c6fb88fe7752f4e605b40a60a1eaf8e2ec Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 10:40:02 +1000 Subject: [PATCH 18/29] chore: fix poetry is_detected logic to pass snapshots Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/poetry.py | 6 ++---- .../slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr | 1 - 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/macaron/slsa_analyzer/build_tool/poetry.py b/src/macaron/slsa_analyzer/build_tool/poetry.py index dd7d276b9..8177af1fa 100644 --- a/src/macaron/slsa_analyzer/build_tool/poetry.py +++ b/src/macaron/slsa_analyzer/build_tool/poetry.py @@ -84,12 +84,10 @@ def is_detected(self, repo_path: str) -> bool: return True except tomllib.TOMLDecodeError: logger.error("Failed to read the %s file: invalid toml file.", conf) - return False - return False except FileNotFoundError: logger.error("Failed to read the %s file.", conf) - return False - + if package_lock_exists: + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: diff --git a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr index 24a7fc494..51b7a8431 100644 --- a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr +++ b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr @@ -1,7 +1,6 @@ # serializer version: 1 # name: test_get_build_dirs[mock_repo0] list([ - PosixPath('.'), ]) # --- # name: test_get_build_dirs[mock_repo1] From 7d9a3aa7e57e819c0360d3e506d2fc6682dba335 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 13 Jun 2023 10:40:50 +1000 Subject: [PATCH 19/29] chore: update poetry snapshot Signed-off-by: sophie-bates --- tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr index 51b7a8431..24a7fc494 100644 --- a/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr +++ b/tests/slsa_analyzer/build_tool/__snapshots__/test_poetry.ambr @@ -1,6 +1,7 @@ # serializer version: 1 # name: test_get_build_dirs[mock_repo0] list([ + PosixPath('.'), ]) # --- # name: test_get_build_dirs[mock_repo1] From 08e58bde39fc3ed8663e31c5323b942b22cd9f66 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 09:21:11 +1000 Subject: [PATCH 20/29] chore: remove setup.py file parsing from pip build tool detection Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/build_tool/pip.py | 51 ++++----------------- 1 file changed, 8 insertions(+), 43 deletions(-) diff --git a/src/macaron/slsa_analyzer/build_tool/pip.py b/src/macaron/slsa_analyzer/build_tool/pip.py index a23746dd7..0c9865338 100644 --- a/src/macaron/slsa_analyzer/build_tool/pip.py +++ b/src/macaron/slsa_analyzer/build_tool/pip.py @@ -6,8 +6,6 @@ This module is used to work with repositories that use pip for dependency management. 
""" -import ast -import configparser import logging import os import tomllib @@ -54,56 +52,23 @@ def is_detected(self, repo_path: str) -> bool: for file in self.build_configs: if file_exists(repo_path, file): # Find project name value from the config file. - # TODO: improve this approach. + # TODO: improve this approach, support setup.py file_path = os.path.join(repo_path, file) - file_found = "" if file == "pyproject.toml": try: with open(file_path, "rb") as toml_file: try: data = tomllib.load(toml_file) - poetry_tool = data.get("tool", {}).get("poetry", {}) - if poetry_tool: + project = data.get("project", {}) + if project: # Store the project name - self.project_name = poetry_tool.get("name") - file_found = file + self.project_name = project.get("name", "") + logger.info("Package name: %s", self.project_name) except tomllib.TOMLDecodeError: - logger.error("Failed to read the %s file: invalid toml file.", file) + logger.debug("Failed to read the %s file: invalid toml file.", file) except FileNotFoundError: - logger.error("Failed to read the %s file.", file) - - if file == "setup.cfg": - config = configparser.ConfigParser() - try: - config.read(file_path, encoding="utf8") - if "metadata" in config and "name" in config["metadata"]: - self.project_name = str(config["metadata"]["name"]) - file_found = file - except (configparser.Error, ValueError) as error: - logger.error("Failed to read the %s file.", file) - logger.error(error) - - if file == "setup.py": - try: - with open(file_path, "rb") as config_file: - content = config_file.read() - tree = ast.parse(content) - for node in ast.walk(tree): - if ( - isinstance(node, ast.Call) - and isinstance(node.func, ast.Name) - and node.func.id == "setup" - ): - for keyword in node.keywords: - if keyword.arg == "name": - self.project_name = ast.literal_eval(keyword.value) - file_found = file - except FileNotFoundError: - logger.info("Failed to read the %s file.", file) - if self.project_name: - return True - if file_found: - return True + logger.debug("Failed to read the %s file.", file) + return True return False def prepare_config_files(self, wrapper_path: str, build_dir: str) -> bool: From 03621ae182c076d58b936f2a91006f8cd5b7f373 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 09:44:10 +1000 Subject: [PATCH 21/29] chore: add evidence to BuildAsCodeTable and update ProbLog rules Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 70 ++++++++++++------- .../checks/build_as_code_subchecks.py | 48 ++++++++++--- .../checks/problog_predicates.py | 70 +++++++++++++++---- 3 files changed, 138 insertions(+), 50 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 89bed17fb..23a577751 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -35,6 +35,7 @@ class BuildAsCodeTable(CheckFactsTable, ORMBase): deploy_command: Mapped[str] = mapped_column(String, nullable=True) build_status_url: Mapped[str] = mapped_column(String, nullable=True) confidence_score: Mapped[float] = mapped_column(Float, nullable=True) + evidence: Mapped[str] = mapped_column(String, nullable=True) class BuildAsCodeCheck(BaseCheck): @@ -98,31 +99,36 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu # ProbLog rules to be evaluated. prolog_string = PrologString( """ - :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). 
- - A :: ci_parsed :- ci_parsed_check(A). - B :: deploy_action :- deploy_action_check(B). - C :: deploy_command :- deploy_command_check(C). - D :: deploy_kws :- deploy_kws_check(D). - - 0.80 :: deploy_action_certainty :- deploy_action. - 0.15 :: deploy_action_certainty :- deploy_action, ci_parsed. - - 0.70 :: deploy_command_certainty :- deploy_command. - 0.15 :: deploy_command_certainty :- deploy_command, ci_parsed. - - 0.60 :: deploy_kws_certainty :- deploy_kws. - - build_as_code_check :- deploy_action_certainty; deploy_command_certainty; deploy_kws_certainty. - - query(deploy_command_certainty). - query(deploy_action_certainty). - query(deploy_kws_certainty). - query(build_as_code_check). - """ + :- use_module('src/macaron/slsa_analyzer/checks/problog_predicates.py'). + + A :: ci_parsed :- ci_parsed_check(A). + B :: deploy_action :- deploy_action_check(B). + C :: deploy_command :- deploy_command_check(C). + D :: deploy_kws :- deploy_kws_check(D). + E :: release_workflow_trigger_deploy_command :- release_workflow_trigger_deploy_command_check(E). + F :: release_workflow_trigger_deploy_action :- release_workflow_trigger_deploy_action_check(F). + G :: tested_deploy_action :- tested_deploy_action_check(G). + H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). + I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). + + 0.6 :: deploy_action_certainty :- deploy_action. + %0.10 :: deploy_action_certainty :- tested_deploy_action. + %0.80 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + %0.90 :: deploy_action_certainty :- publishing_workflow_deploy_action. + + 0.45 :: deploy_command_certainty :- deploy_command. + %0.80 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + %0.90 :: deploy_command_certainty :- publishing_workflow_deploy_command. + + 0.60 :: deploy_kws_certainty :- deploy_kws. + + query(deploy_command_certainty). + query(deploy_action_certainty). + query(deploy_kws_certainty). + """ ) - - build_as_code_subchecks.build_as_code_subcheck_results.workflow_trigger("publish.yaml") + # TODO: we want all the logic to be happening inside the rules, + # can we make decisions in here instead of intermediate querying? # Convert the result dictionary from Term:float to str:float term_result: dict[Term, float] = get_evaluatable().create_from(prolog_string).evaluate() @@ -133,12 +139,11 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu "deploy_kws": result["deploy_kws_certainty"], } deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} - confidence_score = result["build_as_code_check"] - check_result["confidence_score"] = confidence_score if deploy_methods_valid.values(): # Determine the deployment method with the highest certainty score. 
highest_certainty = max(deploy_methods_valid, key=deploy_methods_valid.__getitem__) + highest_certainty_score = deploy_methods[highest_certainty] deploy_method = build_as_code_subchecks.build_as_code_subcheck_results.get_subcheck_results( highest_certainty ) @@ -161,6 +166,15 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu predicate["builder"]["id"] = deploy_method.config_name predicate["invocation"]["configSource"]["entryPoint"] = deploy_method.config_name + logger.info(build_as_code_subchecks.build_as_code_subcheck_results.check_results.values()) + + all_evidence = build_as_code_subchecks.build_as_code_subcheck_results.evidence + + distinct_evidence = [*set(all_evidence)] + ev_string = ", ".join(distinct_evidence) + logger.info("Evidence vals %s", ev_string) + + confidence_score = round(highest_certainty_score, 4) check_result["result_tables"] = [ BuildAsCodeTable( build_tool_name=build_tool.name, @@ -169,17 +183,19 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu deploy_command=deploy_method.deploy_cmd, build_status_url=deploy_method.html_url, confidence_score=confidence_score, + evidence=ev_string, ) ] + check_result["confidence_score"] = confidence_score # TODO: compile all justifications # check_result["justification"].append() # TODO: Investigate using proofs + logger.info("The certainty of this check passing is: %s", confidence_score) # Check whether the confidence score is greater than the minimum threshold for this check. if confidence_score >= self.confidence_score_threshold: - logger.info("The certainty of this check passing is: %s", confidence_score) return CheckResultType.PASSED pass_msg = f"The target repository does not use {build_tool.name} to deploy." diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 5aff490ac..e22ce4119 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -95,6 +95,7 @@ def __init__(self, ctx: AnalyzeContext, ci_info: CIInfo) -> None: self.ci_service = ci_info["service"] # Certainty value to be returned if a subcheck fails. self.failed_check = 0.0 + self.evidence: list[str] = [] # TODO: Make subcheck functions available to other checks. @@ -110,12 +111,13 @@ def ci_parsed(self) -> float: self.check_results["ci_parsed"] = DeploySubcheckResults( certainty=check_certainty, justification=justification ) + self.evidence.append("ci_parsed") return check_certainty return self.failed_check def deploy_command(self) -> float: """Check for the use of deploy command to deploy.""" - check_certainty = 0.7 + check_certainty = 0.8 for bash_cmd in self.ci_info["bash_commands"]: deploy_cmd = has_deploy_command(bash_cmd["commands"], self.build_tool) @@ -151,6 +153,7 @@ def deploy_command(self) -> float: if html_url else "However, could not find a passing workflow run.", ] + self.evidence.append("deploy_command") self.check_results["deploy_command"] = DeploySubcheckResults( certainty=check_certainty, @@ -167,7 +170,7 @@ def deploy_command(self) -> float: def deploy_kws(self) -> float: """Check for the use of deploy keywords to deploy.""" - check_certainty = 0.6 + check_certainty = 0.4 # We currently don't parse these CI configuration files. # We just look for a keyword for now. 
@@ -181,6 +184,7 @@ def deploy_kws(self) -> float: return self.failed_check justification: list[str | dict[str, str]] = [f"The target repository uses {deploy_kw} to deploy."] + self.evidence.append("deploy_kws") self.check_results["deploy_kws"] = DeploySubcheckResults( certainty=check_certainty, @@ -192,9 +196,9 @@ def deploy_kws(self) -> float: return self.failed_check - def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: + def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") -> float: """Check for the use of a test deploy to PyPi given a CI workflow.""" - check_certainty = 0.7 + check_certainty = 0.9 logger.info("File name: %s", workflow_file) for callee in self.ci_info["callgraph"].bfs(): # TODO: figure out a way to generalize this implementation for other external GHAs. @@ -213,12 +217,13 @@ def test_deploy_action(self, workflow_file: str = "", workflow_name: str = "") - repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": + self.evidence.append("tested_deploy_action") return check_certainty return self.failed_check def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" - check_certainty = 0.8 + check_certainty = 0.95 if isinstance(self.build_tool, Pip): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) @@ -232,11 +237,15 @@ def deploy_action(self) -> float: ]: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue + + # TODO if workflow_name in trusted_deploy_actions: workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) # Deployment is to Pypi if there isn't a repository url + # https://packaging.python.org/en/latest/guides/ + # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ if inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", @@ -274,6 +283,8 @@ def deploy_action(self) -> float: else "However, could not find a passing workflow run.", ] + self.evidence.append("deploy_action") + self.check_results["deploy_action"] = DeploySubcheckResults( certainty=check_certainty, justification=justification, @@ -290,7 +301,7 @@ def deploy_action(self) -> float: # TODO: workflow_name isn't used as a file in some places! - def workflow_trigger(self, workflow_file: str = "") -> float: + def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 if not workflow_file: @@ -299,7 +310,6 @@ def workflow_trigger(self, workflow_file: str = "") -> float: valid_trigger_events = ["workflow-dispatch", "push", "release"] # TODO: Consider activity types for release, i.e. 
prereleased - for callee in self.ci_info["callgraph"].bfs(): if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) @@ -310,12 +320,19 @@ def workflow_trigger(self, workflow_file: str = "") -> float: logger.info( "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file ) + self.evidence.append("release_workflow_trigger") + justification: list[str | dict[str, str]] = [ + f"Valid trigger event type {trigger_type} used in workflow: {workflow_file}" + ] + self.check_results["release_workflow_trigger"] = DeploySubcheckResults( + justification=justification + ) return check_certainty return self.failed_check - def pypi_publishing_workflow(self) -> float: + def pypi_publishing_workflow_timestamp(self) -> float: """Compare PyPI release timestamp with GHA publishing workflow timestamps.""" - check_certainty = 0.5 + check_certainty = 0.9 project_name = self.build_tool.project_name pypi_timestamp = "" # Query PyPI API for the timestamp of the latest release. @@ -328,8 +345,9 @@ def pypi_publishing_workflow(self) -> float: if not pypi_timestamp: return self.failed_check - # TODO: Collect 10 (?) of the most recent successful workflow runs + # TODO: Collect 5 of the most recent successful workflow runs workflow_data: dict = {} + workflow_name = "" workflow_created_timestamp = workflow_data.get("created_at", "") workflow_updated_timestamp = workflow_data.get("updated_at", "") @@ -338,9 +356,19 @@ def pypi_publishing_workflow(self) -> float: if workflow_created_timestamp and workflow_updated_timestamp: # TODO: convert into datetime object to compare if workflow_created_timestamp <= pypi_timestamp <= workflow_updated_timestamp: + self.evidence.append("publish_timestamp") + justification: list[str | dict[str, str]] = [ + f"The timestamp of workflow {workflow_name} matches with the PyPI package release time." + ] + self.check_results["publish_timestamp"] = DeploySubcheckResults(justification=justification) return check_certainty return self.failed_check + def step_uses_secrets(self) -> float: + """Identify whether a workflow step uses secrets.""" + check_certainty = 0.85 + return check_certainty + def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" return self.check_results[subcheck_name] diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 88d10c890..b952c758e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -22,7 +22,10 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. 
""" - return build_as_code_subcheck_results.ci_parsed() + subtask = build_as_code_subcheck_results.ci_parsed() + if subtask > 0: + logger.info("Evidence found: ci_parsed -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -37,7 +40,10 @@ def deploy_action_check() -> float: depends_on = [ci_parsed_check() > 0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_action() + subtask = build_as_code_subcheck_results.deploy_action() + if subtask > 0: + logger.info("Evidence found: deploy_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -52,7 +58,10 @@ def deploy_command_check() -> float: depends_on = [ci_parsed_check() > 0.0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_command() + subtask = build_as_code_subcheck_results.deploy_command() + if subtask > 0: + logger.info("Evidence found: deploy_command -> %s", subtask) + return subtask @problog_export("-int") # type: ignore @@ -67,11 +76,14 @@ def deploy_kws_check() -> float: depends_on = [ci_parsed_check() == 0.0] if not all(depends_on): return FAILED_CHECK - return build_as_code_subcheck_results.deploy_kws() + subtask = build_as_code_subcheck_results.deploy_kws() + if subtask > 0: + logger.info("Evidence found: deploy_kws -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def workflow_trigger_deploy_command_check() -> float: +def release_workflow_trigger_deploy_command_check() -> float: """Get the value of the subcheck. Returns @@ -83,11 +95,15 @@ def workflow_trigger_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) + subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + if subtask > 0: + logger.info("Evidence found: release_workflow_trigger_command -> %s", subtask) + # build_as_code_subcheck_results.check_results["deploy_command"].sub_tasks["release_workflow_trigger"] = subtask + return subtask @problog_export("-int") # type: ignore -def workflow_trigger_deploy_action_check() -> float: +def release_workflow_trigger_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -100,11 +116,14 @@ def workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.workflow_trigger(workflow_file=workflow_name) + subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + if subtask > 0: + logger.info("Evidence found: release_workflow_trigger_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def test_deploy_action_check() -> float: +def tested_deploy_action_check() -> float: """Get the value of the subcheck. 
Returns @@ -116,11 +135,14 @@ def test_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.test_deploy_action(workflow_name=workflow_name) + subtask = build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) + if subtask > 0: + logger.info("Evidence found: test_deploy_action -> %s", subtask) + return subtask @problog_export("-int") # type: ignore -def publishing_workflow_check() -> float: +def publishing_workflow_deploy_action_check() -> float: """Get the value of the subcheck. Returns @@ -128,8 +150,30 @@ def publishing_workflow_check() -> float: Certainty The certainty of the check. """ - depends_on = [workflow_trigger_deploy_action_check() > 0.0] + depends_on = [release_workflow_trigger_deploy_action_check()] if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - return build_as_code_subcheck_results.pypi_publishing_workflow() + subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + if subtask > 0: + logger.info("Evidence found: publishing_workflow_check -> %s", subtask) + return subtask + + +@problog_export("-int") # type: ignore +def publishing_workflow_deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + depends_on = [release_workflow_trigger_deploy_command_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] + subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + if subtask > 0: + logger.info("Evidence found: publishing_workflow_check -> %s", subtask) + return subtask From ca00db49eadf81b27de9e5d42a277e48aaf2a080 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:48:54 +1000 Subject: [PATCH 22/29] chore: fix repository_url check Signed-off-by: sophie-bates --- src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index e22ce4119..18aa6111c 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -246,7 +246,7 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - if inputs.get("repository_url"): + if inputs and inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. 
Skipping...", callee.name, From c04d887d90f4b5ebe4d4f07092082edeb88bca18 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:50:21 +1000 Subject: [PATCH 23/29] chore: fix logging of sub-task results Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 17 +++++-- .../checks/problog_predicates.py | 48 +++++-------------- 2 files changed, 25 insertions(+), 40 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 18aa6111c..874b37eed 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -112,6 +112,7 @@ def ci_parsed(self) -> float: certainty=check_certainty, justification=justification ) self.evidence.append("ci_parsed") + logger.info("Evidence found: ci_parsed -> %s", check_certainty) return check_certainty return self.failed_check @@ -154,7 +155,7 @@ def deploy_command(self) -> float: else "However, could not find a passing workflow run.", ] self.evidence.append("deploy_command") - + logger.info("Evidence found: deploy_command -> %s", check_certainty) self.check_results["deploy_command"] = DeploySubcheckResults( certainty=check_certainty, justification=justification, @@ -192,6 +193,7 @@ def deploy_kws(self) -> float: deploy_cmd=deploy_kw, config_name=config_name, ) + logger.info("Evidence found: deploy_kws -> %s", check_certainty) return check_certainty return self.failed_check @@ -218,6 +220,7 @@ def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": self.evidence.append("tested_deploy_action") + logger.info("Evidence found: tested_deploy_action -> %s", check_certainty) return check_certainty return self.failed_check @@ -246,7 +249,8 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - if inputs and inputs.get("repository_url"): + logger.info("inputs") + if inputs and inputs.get("repository_url", ""): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -284,6 +288,7 @@ def deploy_action(self) -> float: ] self.evidence.append("deploy_action") + logger.info("Evidence found: deploy_action -> %s", check_certainty) self.check_results["deploy_action"] = DeploySubcheckResults( certainty=check_certainty, @@ -327,6 +332,8 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: self.check_results["release_workflow_trigger"] = DeploySubcheckResults( justification=justification ) + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + return check_certainty return self.failed_check @@ -361,12 +368,16 @@ def pypi_publishing_workflow_timestamp(self) -> float: f"The timestamp of workflow {workflow_name} matches with the PyPI package release time." 
] self.check_results["publish_timestamp"] = DeploySubcheckResults(justification=justification) + logger.info("Evidence found: publishing_workflow_timestamp -> %s", check_certainty) return check_certainty + return self.failed_check def step_uses_secrets(self) -> float: """Identify whether a workflow step uses secrets.""" - check_certainty = 0.85 + check_certainty = 0 # 0.85 + logger.info("Evidence found: step_secrets -> %s", check_certainty) + return check_certainty def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index b952c758e..1dfde62ca 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -12,6 +12,8 @@ logger: logging.Logger = logging.getLogger(__name__) +# TODO: check that a result doesn't already exist before running the check. + @problog_export("-int") # type: ignore def ci_parsed_check() -> float: @@ -22,10 +24,7 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ - subtask = build_as_code_subcheck_results.ci_parsed() - if subtask > 0: - logger.info("Evidence found: ci_parsed -> %s", subtask) - return subtask + return build_as_code_subcheck_results.ci_parsed() @problog_export("-int") # type: ignore @@ -40,10 +39,7 @@ def deploy_action_check() -> float: depends_on = [ci_parsed_check() > 0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_action() - if subtask > 0: - logger.info("Evidence found: deploy_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_action() @problog_export("-int") # type: ignore @@ -58,10 +54,7 @@ def deploy_command_check() -> float: depends_on = [ci_parsed_check() > 0.0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_command() - if subtask > 0: - logger.info("Evidence found: deploy_command -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_command() @problog_export("-int") # type: ignore @@ -76,10 +69,7 @@ def deploy_kws_check() -> float: depends_on = [ci_parsed_check() == 0.0] if not all(depends_on): return FAILED_CHECK - subtask = build_as_code_subcheck_results.deploy_kws() - if subtask > 0: - logger.info("Evidence found: deploy_kws -> %s", subtask) - return subtask + return build_as_code_subcheck_results.deploy_kws() @problog_export("-int") # type: ignore @@ -95,11 +85,7 @@ def release_workflow_trigger_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) - if subtask > 0: - logger.info("Evidence found: release_workflow_trigger_command -> %s", subtask) - # build_as_code_subcheck_results.check_results["deploy_command"].sub_tasks["release_workflow_trigger"] = subtask - return subtask + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -116,10 +102,7 @@ def release_workflow_trigger_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - subtask = build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) - if subtask > 0: - 
logger.info("Evidence found: release_workflow_trigger_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) @problog_export("-int") # type: ignore @@ -135,10 +118,7 @@ def tested_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - subtask = build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) - if subtask > 0: - logger.info("Evidence found: test_deploy_action -> %s", subtask) - return subtask + return build_as_code_subcheck_results.tested_deploy_action(workflow_name=workflow_name) @problog_export("-int") # type: ignore @@ -154,10 +134,7 @@ def publishing_workflow_deploy_action_check() -> float: if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() - if subtask > 0: - logger.info("Evidence found: publishing_workflow_check -> %s", subtask) - return subtask + return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() @problog_export("-int") # type: ignore @@ -173,7 +150,4 @@ def publishing_workflow_deploy_command_check() -> float: if not all(depends_on): return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] - subtask = build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() - if subtask > 0: - logger.info("Evidence found: publishing_workflow_check -> %s", subtask) - return subtask + return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() From 24be921b7ac33ea8dc07ec608ec861c0a5106f46 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Tue, 20 Jun 2023 10:58:10 +1000 Subject: [PATCH 24/29] chore: update ProbLog rules likelihood values Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_check.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 23a577751..56cb9c4f1 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -111,16 +111,16 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). - 0.6 :: deploy_action_certainty :- deploy_action. + 0.8 :: deploy_action_certainty :- deploy_action. %0.10 :: deploy_action_certainty :- tested_deploy_action. - %0.80 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. - %0.90 :: deploy_action_certainty :- publishing_workflow_deploy_action. + %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + %0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. - 0.45 :: deploy_command_certainty :- deploy_command. - %0.80 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. - %0.90 :: deploy_command_certainty :- publishing_workflow_deploy_command. + 0.75 :: deploy_command_certainty :- deploy_command. + %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. 
- 0.60 :: deploy_kws_certainty :- deploy_kws. + 0.70 :: deploy_kws_certainty :- deploy_kws. query(deploy_command_certainty). query(deploy_action_certainty). From 5c641b82fcac86c6d8ecfbc2d090e33af424ac59 Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 00:10:07 +1000 Subject: [PATCH 25/29] feat: add sub-task to check for secrets used in same workflow step as deployment method Signed-off-by: sophie-bates --- src/macaron/parsers/bashparser.py | 9 +++- .../checks/build_as_code_check.py | 7 ++- .../checks/build_as_code_subchecks.py | 47 ++++++++++++++++--- .../checks/problog_predicates.py | 46 ++++++++++++++++++ .../ci_service/github_actions.py | 1 + 5 files changed, 101 insertions(+), 9 deletions(-) diff --git a/src/macaron/parsers/bashparser.py b/src/macaron/parsers/bashparser.py index f7b03d9f5..7f44d52be 100644 --- a/src/macaron/parsers/bashparser.py +++ b/src/macaron/parsers/bashparser.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022 - 2022, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module is a Python wrapper for the compiled bashparser binary. @@ -33,6 +33,7 @@ class BashCommands(TypedDict): """CI service type.""" commands: list[list[str]] """Parsed bash commands.""" + workflow_info: dict def parse_file(file_path: str, macaron_path: str = "") -> dict: @@ -115,6 +116,7 @@ def extract_bash_from_ci( bash_content: str, ci_file: str, ci_type: str, + workflow_info: dict, macaron_path: str = "", recursive: bool = False, repo_path: str = "", @@ -152,7 +154,9 @@ def extract_bash_from_ci( parsed_parent = parse(bash_content) caller_commands = parsed_parent.get("commands", []) if caller_commands: - yield BashCommands(caller_path=ci_file, CI_path=ci_file, CI_type=ci_type, commands=caller_commands) + yield BashCommands( + caller_path=ci_file, CI_path=ci_file, CI_type=ci_type, commands=caller_commands, workflow_info=workflow_info + ) # Parse the bash script files called from the current script. if recursive and repo_path: @@ -171,4 +175,5 @@ def extract_bash_from_ci( CI_path=ci_file, CI_type=ci_type, commands=callee_commands, + workflow_info=workflow_info, ) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 56cb9c4f1..4a081b753 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -55,7 +55,7 @@ def __init__(self) -> None: ("mcn_trusted_builder_level_three_1", CheckResultType.FAILED), ] eval_reqs = [ReqName.BUILD_AS_CODE] - self.confidence_score_threshold = 0.3 + self.confidence_score_threshold = 0.7 super().__init__( check_id="mcn_build_as_code_1", @@ -110,15 +110,19 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu G :: tested_deploy_action :- tested_deploy_action_check(G). H :: publishing_workflow_deploy_command :- publishing_workflow_deploy_command_check(H). I :: publishing_workflow_deploy_action :- publishing_workflow_deploy_action_check(I). + J :: step_uses_secrets_deploy_action :- step_uses_secrets_deploy_action_check(J). + K :: step_uses_secrets_deploy_command :- step_uses_secrets_deploy_command_check(K). 0.8 :: deploy_action_certainty :- deploy_action. %0.10 :: deploy_action_certainty :- tested_deploy_action. %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. 
%0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. + 0.65 :: deploy_action_certainty :- step_uses_secrets_deploy_action. 0.75 :: deploy_command_certainty :- deploy_command. %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. + 0.65 :: deploy_command_certainty :- step_uses_secrets_deploy_command. 0.70 :: deploy_kws_certainty :- deploy_kws. @@ -138,6 +142,7 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu "deploy_action": result["deploy_action_certainty"], "deploy_kws": result["deploy_kws_certainty"], } + deploy_methods_valid = {key: value for key, value in deploy_methods.items() if value != 0} if deploy_methods_valid.values(): diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 874b37eed..1f6597695 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -5,6 +5,7 @@ import logging import os +import re from attr import dataclass @@ -80,6 +81,7 @@ class DeploySubcheckResults: html_url: str = "" config_name: str = "" workflow_name: str = "" + workflow_info: dict = {} class BuildAsCodeSubchecks: @@ -143,6 +145,7 @@ def deploy_command(self) -> float: ) workflow_name = os.path.basename(html_url) + workflow_info = bash_cmd["workflow_info"] justification: list[str | dict[str, str]] = [ { @@ -164,6 +167,7 @@ def deploy_command(self) -> float: source_link=bash_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_info=workflow_info, ) return check_certainty @@ -249,8 +253,7 @@ def deploy_action(self) -> float: # Deployment is to Pypi if there isn't a repository url # https://packaging.python.org/en/latest/guides/ # publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ - logger.info("inputs") - if inputs and inputs.get("repository_url", ""): + if inputs and inputs.get("repository_url"): logger.debug( "Workflow %s has a repository url, indicating a non-legit publish to PyPi. Skipping...", callee.name, @@ -298,6 +301,7 @@ def deploy_action(self) -> float: source_link=deploy_action_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_info=workflow_info, ) return check_certainty @@ -373,12 +377,43 @@ def pypi_publishing_workflow_timestamp(self) -> float: return self.failed_check - def step_uses_secrets(self) -> float: + def step_uses_secrets(self, step_info: dict) -> float: """Identify whether a workflow step uses secrets.""" - check_certainty = 0 # 0.85 - logger.info("Evidence found: step_secrets -> %s", check_certainty) + check_certainty = 0.9 + + logger.info("STEP") + logger.info(step_info) + + # inputs = step_info.get("Inputs", {}) + logger.info("inputs: %s", step_info) + if self._step_uses_secrets(step_info): + self.evidence.append("deploy_step_uses_secrets") + logger.info("Evidence found: step_secrets -> %s", check_certainty) + justification: list[str | dict[str, str]] = [ + "The workflow step that contains the deployment method uses secrets." 
+ ] + self.check_results["step_secrets"] = DeploySubcheckResults(justification=justification) + return check_certainty + return self.failed_check - return check_certainty + def _step_uses_secrets(self, inputs: dict) -> bool: + """Recurse through GitHub Actions syntax tree to find the use of secrets.""" + for value in inputs.values(): + if isinstance(value, str): + # Match the pattern '${{ content }}' + pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE) + match = pattern.match(value) + if match is not None: + content = match.group(1).strip() + contents = content.split(".") + # Note that we only support the case: ${{ secrets.TOKEN }} for now. + # Exclude 'secrets.GITHUB_TOKEN'.. + if len(contents) == 2 and (contents[0] == "secrets") and (contents[1] != "GITHUB_TOKEN"): + return True + elif isinstance(value, dict): + if self._step_uses_secrets(value): + return True + return False def get_subcheck_results(self, subcheck_name: str) -> DeploySubcheckResults: """Return the results for a particular subcheck.""" diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 1dfde62ca..7140ada1e 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -37,8 +37,12 @@ def deploy_action_check() -> float: The certainty of the check. """ depends_on = [ci_parsed_check() > 0] + # Verify dependencies and that this check hasn't already been run. if not all(depends_on): return FAILED_CHECK + check = build_as_code_subcheck_results.check_results.get("deploy_action") + if check: + return check.certainty return build_as_code_subcheck_results.deploy_action() @@ -52,8 +56,12 @@ def deploy_command_check() -> float: The certainty of the check. """ depends_on = [ci_parsed_check() > 0.0] + # Verify dependencies and that this check hasn't already been run. + check = build_as_code_subcheck_results.check_results.get("deploy_command") if not all(depends_on): return FAILED_CHECK + if check: + return check.certainty return build_as_code_subcheck_results.deploy_command() @@ -151,3 +159,41 @@ def publishing_workflow_deploy_command_check() -> float: return FAILED_CHECK # workflow_name = build_as_code_subcheck_results.check_results["deploy_action"] return build_as_code_subcheck_results.pypi_publishing_workflow_timestamp() + + +@problog_export("-int") # type: ignore +def step_uses_secrets_deploy_command_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. + """ + # TODO: currently we don't store the GHA object during deploy_command_check so + # can't perform this sub-task (no workflow_info available). + depends_on = [deploy_command_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + step_info = build_as_code_subcheck_results.check_results["deploy_command"].workflow_info + if step_info: + return build_as_code_subcheck_results.step_uses_secrets(step_info=step_info) + return FAILED_CHECK + + +@problog_export("-int") # type: ignore +def step_uses_secrets_deploy_action_check() -> float: + """Get the value of the subcheck. + + Returns + ------- + Certainty + The certainty of the check. 
+ """ + depends_on = [deploy_action_check() > 0.0] + if not all(depends_on): + return FAILED_CHECK + step_info = build_as_code_subcheck_results.check_results["deploy_action"].workflow_info + if step_info: + return build_as_code_subcheck_results.step_uses_secrets(step_info=step_info) + return FAILED_CHECK diff --git a/src/macaron/slsa_analyzer/ci_service/github_actions.py b/src/macaron/slsa_analyzer/ci_service/github_actions.py index 7d44f5816..836b4d0f8 100644 --- a/src/macaron/slsa_analyzer/ci_service/github_actions.py +++ b/src/macaron/slsa_analyzer/ci_service/github_actions.py @@ -278,6 +278,7 @@ def extract_all_bash(self, callgraph: CallGraph, macaron_path: str = "") -> Iter step["Exec"]["Run"]["Value"], ci_file=self.api_client.get_relative_path_of_workflow(callee.name), ci_type="github_actions", + workflow_info=step, recursive=True, repo_path=callgraph.repo_path, working_dir=step["Exec"]["WorkingDirectory"] or "", From 2ebdea2b19efd521d869a8214da8b479e7b7e7ca Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 10:19:24 +1000 Subject: [PATCH 26/29] chore: store workflow_file in deploy_action and deploy_command checks for trigger event type sub-task Signed-off-by: sophie-bates --- .../checks/build_as_code_check.py | 6 +++--- .../checks/build_as_code_subchecks.py | 20 ++++++++++++------- .../checks/problog_predicates.py | 12 ++++++----- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_check.py b/src/macaron/slsa_analyzer/checks/build_as_code_check.py index 4a081b753..40d5b7401 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_check.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_check.py @@ -114,13 +114,13 @@ def run_check(self, ctx: AnalyzeContext, check_result: CheckResult) -> CheckResu K :: step_uses_secrets_deploy_command :- step_uses_secrets_deploy_command_check(K). 0.8 :: deploy_action_certainty :- deploy_action. - %0.10 :: deploy_action_certainty :- tested_deploy_action. - %0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. + 0.10 :: deploy_action_certainty :- tested_deploy_action. + 0.85 :: deploy_action_certainty :- release_workflow_trigger_deploy_action. %0.95 :: deploy_action_certainty :- publishing_workflow_deploy_action. 0.65 :: deploy_action_certainty :- step_uses_secrets_deploy_action. 0.75 :: deploy_command_certainty :- deploy_command. - %0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. + 0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command. %0.95 :: deploy_command_certainty :- publishing_workflow_deploy_command. 0.65 :: deploy_command_certainty :- step_uses_secrets_deploy_command. 
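
[The ProbLog rule block in the hunk above combines several probabilistic rules that share one head. Under ProbLog's semantics this is a noisy-or: the head's probability is 1 minus the product of the complements of the weights of every rule whose body holds. A minimal, self-contained sketch of that combination follows — an illustration only, not part of the patch series; the two facts are stand-ins for the exported predicate results, assumed here to have succeeded with certainty 1.0. It uses the same problog API the check already imports:

from problog import get_evaluatable
from problog.program import PrologString

# Stand-in facts: both sub-checks are assumed to have fired with certainty 1.0.
# In the real check these probabilities come from the exported predicates.
MODEL = PrologString(
    """
    deploy_command.
    release_workflow_trigger_deploy_command.

    0.75 :: deploy_command_certainty :- deploy_command.
    0.85 :: deploy_command_certainty :- release_workflow_trigger_deploy_command.

    query(deploy_command_certainty).
    """
)

# Noisy-or combination: 1 - (1 - 0.75) * (1 - 0.85) = 0.9625.
for term, probability in get_evaluatable().create_from(MODEL).evaluate().items():
    print(term, probability)

With both rules firing, the aggregated certainty (0.9625) comfortably clears the 0.7 confidence_score_threshold set earlier in the series, whereas the deploy command on its own (0.75) only narrowly does.]
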
diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 1f6597695..7d0c982a6 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -81,6 +81,7 @@ class DeploySubcheckResults: html_url: str = "" config_name: str = "" workflow_name: str = "" + workflow_file: str = "" workflow_info: dict = {} @@ -144,7 +145,7 @@ def deploy_command(self) -> float: os.path.basename(bash_cmd["CI_path"]), ) - workflow_name = os.path.basename(html_url) + workflow_file = os.path.basename(trigger_link) workflow_info = bash_cmd["workflow_info"] justification: list[str | dict[str, str]] = [ @@ -166,7 +167,7 @@ def deploy_command(self) -> float: trigger_link=trigger_link, source_link=bash_source_link, html_url=html_url, - workflow_name=workflow_name, + workflow_file=workflow_file, workflow_info=workflow_info, ) @@ -220,7 +221,9 @@ def tested_deploy_action(self, workflow_file: str = "", workflow_name: str = "") if callee_name == workflow_name == "pypa/gh-action-pypi-publish": workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) - repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") + repo_url = "" + if inputs: + repo_url = inputs.get("repository_url", {}).get("Value", {}).get("Value", "") # TODO: Use values that come from defaults.ini rather than hardcoded. if repo_url == "https://test.pypi.org/legacy/": self.evidence.append("tested_deploy_action") @@ -245,7 +248,6 @@ def deploy_action(self) -> float: logger.debug("Workflow %s is not relevant. Skipping...", callee.name) continue - # TODO if workflow_name in trusted_deploy_actions: workflow_info = callee.parsed_obj inputs = workflow_info.get("Inputs", {}) @@ -278,6 +280,8 @@ def deploy_action(self) -> float: os.path.basename(callee.caller_path), ) + workflow_file = os.path.basename(trigger_link) + # TODO: include in the justification multiple cases of external action usage justification: list[str | dict[str, str]] = [ { @@ -301,6 +305,7 @@ def deploy_action(self) -> float: source_link=deploy_action_source_link, html_url=html_url, workflow_name=workflow_name, + workflow_file=workflow_file, workflow_info=workflow_info, ) @@ -313,6 +318,7 @@ def deploy_action(self) -> float: def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 + if not workflow_file: return self.failed_check @@ -320,11 +326,14 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): + # Find the workflow file that the deployment method was used in and + # extract the trigger event types. if callee.name == workflow_file: trigger_events = callee.parsed_obj.get("On", {}) for event in trigger_events: hook = event.get("Hook", {}) trigger_type = str(hook.get("Value", "")) + # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: logger.info( "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file @@ -381,9 +390,6 @@ def step_uses_secrets(self, step_info: dict) -> float: """Identify whether a workflow step uses secrets.""" check_certainty = 0.9 - logger.info("STEP") - logger.info(step_info) - # inputs = step_info.get("Inputs", {}) logger.info("inputs: %s", step_info) if self._step_uses_secrets(step_info): diff --git a/src/macaron/slsa_analyzer/checks/problog_predicates.py b/src/macaron/slsa_analyzer/checks/problog_predicates.py index 7140ada1e..0cbbdff51 100644 --- a/src/macaron/slsa_analyzer/checks/problog_predicates.py +++ b/src/macaron/slsa_analyzer/checks/problog_predicates.py @@ -24,6 +24,9 @@ def ci_parsed_check() -> float: Certainty The certainty of the check. """ + check = build_as_code_subcheck_results.check_results.get("ci_parsed") + if check: + return check.certainty return build_as_code_subcheck_results.ci_parsed() @@ -92,8 +95,8 @@ def release_workflow_trigger_deploy_command_check() -> float: depends_on = [deploy_command_check() > 0.0] if not all(depends_on): return FAILED_CHECK - workflow_name = build_as_code_subcheck_results.check_results["deploy_command"].workflow_name - return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + workflow_file = build_as_code_subcheck_results.check_results["deploy_command"].workflow_file + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_file) @problog_export("-int") # type: ignore @@ -106,11 +109,10 @@ def release_workflow_trigger_deploy_action_check() -> float: The certainty of the check. """ depends_on = [deploy_action_check() > 0.0] - print(all(depends_on)) if not all(depends_on): return FAILED_CHECK - workflow_name = build_as_code_subcheck_results.check_results["deploy_action"].workflow_name - return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_name) + workflow_file = build_as_code_subcheck_results.check_results["deploy_action"].workflow_file + return build_as_code_subcheck_results.release_workflow_trigger(workflow_file=workflow_file) @problog_export("-int") # type: ignore From a84c2e4b8e61138fbe2c38dbae276316d1cb947c Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 17:50:47 +1000 Subject: [PATCH 27/29] chore: add tox -e release as supported deploy tool Signed-off-by: sophie-bates --- src/macaron/config/defaults.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/macaron/config/defaults.ini b/src/macaron/config/defaults.ini index 355dfea56..80c818be8 100644 --- a/src/macaron/config/defaults.ini +++ b/src/macaron/config/defaults.ini @@ -237,6 +237,7 @@ publisher = twine flit conda + tox # These are the Python interpreters that may be used to load modules. 
interpreter = python @@ -250,6 +251,7 @@ build_arg = deploy_arg = publish upload + release [builder.pip.ci.deploy] github_actions = pypa/gh-action-pypi-publish From 48ca217864aef58dc3b2463a9916d55de843aa2a Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Wed, 21 Jun 2023 17:53:02 +1000 Subject: [PATCH 28/29] chore: include Poetry projects for deploy_action check Signed-off-by: sophie-bates --- .../slsa_analyzer/checks/build_as_code_subchecks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 7d0c982a6..925432a09 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -13,6 +13,7 @@ from macaron.slsa_analyzer.analyze_context import AnalyzeContext from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.build_tool.pip import Pip +from macaron.slsa_analyzer.build_tool.poetry import Poetry from macaron.slsa_analyzer.ci_service.circleci import CircleCI from macaron.slsa_analyzer.ci_service.github_actions import GHWorkflowType from macaron.slsa_analyzer.ci_service.gitlab_ci import GitLabCI @@ -235,7 +236,7 @@ def deploy_action(self) -> float: """Check for use of a trusted Github Actions workflow to publish/deploy.""" check_certainty = 0.95 - if isinstance(self.build_tool, Pip): + if isinstance(self.build_tool, (Pip, Poetry)): trusted_deploy_actions = defaults.get_list("builder.pip.ci.deploy", "github_actions", fallback=[]) for callee in self.ci_info["callgraph"].bfs(): @@ -336,11 +337,11 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: logger.info( - "Valid trigger event %s found for the workflow file %s.", trigger_type, workflow_file + "Valid trigger event '%s' found for the workflow file: %s.", trigger_type, workflow_file ) self.evidence.append("release_workflow_trigger") justification: list[str | dict[str, str]] = [ - f"Valid trigger event type {trigger_type} used in workflow: {workflow_file}" + f"Valid trigger event type '{trigger_type}' used in workflow file: {workflow_file}" ] self.check_results["release_workflow_trigger"] = DeploySubcheckResults( justification=justification From 44b36c87d59b1a83cddc4560d0eec7b28762c1bc Mon Sep 17 00:00:00 2001 From: sophie-bates Date: Fri, 30 Jun 2023 17:16:13 +1000 Subject: [PATCH 29/29] chore: update release workflow trigger sub-task to penalize certainty for particular event types Signed-off-by: sophie-bates --- .../checks/build_as_code_subchecks.py | 38 ++++++++++++------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py index 925432a09..f257e8cc2 100644 --- a/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py +++ b/src/macaron/slsa_analyzer/checks/build_as_code_subchecks.py @@ -319,11 +319,15 @@ def deploy_action(self) -> float: def release_workflow_trigger(self, workflow_file: str = "") -> float: """Check that the workflow is triggered by a valid event.""" check_certainty = 0.9 + check_certainty_lowered = 0.75 if not workflow_file: return self.failed_check - valid_trigger_events = ["workflow-dispatch", "push", "release"] + valid_trigger_events = ["workflow_dispatch", "push", "release", "create"] + invalid_trigger_events = ["pull_request"] + valid_trigger = [""] + invalid_trigger = "" # TODO: Consider activity types for release, i.e. prereleased for callee in self.ci_info["callgraph"].bfs(): @@ -336,19 +340,25 @@ def release_workflow_trigger(self, workflow_file: str = "") -> float: trigger_type = str(hook.get("Value", "")) # Check that the identified event trigger type is a valid release event. 
if trigger_type in valid_trigger_events: - logger.info( - "Valid trigger event '%s' found for the workflow file: %s.", trigger_type, workflow_file - ) - self.evidence.append("release_workflow_trigger") - justification: list[str | dict[str, str]] = [ - f"Valid trigger event type '{trigger_type}' used in workflow file: {workflow_file}" - ] - self.check_results["release_workflow_trigger"] = DeploySubcheckResults( - justification=justification - ) - logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + valid_trigger.append(trigger_type) + if trigger_type in invalid_trigger_events: + invalid_trigger = trigger_type - return check_certainty + if valid_trigger: + logger.info( + "Valid trigger event '%s' found for the workflow file: %s.", valid_trigger[0], workflow_file + ) + self.evidence.append("release_workflow_trigger") + justification: list[str | dict[str, str]] = [ + f"Valid trigger event type '{valid_trigger[0]}' used in workflow file: {workflow_file}" + ] + self.check_results["release_workflow_trigger"] = DeploySubcheckResults(justification=justification) + if invalid_trigger: + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty_lowered) + return check_certainty_lowered + + logger.info("Evidence found: release_workflow_trigger -> %s", check_certainty) + return check_certainty return self.failed_check def pypi_publishing_workflow_timestamp(self) -> float: @@ -409,7 +419,7 @@ def _step_uses_secrets(self, inputs: dict) -> bool: if isinstance(value, str): # Match the pattern '${{ content }}' pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE) - match = pattern.match(value) + match = pattern.search(value) if match is not None: content = match.group(1).strip() contents = content.split(".")
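
[The final hunk swaps re.match for re.search in _step_uses_secrets. re.match only succeeds when the pattern matches at the very start of the string, so a secret reference embedded later in a step value was silently missed; re.search scans the whole value. A small standalone sketch of the difference — the command string is a made-up example, not taken from the patches:

import re

# The same pattern used by _step_uses_secrets: capture the body of '${{ ... }}'.
pattern = re.compile(r"\$\{\{([^}]*)\}\}", re.IGNORECASE)

value = "twine upload --password ${{ secrets.PYPI_API_TOKEN }} dist/*"

print(pattern.match(value))   # None: the expression does not start the string.
print(pattern.search(value))  # Match object: the expression occurs mid-string.

match = pattern.search(value)
if match is not None:
    contents = match.group(1).strip().split(".")
    # Mirrors the sub-check: a '${{ secrets.X }}' reference other than the
    # automatically provided secrets.GITHUB_TOKEN counts as evidence.
    print(len(contents) == 2 and contents[0] == "secrets" and contents[1] != "GITHUB_TOKEN")  # True
]
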