Add Cryptofuzz to CI

This adds a new CI workflow to ACCP which runs Cryptofuzz for a fixed amount of time on each PR and reports any failures. Cryptofuzz is a fuzzing framework backed by LLVM's LibFuzzer project. It performs fuzz testing on cryptographic libraries at the granularity of cryptographic algorithms and also performs differential testing against the cryptographic libraries that it fuzzes. The implementation here is unapologetically derivative of the AWS-LC integration with Cryptofuzz. It follows an architecture reminiscent of our existing CI stacks, but differs in that it runs on a newly defined Docker image, `amazonlinux-2_clang-11x_cryptofuzz_x86`, which uses clang as the compiler and has Cryptofuzz-related assets baked into it. Clang is necessary here because LibFuzzer (invoked via `-fsanitize=fuzzer`) is only available from LLVM-based compilers. The ACCPGitHubFuzzCIStack spins up a VPC containing an EFS filesystem which contains the outputs of any Cryptofuzz findings as well as a running corpus of interesting inputs discovered over time. CodeBuild containers of the fuzzer mount this filesystem at runtime. The CI workflow builds ACCP and "links" it into the Cryptofuzz binary. It then runs Cryptofuzz to completion. We depart from the AWS-LC template by explicitly specifying the algorithms to fuzz, because ACCP supports a much smaller set of algorithms than OpenSSL. Without this, the fuzzer will end up fuzzing OpenSSL more than ACCP due to the random nature of LibFuzzer. To get ACCP working with Cryptofuzz, we adapt the existing Java module in Cryptofuzz to support installing ACCP as a JCE provider. We also configure LSAN suppressions to avoid false positives that would break the build.
corretto · Nov 10, 2023 · c3b0b63 · c3b0b63
1 parent 430b607
commit c3b0b63
Show file tree

Hide file tree

Showing 17 changed files with 149 additions and 296 deletions.
diff --git a/tests/ci/README.md b/tests/ci/README.md
@@ -61,3 +61,12 @@ CI Tool|C Compiler|Java Compiler|CPU platform|OS|Dimensions
 ------------ | -------------| -------------| -------------|-------------|-------------
 CodeBuild|gcc 7|corretto 11|x86-64|Ubuntu 20.04|both FIPS/non-FIPS
 CodeBuild|gcc 7|corretto 11|aarch|Ubuntu 20.04|both FIPS/non-FIPS
+
+
+### Cryptofuzz
+
+Each change is built and tested with [Cryptofuzz](https://github.com/guidovranken/cryptofuzz) for an hour. A seed corpus
+is included in tests/docker_images/cryptofuzz_data.zip. As new inputs are found, they are saved to a corpus in AWS EFS
+that is shared across runs. Cryptofuzz is built with 2 modules:
+* ACCP
+* OpenSSL
diff --git a/tests/ci/cdk/app.py b/tests/ci/cdk/app.py
@@ -6,6 +6,7 @@
 from aws_cdk import core
 
 from cdk.accp_github_ci_stack import ACCPGitHubCIStack
+from cdk.accp_github_fuzz_ci_stack import ACCPGitHubFuzzCIStack
 from cdk.linux_docker_image_batch_build_stack import LinuxDockerImageBatchBuildStack
 from cdk.windows_docker_image_build_stack import WindowsDockerImageBuildStack
 from cdk.ecr_stack import EcrStack
@@ -33,6 +34,8 @@
 ACCPGitHubCIStack(app, "accp-ci-pr-integration-linux-x86", LINUX_ECR_REPO, x86_build_spec_file, env=env)
 arm_build_spec_file = "./cdk/codebuild/pr_integration_linux_arm_omnibus.yaml"
 ACCPGitHubCIStack(app, "accp-ci-pr-integration-linux-arm", LINUX_ECR_REPO, arm_build_spec_file, env=env)
+fuzz_build_spec_file = "cdk/codebuild/pr_fuzzing_omnibus.yaml"
+ACCPGitHubFuzzCIStack(app, "accp-ci-fuzzing", LINUX_ECR_REPO, fuzz_build_spec_file, env=env)
 
 # TODO: Renable the code below when ACCP adds support for Windows.
 # Issue: https://github.com/corretto/amazon-corretto-crypto-provider/issues/48

diff --git a/tests/ci/cdk/cdk/accp_github_fuzz_ci_stack.py b/tests/ci/cdk/cdk/accp_github_fuzz_ci_stack.py
@@ -1,23 +1,20 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0 OR ISC
 
-from aws_cdk import Duration, Size, Stack, aws_codebuild as codebuild, aws_iam as iam, aws_ec2 as ec2, aws_efs as efs
-from constructs import Construct
 
-from cdk.components import PruneStaleGitHubBuilds
+from aws_cdk import core, aws_codebuild as codebuild, aws_iam as iam, aws_ec2 as ec2, aws_efs as efs
 from util.ecr_util import ecr_arn
-from util.iam_policies import code_build_batch_policy_in_json, \
-    code_build_publish_metrics_in_json
-from util.metadata import AWS_ACCOUNT, AWS_REGION, GITHUB_PUSH_CI_BRANCH_TARGETS, GITHUB_REPO_OWNER, GITHUB_REPO_NAME
-from util.build_spec_loader import BuildSpecLoader
+from util.iam_policies import code_build_batch_policy_in_json
+from util.metadata import AWS_ACCOUNT, AWS_REGION, GITHUB_BRANCH_EXCLUDE_CI, GITHUB_REPO_OWNER, GITHUB_REPO_NAME
+from util.yml_loader import YmlLoader
 
-
-class AwsLcGitHubFuzzCIStack(Stack):
-    """Define a stack used to batch execute AWS-LC tests in GitHub."""
+class ACCPGitHubFuzzCIStack(core.Stack):
+    """Define a stack used to batch execute ACCP tests in GitHub."""
 
     def __init__(self,
-                 scope: Construct,
+                 scope: core.Construct,
                  id: str,
+                 ecr_repo_name: str,
                  spec_file_path: str,
                  **kwargs) -> None:
         super().__init__(scope, id, **kwargs)
@@ -27,31 +24,30 @@ def __init__(self,
             owner=GITHUB_REPO_OWNER,
             repo=GITHUB_REPO_NAME,
             webhook=True,
+            fetch_submodules=True,
             webhook_filters=[
                 codebuild.FilterGroup.in_event_of(
+                    codebuild.EventAction.PULL_REQUEST_MERGED,
                     codebuild.EventAction.PULL_REQUEST_CREATED,
                     codebuild.EventAction.PULL_REQUEST_UPDATED,
-                    codebuild.EventAction.PULL_REQUEST_REOPENED),
-                codebuild.FilterGroup.in_event_of(codebuild.EventAction.PUSH).and_branch_is(
-                    GITHUB_PUSH_CI_BRANCH_TARGETS),
+                    codebuild.EventAction.PULL_REQUEST_REOPENED)
+                .and_base_branch_is_not(GITHUB_BRANCH_EXCLUDE_CI)
             ],
             webhook_triggers_batch_build=True)
 
         # Define a IAM role for this stack.
         code_build_batch_policy = iam.PolicyDocument.from_json(
             code_build_batch_policy_in_json([id])
         )
-        fuzz_policy = iam.PolicyDocument.from_json(code_build_publish_metrics_in_json())
-        inline_policies = {"code_build_batch_policy": code_build_batch_policy,
-                           "fuzz_policy": fuzz_policy}
+        inline_policies = {"code_build_batch_policy": code_build_batch_policy}
         role = iam.Role(scope=self,
                         id="{}-role".format(id),
                         assumed_by=iam.ServicePrincipal("codebuild.amazonaws.com"),
                         inline_policies=inline_policies)
 
         # Create the VPC for EFS and CodeBuild
         public_subnet = ec2.SubnetConfiguration(name="PublicFuzzingSubnet", subnet_type=ec2.SubnetType.PUBLIC)
-        private_subnet = ec2.SubnetConfiguration(name="PrivateFuzzingSubnet", subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS)
+        private_subnet = ec2.SubnetConfiguration(name="PrivateFuzzingSubnet", subnet_type=ec2.SubnetType.PRIVATE)
 
         # Create a VPC with a single public and private subnet in a single AZ. This is to avoid the elastic IP limit
         # being used up by a bunch of idle NAT gateways
@@ -73,7 +69,7 @@ def __init__(self,
             description="Allow all traffic inside security group"
         )
 
-        efs_subnet_selection = ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE_WITH_EGRESS)
+        efs_subnet_selection = ec2.SubnetSelection(subnet_type=ec2.SubnetType.PRIVATE)
 
         # Create the EFS to store the corpus and logs. EFS allows new filesystems to burst to 100 MB/s for the first 2
         # TB of data read/written, after that the rate is limited based on the size of the filesystem. As of late
@@ -86,46 +82,41 @@ def __init__(self,
         fuzz_filesystem = efs.FileSystem(
             scope=self,
             id="{}-FuzzingEFS".format(id),
-            file_system_name="AWS-LC-Fuzz-Corpus",
+            file_system_name="ACCP-Fuzz-Corpus",
             enable_automatic_backups=True,
             encrypted=True,
             security_group=build_security_group,
             vpc=fuzz_vpc,
             vpc_subnets=efs_subnet_selection,
             performance_mode=efs.PerformanceMode.GENERAL_PURPOSE,
             throughput_mode=efs.ThroughputMode.PROVISIONED,
-            provisioned_throughput_per_second=Size.mebibytes(100),
+            provisioned_throughput_per_second=core.Size.mebibytes(100),
         )
 
+        placeholder_map = {"ECR_REPO_PLACEHOLDER": ecr_arn(ecr_repo_name)}
+        build_spec_content = YmlLoader.load(spec_file_path, placeholder_map)
+
+        # The EFS identifier needs to match tests/ci/common_fuzz.sh; CodeBuild defines an environment variable named
+        # codebuild_$identifier.
+        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-codebuild-project-projectfilesystemlocation.html
+        efs_location = codebuild.FileSystemLocation.efs(
+            identifier="fuzzing_root",
+            location="%s.efs.%s.amazonaws.com:/" % (fuzz_filesystem.file_system_id, AWS_REGION),
+            mount_point="/efs_fuzzing_root")
         # Define CodeBuild.
         fuzz_codebuild = codebuild.Project(
             scope=self,
             id="FuzzingCodeBuild",
             project_name=id,
             source=git_hub_source,
             role=role,
-            timeout=Duration.minutes(120),
+            timeout=core.Duration.minutes(120),
             environment=codebuild.BuildEnvironment(compute_type=codebuild.ComputeType.LARGE,
                                                    privileged=True,
                                                    build_image=codebuild.LinuxBuildImage.STANDARD_4_0),
-            build_spec=BuildSpecLoader.load(spec_file_path),
+            build_spec=codebuild.BuildSpec.from_object(build_spec_content),
             vpc=fuzz_vpc,
-            security_groups=[build_security_group])
+            security_groups=[build_security_group],
+            file_system_locations=[efs_location]
+        )
         fuzz_codebuild.enable_batch_builds()
-
-        # CDK raw overrides: https://docs.aws.amazon.com/cdk/latest/guide/cfn_layer.html#cfn_layer_raw
-        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-codebuild-project.html#aws-resource-codebuild-project-properties
-        # The EFS identifier needs to match tests/ci/common_fuzz.sh, CodeBuild defines an environment variable named
-        # codebuild_$identifier.
-        # https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-codebuild-project-projectfilesystemlocation.html
-        #
-        # TODO: add this to the CDK project above when it supports EfsFileSystemLocation
-        cfn_codebuild = fuzz_codebuild.node.default_child
-        cfn_codebuild.add_override("Properties.FileSystemLocations", [{
-          "Identifier": "fuzzing_root",
-          "Location": "%s.efs.%s.amazonaws.com:/" % (fuzz_filesystem.file_system_id, AWS_REGION),
-          "MountPoint": "/efs_fuzzing_root",
-          "Type": "EFS"
-        }])
-
-        PruneStaleGitHubBuilds(scope=self, id="PruneStaleGitHubBuilds", project=fuzz_codebuild)
diff --git a/tests/ci/cdk/cdk/codebuild/pr_fuzzing_omnibus.yaml b/tests/ci/cdk/cdk/codebuild/pr_fuzzing_omnibus.yaml
@@ -0,0 +1,15 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 OR ISC
+
+version: 0.2
+
+# Doc for batch https://docs.aws.amazon.com/codebuild/latest/userguide/batch-build-buildspec.html#build-spec.batch.build-list
+batch:
+  build-list:
+    - identifier: amazonlinux2_clang11x_cryptofuzz_x86
+      buildspec: ./tests/ci/codebuild/run_accp_cryptofuzz.yml
+      env:
+        type: LINUX_CONTAINER
+        privileged-mode: true
+        compute-type: BUILD_GENERAL1_LARGE
+        image: ECR_REPO_PLACEHOLDER:amazonlinux-2_clang-11x_cryptofuzz_x86_latest
diff --git a/tests/ci/codebuild/run_accp_cryptofuzz.yml b/tests/ci/codebuild/run_accp_cryptofuzz.yml
@@ -0,0 +1,13 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0 OR ISC
+
+version: 0.2
+
+env:
+  variables:
+    GOPROXY: https://proxy.golang.org,direct
+
+phases:
+  build:
+    commands:
+      - "tests/ci/run_cryptofuzz.sh"
diff --git a/tests/ci/common_fuzz.sh b/tests/ci/common_fuzz.sh
@@ -23,20 +23,6 @@ SHARED_FAILURE_ROOT="${CORPUS_ROOT}/runs/${DATE_NOW}/${BUILD_ID}"
 LOCAL_RUN_ROOT="${BUILD_ROOT}/fuzz_run_root"
 rm -rf "$LOCAL_RUN_ROOT"
 
-function put_metric_count {
-  put_metric --unit Count "$@"
-}
-
-function put_metric {
-  # This call to publish the metric could fail but we don't want to fail the build +e turns off exit on error
-  set +e
-  aws cloudwatch put-metric-data \
-    --namespace AWS-LC-Fuzz \
-    "$@" || echo "Publishing metric failed, continuing with the rest of the build"
-  # Turn it back on for the rest of the build
-  set -e
-}
-
 function run_fuzz_test {
   SHARED_FUZZ_TEST_CORPUS="${CORPUS_ROOT}/shared_corpus/${FUZZ_NAME}/shared_corpus"
   LOCAL_FUZZ_TEST_ROOT="${LOCAL_RUN_ROOT}/${FUZZ_NAME}"
@@ -54,7 +40,6 @@ function run_fuzz_test {
   # as the SharedCorpusFileCount, which it basically everything in SHARED_FUZZ_TEST_CORPUS was just copied to
   # LOCAL_SHARED_CORPUS
   ORIGINAL_CORPUS_FILE_COUNT=$(find "$LOCAL_SHARED_CORPUS" -type f | wc -l)
-  put_metric_count --metric-name SharedCorpusFileCount --value "$ORIGINAL_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME"
 
   # Perform the actual fuzzing!
   # Step 1 run each fuzz test for the determined time. This will use the existing shared corpus copied from EFS to
@@ -81,6 +66,7 @@ function run_fuzz_test {
   time "${FUZZ_TEST_PATH}" -rss_limit_mb=${MEM_USAGE_LIMIT} -print_final_stats=1 -timeout="$FUZZ_TEST_TIMEOUT" -max_total_time="$TIME_FOR_EACH_FUZZ" \
     -jobs="$NUM_CPU_THREADS" -workers="$NUM_CPU_THREADS" \
     -artifact_prefix="$LOCAL_ARTIFACTS_FOLDER/" \
+    ${FUZZ_TEST_ADDITIONAL_ARGS} \
     "$LOCAL_RUN_CORPUS" "$LOCAL_SHARED_CORPUS" "$SRC_CORPUS" 2>&1 | tee "$SUMMARY_LOG"
   # This gets the status of the fuzz run which determines if we want to fail the build or not, otherwise we'd get the results of tee
   if [ "${PIPESTATUS[0]}" == 1 ]; then
@@ -123,22 +109,10 @@ function run_fuzz_test {
   # Calculate interesting metrics and post results to CloudWatch, this checks the shared (EFS) corpus after the new test
   # run corpus has been merged in
   FINAL_SHARED_CORPUS_FILE_COUNT=$(find "$SHARED_FUZZ_TEST_CORPUS" -type f | wc -l)
-  put_metric_count --metric-name SharedCorpusFileCount --value "$FINAL_SHARED_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME"
-
   RUN_CORPUS_FILE_COUNT=$(find "$LOCAL_RUN_CORPUS" -type f | wc -l)
-  put_metric_count --metric-name RunCorpusFileCount --value "$RUN_CORPUS_FILE_COUNT" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"
-
   TEST_COUNT=$(grep -o "stat::number_of_executed_units: [0-9]*" "$SUMMARY_LOG" | awk '{test_count += $2} END {print test_count}')
-  put_metric_count --metric-name TestCount --value "$TEST_COUNT" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"
-
   TESTS_PER_SECOND=$((TEST_COUNT/TIME_FOR_EACH_FUZZ))
-  put_metric --metric-name TestRate --value "$TESTS_PER_SECOND" --unit Count/Second --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"
-
   FEATURE_COVERAGE=$(grep -o "ft: [0-9]*" "$SUMMARY_LOG" | awk '{print $2}' | sort -n | tail -1)
-  put_metric_count --metric-name FeatureCoverage --value "$FEATURE_COVERAGE" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"
-
   BLOCK_COVERAGE=$(grep -o "cov: [0-9]*" "$SUMMARY_LOG" | awk '{print $2}' | sort -n | tail -1)
-  put_metric_count --metric-name BlockCoverage --value "$BLOCK_COVERAGE" --dimensions "FuzzTest=$FUZZ_NAME,Platform=$PLATFORM"
-
   echo "${FUZZ_NAME} starting shared ${ORIGINAL_CORPUS_FILE_COUNT} final shared ${FINAL_SHARED_CORPUS_FILE_COUNT} new files ${RUN_CORPUS_FILE_COUNT} total test count ${TEST_COUNT} test rate ${TESTS_PER_SECOND} code coverage ${BLOCK_COVERAGE} feature coverage ${FEATURE_COVERAGE}"
 }