diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e7c6d95a68..ca01225313 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,15 @@ +variables: + GCP_AUTH_JSON: $gcpAuthJson + AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID + AWS_DEFAULT_REGION: $AWS_DEFAULT_REGION + AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY + IS_IN_CI: "1" + stages: - prepare-auth + - build-docker + - test + update_token: image: @@ -9,5 +19,76 @@ update_token: before_script: - aws --version script: - - echo hello + - USER=AWS + - TOKEN=$(aws ecr get-login-password) + - AUTH=$(echo -n "$USER:$TOKEN" | base64 | tr -d "\n") + - | + set -eux + curl --request PUT --header "PRIVATE-TOKEN: $GITLAB_PERSONAL_ACCESS_TOKEN" \ + --silent --output /dev/null --show-error --fail \ + "https://gitlab.com/api/v4/projects/$CI_PROJECT_ID/variables/AWS_ECR_AUTH" --form "value=$AUTH" + only: + variables: + - $CI_PIPELINE_SOURCE == "web" + - $CI_COMMIT_REF_NAME == "main" + +docker_build: + stage: build-docker + image: docker:20 + dependencies: + - update_token + tags: + - cpu-sole-tenant + script: + - apk add --update py-pip + - pip install awscli + - NO_CACHE=1 sh ./scorecard/docker/build.sh + # Generate a test suite ID to be used in later runs so all the concurrent results can be grouped together + - sh -c 'TEST_SUITE_ID=$(tr -dc a-z > output.env' + variables: + DOCKER_HOST: dind-service.kube-system.svc.cluster.local:2375 + PUSH_TO_ECR: 1 + GIT_COMMIT_SHA: $CI_COMMIT_SHA + artifacts: + reports: + dotenv: output.env + +.benchmark_template: &benchmark_template + tags: + - gpu-triton + stage: test + dependencies: + - docker_build + image: + # name: 186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:2023-03-10-b4fb5b6 + name: 186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:$TAG + script: | + set -eux + ls + ./scorecard/scripts/show_node_info.sh + mkdir model-data + echo "$GCP_AUTH_JSON" > gcp_auth.json + export UPLOAD_GCP=1 + export TEST_RUNS=10 + export WARMUP_RUNS=3 + pytest --tb=native -rA -v -s -q scorecard/relax-coverage/ -k "$PYTEST_FILTER" + +benchmarks-baseline: + <<: *benchmark_template + variables: + PYTEST_FILTER: onnx-trt + +benchmarks-relax: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and not stable-diffusion + +benchmarks-relax-sd-unet: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and stable-diffusion and unet +benchmarks-relax-sd-vae: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and stable-diffusion and vae diff --git a/scorecard/Makefile b/scorecard/Makefile new file mode 100644 index 0000000000..53d5375cc6 --- /dev/null +++ b/scorecard/Makefile @@ -0,0 +1,17 @@ +TEST_DATA ?= build/testdata.jsonl + +clean: + rm -rf build + +build/venv-created.touch: poetry.lock pyproject.toml + poetry install + mkdir -p build + touch build/venv-created.touch + +prepare_testdata: build/venv-created.touch testdata/*.jsonc schema/schema.jsonschema relax_scorecard/*.py + mkdir -p build + poetry run python3 -m relax_scorecard.concat_testdata --schema schema/schema.jsonschema testdata/*.jsonc >"${TEST_DATA}" + @echo "Prepared testdata in ${TEST_DATA}" + + +.DEFAULT_GOAL = prepare_testdata diff --git a/scorecard/bashrc.sh b/scorecard/bashrc.sh new file mode 100644 index 0000000000..7719d4e159 --- /dev/null +++ b/scorecard/bashrc.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +echo "scorecard Docker image + +ensure S3 credentials are set up (ask the team to get a new set): +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... 
+ +run tests with: +pytest --tb=native -v -s -q relax-coverage +" diff --git a/scorecard/docker/Dockerfile.auth-test b/scorecard/docker/Dockerfile.auth-test new file mode 100644 index 0000000000..5589580043 --- /dev/null +++ b/scorecard/docker/Dockerfile.auth-test @@ -0,0 +1,5 @@ +FROM scratch + +COPY docker/Dockerfile.auth-test / + +ARG TVM_BUILT_AT diff --git a/scorecard/docker/Dockerfile.scorecard b/scorecard/docker/Dockerfile.scorecard new file mode 100644 index 0000000000..26039890e3 --- /dev/null +++ b/scorecard/docker/Dockerfile.scorecard @@ -0,0 +1,93 @@ +# TensorRT image +# uses CUDA 11.7 +FROM nvcr.io/nvidia/tensorrt:22.12-py3 +# FROM nvcr.io/nvidia/tensorrt:23.02-py3 # uses CUDA 12.0, not supported on the gpu-triton runners' CUDA driver + +# CUDA images (requires us to manually install tensorrt) +# FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +# FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 + +WORKDIR /opt/scorecard + +RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y \ + build-essential \ + curl \ + fish \ + git \ + python3 \ + libpq-dev \ + postgresql \ + postgresql-contrib \ + python3-dev \ + python3-pip \ + sudo \ + vim \ + wget \ + ; + +# llvm +RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" >> /etc/apt/sources.list +RUN echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" >> /etc/apt/sources.list +RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add +RUN apt update && apt install -y libllvm-15-ocaml-dev \ + libllvm15 \ + llvm-15 \ + llvm-15-dev \ + llvm-15-runtime + +# python dependencies +RUN python3 -m pip install --no-cache-dir \ + cmake \ + commentjson==0.9.0 \ + google-cloud-bigquery==3.5.0 \ + jinja2 \ + jsonschema==4.17.3 \ + ninja \ + nvidia-tensorrt \ + onnx \ + onnxruntime-gpu \ + psycopg2==2.9.5 \ + pytest \ + pytest-xdist \ + pyyaml \ + tabulate==0.9.0 \ + torch \ + typing_extensions \ + xgboost \ + ; + +RUN python3 -m pip --no-cache-dir install onnx_graphsurgeon==0.3.26 --index-url https://pypi.ngc.nvidia.com --no-deps + +# onnx nightly +RUN mkdir /opt/onnx_nightly +RUN PYTHONUSERBASE=/opt/onnx_nightly pip install --user \ + --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ \ + # ort-nightly \ + ort-nightly-gpu +ENV ONNX_NIGHTLY_PATH /opt/onnx_nightly/lib/python3.10/site-packages + +# Build TVM +ARG TVM_BUILT_AT +RUN git clone https://github.com/octoml/relax --recursive +RUN cd relax && git fetch origin && git config user.name test && git config user.email test@example.com + +# Add this line to build in an un-merged PR +# RUN PR_NUMBER=NN bash -c 'cd relax && curl -L "https://github.com/octoml/relax/pull/$PR_NUMBER.diff" | patch -p1 -N -d . && git add . && git commit -m"PR #$PR_NUMBER"' +RUN bash -c 'cd relax && curl -L "https://github.com/octoml/relax/compare/TUZ-145.diff" | patch -p1 -N -d . && git add . && git commit -m"Add TUZ-145"' + +RUN rm -rf relax/build +COPY docker/build_relax.sh docker/build_relax.sh +RUN bash docker/build_relax.sh +RUN cd relax/python && python3 -m pip install --no-cache-dir -e . 
+ +# aws CLI +RUN pip install awscli + +# testbench code +COPY relax-coverage relax-coverage +COPY schema schema +COPY models.yaml models.yaml +COPY hub_models.yaml hub_models.yaml + +ENV ORT_TENSORRT_FP16_ENABLE 1 +ENV AWS_DEFAULT_REGION us-west-2 diff --git a/scorecard/docker/build.sh b/scorecard/docker/build.sh new file mode 100755 index 0000000000..1e0458b637 --- /dev/null +++ b/scorecard/docker/build.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -eux + +set +x +source docker/retry.sh +set -x + +PUSH_TO_ECR="${PUSH_TO_ECR:=0}" +NO_CACHE="${NO_CACHE:=0}" +TVM_BUILT_AT="${TVM_BUILT_AT:=0}" +RETRIES="${RETRIES:=5}" +IMAGE_NAME="${IMAGE_NAME:=scorecard}" + +CACHE_ARG="" +if [ "$NO_CACHE" == "1" ]; then + CACHE_ARG="--no-cache" +fi + +retry $RETRIES docker build . --build-arg TVM_BUILT_AT=$TVM_BUILT_AT -f docker/Dockerfile.${IMAGE_NAME} $CACHE_ARG --tag ${IMAGE_NAME}:latest + +# # testing code to skip the docker build but still have an image to work with +# docker pull hello-world +# docker tag hello-world scorecard:latest + +if [ "$PUSH_TO_ECR" == "1" ]; then + DATE=$(date '+%Y-%m-%d') + HASH=${GIT_COMMIT_SHA:0:7} + TAG="$DATE-$HASH" + + REGION="us-west-2" + ACCOUNT_ID="186900524924" + + # Make 'docker push' authenticated with ECR + aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com + + # Push to ECR registry (latest) + retry 5 docker tag ${IMAGE_NAME}:latest $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:latest + retry 5 docker push $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:latest + + # Push to ECR registry (fixed tag) + retry 5 docker tag ${IMAGE_NAME}:latest $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG + retry 5 docker push $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG + + # Save the tag so it can be used later + echo "TAG=$TAG" >> output.env + echo "ECR_IMAGE=$ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG" >> output.env +fi diff --git a/scorecard/docker/build_relax.sh b/scorecard/docker/build_relax.sh new file mode 100755 index 0000000000..216ac83ec9 --- /dev/null +++ b/scorecard/docker/build_relax.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -euxo pipefail +cd relax +mkdir -p build +cd build +cmake -GNinja \ + -DCMAKE_LINKER=/usr/bin/lld-15 \ + -DCMAKE_CUDA_ARCHITECTURES=75 \ + -DUSE_LLVM=llvm-config-15 \ + -DSUMMARIZE=1 \ + -DUSE_CUDA=1 \ + -DUSE_MICRO=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_CUTLASS=1 \ + -DUSE_THRUST=1 \ + .. +cmake --build . 
-- diff --git a/scorecard/docker/dev.sh b/scorecard/docker/dev.sh new file mode 100755 index 0000000000..dc40ea27aa --- /dev/null +++ b/scorecard/docker/dev.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -euxo pipefail + +# NB: Also source MODEL_DATA_DIR and GCP_AUTH_JSON from a .env or whatever +# is relevant for the running platform +set +x +UPLOAD_GCP="${UPLOAD_GCP:=0}" +UPLOAD_PG="${UPLOAD_PG:=0}" +TEST_RUNS="${TEST_RUNS:=1}" +WARMUP_RUNS="${WARMUP_RUNS:=0}" +IMAGE="${IMAGE:=scorecard}" +MODEL_DATA_DIR="${MODEL_DATA_DIR:=model-data}" +GCP_AUTH_JSON="${GCP_AUTH_JSON:=none.json}" +PWD=$(pwd) + +touch .fish_history +sudo rm -rf doc-relax +mkdir -p doc-relax +mkdir -p onnx-hub-cache +mkdir -p model-data + +set -x + +docker run \ + --gpus all \ + --env TEST_RUNS=$TEST_RUNS \ + --env WARMUP_RUNS=$WARMUP_RUNS \ + --env UPLOAD_GCP=$UPLOAD_GCP \ + --env UPLOAD_PG=$UPLOAD_PG \ + -v $PWD/$MODEL_DATA_DIR:/opt/scorecard/model-data \ + -v $GCP_AUTH_JSON:/opt/scorecard/gcp_auth.json:ro \ + -v $PWD/.coverage_results:/opt/scorecard/.coverage_results \ + -v $PWD/.tuning_records:/opt/scorecard/.tuning_records \ + -v $PWD/.fish_history:/root/.local/share/fish/fish_history \ + -v $PWD/relax-coverage:/opt/scorecard/relax-coverage \ + -v $PWD/schema:/opt/scorecard/schema \ + -v $PWD/scripts:/opt/scorecard/scripts \ + -v $PWD/models.yaml:/opt/scorecard/models.yaml \ + -v $PWD/hub_models.yaml:/opt/scorecard/hub_models.yaml \ + --mount type=volume,dst=/opt/scorecard/relax,volume-driver=local,volume-opt=type=none,volume-opt=o=bind,volume-opt=device=$PWD/doc-relax \ + --mount type=volume,dst=/root/.cache/onnx/hub,volume-driver=local,volume-opt=type=none,volume-opt=o=bind,volume-opt=device=$PWD/onnx-hub-cache \ + -it $IMAGE \ + fish diff --git a/scorecard/docker/output_login.sh b/scorecard/docker/output_login.sh new file mode 100755 index 0000000000..6b6997202f --- /dev/null +++ b/scorecard/docker/output_login.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -euxo pipefail + +REGION="us-west-2" +ACCOUNT_ID="186900524924" +aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com +AUTH_JSON=$(cat ~/.docker/config.json | tr '\n' ' ') +echo "$AUTH_JSON" >> output.env diff --git a/scorecard/docker/retry.sh b/scorecard/docker/retry.sh new file mode 100644 index 0000000000..5fca5e74f8 --- /dev/null +++ b/scorecard/docker/retry.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -eux + +retry() { + local max_retries=$1 + shift + local n=0 + until [ "$n" -ge "$max_retries" ] + do + "$@" && break + n=$((n+1)) + if [ "$n" -eq "$max_retries" ]; then + echo "failed to update after attempt $n / $max_retries, giving up" + exit 1 + fi + + WAIT=$(( ( RANDOM % 200 ) + 30 )) + echo "failed to update $n / $max_retries, waiting $WAIT to try again" + sleep "$WAIT" + done +} diff --git a/scorecard/docker/run.sh b/scorecard/docker/run.sh new file mode 100755 index 0000000000..532a03b106 --- /dev/null +++ b/scorecard/docker/run.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -euxo pipefail + +# NB: Also source MODEL_DATA_DIR and GCP_AUTH_JSON from a .env or whatever +# is relevant for the running platform +export UPLOAD_GCP=1 +export UPLOAD_PG=0 +mkdir -p model-data + +IMAGE="${IMAGE:-186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:latest}" + +docker run \ + --gpus all \ + --env TEST_RUNS=10 \ + --env WARMUP_RUNS=3 \ + --env UPLOAD_GCP=1 \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env AWS_DEFAULT_REGION=us-west-2 \ + -v $(pwd)/model-data:/opt/scorecard/model-data \ + -v $GCP_AUTH_JSON:/opt/scorecard/gcp_auth.json:ro \ + -v $(pwd)/.coverage_results:/opt/scorecard/.coverage_results \ + $IMAGE \ + pytest --tb=native -v -s -q relax-coverage/ diff --git a/scorecard/docker/with_the_same_user b/scorecard/docker/with_the_same_user new file mode 100755 index 0000000000..4cde94d6cf --- /dev/null +++ b/scorecard/docker/with_the_same_user @@ -0,0 +1,95 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This script is a wrapper creating the same user inside container as the one +# running the docker/build.sh outside the container. It also set the home directory +# for the user inside container to match the same absolute path as the workspace +# outside of container. Do not run this manually. It does not make sense. It is +# intended to be called by ci_build.sh only. + +set -ex + +# NOTE: sudo uses the env_reset option to reset environment variables to a secure bare minimum. +# The --preserve-env option below passes those variables through to the invoked process; however, +# this appears not to affect the environment used with execve, so we resolve the binary to run +# in this file using the $PATH specified in the Dockerfile. +COMMAND=( "$(which "$1")" ) +shift +COMMAND=( "${COMMAND[@]}" "$@" ) + +if ! touch /this_is_writable_file_system; then + echo "You can't write to your filesystem!" + echo "If you are in Docker you should check you do not have too many images" \ + "with too many files in them. Docker has some issue with it." + exit 1 +else + rm /this_is_writable_file_system +fi + +getent group "${CI_BUILD_GID}" || ( + # Ensure "${CI_BUILD_GROUP}" is not already some other gid inside container. 
+ if grep -q "^${CI_BUILD_GROUP}:" /etc/group; then + CI_BUILD_GROUP="${CI_BUILD_GROUP}2" + fi + addgroup --force-badname --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" >/dev/null) + +getent group tvm-venv || (addgroup tvm-venv >/dev/null) +getent passwd "${CI_BUILD_UID}" || adduser --force-badname --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ + --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ + --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}" +usermod -a -G sudo -G tvm-venv "${CI_BUILD_USER}" +usermod -a -G sudo -G dialout "${CI_BUILD_USER}" + +# Add user to video group for ROCm +if [[ ! -z "${ROCM_ENABLED-}" ]]; then + usermod -a -G video "${CI_BUILD_USER}" +fi + +# This is a grotesque hack to get PYTEST_ADD_OPTS available to all task scripts. +echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo +sudo chown "${CI_BUILD_USER}:${CI_BUILD_GID}" /opt/scorecard +cp /root/.bashrc . +chown "${CI_BUILD_UID}:${CI_BUILD_GID}" .bashrc + +if [ -e /root/.aws ]; then + cp -r /root/.aws /opt/scorecard + chown -R "${CI_BUILD_USER}:${CI_BUILD_GID}" /opt/scorecard/.aws +fi + +if [[ ! -z "${CUDA_VISIBLE_DEVICES-}" ]]; then + CUDA_ENV="CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" +else + CUDA_ENV="" +fi + +if [[ "$CI_IMAGE_NAME" == *"hexagon"* ]] && [[ ${CI:-false} != "true" ]]; then + PATH=$(echo "$PATH" | sed 's/\/opt\/sccache://g') +fi + +sudo -u "#${CI_BUILD_UID}" --preserve-env \ +${CUDA_ENV} \ +PATH=${PATH} \ +JAVA_HOME=${JAVA_HOME} \ +LD_LIBRARY_PATH="${LD_LIBRARY_PATH-}" \ +PYTHONPATH="${PYTHONPATH-}" \ +HOME="$(pwd)" \ +CI_IMAGE_NAME="${CI_IMAGE_NAME-}" \ +"${COMMAND[@]}" + +#HOME="${CI_BUILD_HOME-}" \ diff --git a/scorecard/relax-coverage/.gitignore b/scorecard/relax-coverage/.gitignore new file mode 100644 index 0000000000..70a9e1982b --- /dev/null +++ b/scorecard/relax-coverage/.gitignore @@ -0,0 +1 @@ +old*.py diff --git a/scorecard/relax-coverage/conftest.py b/scorecard/relax-coverage/conftest.py new file mode 100644 index 0000000000..a78ad282b7 --- /dev/null +++ b/scorecard/relax-coverage/conftest.py @@ -0,0 +1,2 @@ +def pytest_addoption(parser): + parser.addoption("--cuda-sm", help="--cuda-sm arg to cli.py") diff --git a/scorecard/relax-coverage/runners/base.py b/scorecard/relax-coverage/runners/base.py new file mode 100644 index 0000000000..f295d41de9 --- /dev/null +++ b/scorecard/relax-coverage/runners/base.py @@ -0,0 +1,103 @@ +import datetime +import json + +from pathlib import Path +from typing import List, Optional, Dict, Any + +import numpy as np +from benchmarking_utils import BenchmarkConfig, FROM_HUB, sha256sum + + +def flush_result( + result_directory: Path, + compile_time_ms: float, + import_error: Optional[str], + runtimes_ms: List[float], + shapes: Dict[str, List[int]], + output_deltas: List[List[np.ndarray]], + run_config: BenchmarkConfig, + framework_ops: List[str], + runtime_metadata: Dict[str, Any], + relay_ops: List[Dict[str, Any]], +) -> None: + """ + Print the results of a run to stdout + """ + model_config = run_config.config + + outputs_match_onnx = True + for deltas in output_deltas: + for delta in deltas: + if not np.allclose( + np.zeros_like(delta), delta, atol=run_config.atol, rtol=run_config.rtol + ): + outputs_match_onnx = False + + end_to_end_runtimes_ms = np.array(runtimes_ms) + relay_fusion_groups: List[List[int]] = [] + # branch, sha = git_info() + branch = "tbd" + sha = "tbd" + + if model_config.file() == FROM_HUB: + # TODO: implement this for hub models + model_sha = "unknown" + 
else: + model_sha = sha256sum(model_config.file()) + + if import_error is not None or len(runtimes_ms) == 0: + mean_sec = 0 + p95_sec = 0 + std_dev_sec = 0 + variance_sec2 = 0 + cov = 0 + else: + runtimes_s = np.array(end_to_end_runtimes_ms) / 1000.0 + mean_s = np.mean(runtimes_s) + std_dev_s = np.std(runtimes_s) + mean_sec = mean_s + p95_sec = np.percentile(runtimes_s, 95) + std_dev_sec = std_dev_s + variance_sec2 = np.var(runtimes_s) + cov = std_dev_s / mean_s + + data = { + # identifying fields + "test_run_id": "to be filled in", + "run_at": "to be filled in", + "test_suite_id": "to be filled in", + "model_set_id": model_config.set, + "model_name": model_config.name, + "config_name": model_config.flow_config, + # info to reproduce results + "model_hash": model_sha, + "repo": { + "owner": "octoml", + "repo": "relax", + "sha": sha, + "branch": branch, + }, + "runtime_metadata": runtime_metadata, + "import_error": import_error, + "warmup_runs": run_config.warmup_runs, + "test_runs": run_config.test_runs, + "input_shapes": shapes, + # coarse grained timings + "inference_stats": { + "mean_sec": mean_sec, + "p95_sec": p95_sec, + "std_dev_sec": std_dev_sec, + "variance_sec2": variance_sec2, + "cov": cov, + }, + "compile_time_ms": compile_time_ms, + "raw_stats_ms": runtimes_ms, + "outputs_match_onnx": outputs_match_onnx, + # model details + "framework_ops": framework_ops, + "relay_ops": relay_ops, + "relay_fusion_groups": relay_fusion_groups, + # coverage results + } + + print(json.dumps(data, indent=2), flush=True) diff --git a/scorecard/relax-coverage/runners/benchmarking_utils.py b/scorecard/relax-coverage/runners/benchmarking_utils.py new file mode 100644 index 0000000000..2f21ecc7fe --- /dev/null +++ b/scorecard/relax-coverage/runners/benchmarking_utils.py @@ -0,0 +1,372 @@ +import json +import random +import string +import subprocess +import sys +import time +import os +import argparse +import collections +import functools + +from typing import * +from pathlib import Path +from dataclasses import dataclass + +import psycopg2 +import commentjson +import jsonschema +import onnx +import pytest + +from cloud_utils import aws_download, IS_IN_CI + +import numpy as np +from google.cloud import bigquery +from google.oauth2 import service_account +from onnx import hub +from tvm import relax + + +REPO_ROOT = Path(__file__).parent.parent.parent +FROM_HUB = object() +ONNX_REPO = "onnx/models" +ONNX_REPO_SHA = "8e893eb39b131f6d3970be6ebd525327d3df34ea" +MODELS_DIR = REPO_ROOT / "model-data" + + +class ImportError: + FAILED_ONNX_IMPORT = "failed_onnx_import" + FAILED_RELAX_BUILD = "failed_relax_build" + FAILED_EXECUTION = "failed_execution" + FAILED_OCTO_COMPILE = "failed_octo_compile" + + +def eprint(*args): + print(*args, file=sys.stderr, flush=True) + + +@dataclass +class ModelConfig: + set: str + name: str + sha256: str + version: str + flow_config: str + requires_toposort: bool + input_scale: float + tuning_steps: Optional[int] + shapes: Optional[Dict[str, List[int]]] + dtypes: Optional[Dict[str, str]] + files: Optional[List[str]] + + @staticmethod + def from_json(raw: str) -> "ModelConfig": + data = json.loads(raw) + return ModelConfig( + set=data["set"], + name=data["name"], + sha256=data["sha256"], + version=data["version"], + flow_config=data["flow_config"], + requires_toposort=data["requires_toposort"], + input_scale=data["input_scale"], + shapes=data["shapes"], + dtypes=data["dtypes"], + ) + + def id(self) -> str: + return f"{self.set}-{self.name}" + + def model_dir(self) -> Path: + if 
self.set == "onnx-hub": + return FROM_HUB + return MODELS_DIR / self.set / f"{self.name}@{self.version}" + + def file(self) -> Path: + if self.set == "onnx-hub": + return FROM_HUB + return self.model_dir() / "model.onnx" + + def load_model(self, verify_sha256: bool = True) -> onnx.ModelProto: + path = self.file() + s3_prefix = f"{self.set}/{self.name}@{self.version}" + if path == FROM_HUB: + repo = f"{ONNX_REPO}:{ONNX_REPO_SHA}" + model = hub.load(self.name, repo=repo, silent=True) + if verify_sha256: + eprint(f"Skipping verification for {path} since it was loaded from ONNX Hub") + return model + else: + if not path.exists(): + eprint(f"Model file at {path} not found, trying to download from S3 storage...") + (MODELS_DIR / self.set).mkdir(exist_ok=True, parents=True) + out_path = aws_download( + blob_name=f"{s3_prefix}/model.onnx", + out_path=path, + ) + if not out_path.exists(): + raise RuntimeError(f"Model file at {path} not found, has it been downloaded?") + if verify_sha256: + actual_sha256 = sha256sum(path) + if actual_sha256 != self.sha256: + raise RuntimeError( + f"Model's sha256 ({actual_sha256}) did not match expected sha256 ({self.sha256})" + ) + + eprint(f"Loading model at {path}") + model = onnx.load(path, load_external_data=False) + for external_file in self.files: + external_file_path = self.model_dir() / external_file + if not external_file_path.exists(): + eprint(f"{external_file_path} does not exist, downloading from S3...") + aws_download( + blob_name=f"{s3_prefix}/{external_file}", + out_path=external_file_path, + ) + onnx.load_external_data_for_model(model, self.model_dir()) + + if self.requires_toposort: + import onnx_graphsurgeon as gs + + sorted = gs.import_onnx(model) + sorted.toposort() + model = gs.export_onnx(sorted) + + return model + + +@dataclass +class BenchmarkConfig: + config: ModelConfig + warmup_runs: int + test_runs: int + check_accuracy: bool + atol: float + rtol: float + cuda_sm: int + + @staticmethod + def from_json(raw: str) -> "BenchmarkConfig": + data = json.loads(raw) + return BenchmarkConfig( + config=ModelConfig.from_json(data["config"]), + warmup_runs=data["warmup_runs"], + test_runs=data["test_runs"], + check_accuracy=data["check_accuracy"], + atol=data["atol"], + rtol=data["rtol"], + cuda_sm=data["cuda_sm"], + ) + + def __str__(self): + return f"{self.config.set}.{self.config.name}.{self.config.flow_config}" + + +def sha256sum(model_file_name: str): + proc = subprocess.run( + ["sha256sum", model_file_name], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + return proc.stdout.strip().split()[0] + + +def git_info() -> Tuple[str, str]: + """ + Determine the git branch and sha + """ + proc = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + branch = proc.stdout.strip() + proc = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD"], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + sha = proc.stdout.strip() + return branch, sha + + +def infer_shapes( + model: "onnx.ModelProto", axes: Optional[Dict[str, int]] = None +) -> Dict[str, List[int]]: + # N.B. Defer the import so as not to unconditionally require other runtimes. + from tvm import relay + from tvm.tir import Any as Any + + input_shapes = {} + if axes is None: + axes = {} + initializer_names = [n.name for n in model.graph.initializer] + # The inputs contains both the inputs and parameters. 
We are just interested in the + # inputs so skip all parameters listed in graph.initializer + unspecified_dynamic_axes = [] + for input_info in model.graph.input: + if input_info.name not in initializer_names: + name, shape, dtype, axis_names = relay.frontend.onnx.get_info(input_info) + + # Normalize the shape dimensions to integers + assert isinstance(input_shapes, dict) + new_shape = [] + for value, axis_name in zip(shape, axis_names): + if isinstance(value, Any): + lookup_value = axes.get(axis_name) + if lookup_value is None: + unspecified_dynamic_axes.append((axis_name, name)) + value = -1 + else: + value = lookup_value + else: + value = int(value) + + new_shape.append(value) + input_shapes.update({input_info.name: new_shape}) + + if len(unspecified_dynamic_axes) > 0: + axes_to_inputs = collections.defaultdict(list) + for axis_name, input_name in unspecified_dynamic_axes: + axes_to_inputs[axis_name].append(input_name) + + msg = "\n".join( + [ + f" {axis_name} on {', '.join(input_names)}" + for axis_name, input_names in axes_to_inputs.items() + ] + ) + raise RuntimeError( + f"Unspecified dynamic shapes detected, shapes must be manually specified or an $axis entry provided:\n{msg}" + ) + return input_shapes + + +def infer_dtypes(model: "onnx.ModelProto") -> Dict[str, str]: + # N.B. Defer the import so as not to unconditionally require other runtimes. + from tvm import relay + from tvm.tir import Any as Any + + input_dtypes = {} + initializer_names = [n.name for n in model.graph.initializer] + # The inputs contains both the inputs and parameters. We are just interested in the + # inputs so skip all parameters listed in graph.initializer + for input_info in model.graph.input: + if input_info.name not in initializer_names: + name, shape, dtype, axis_names = relay.frontend.onnx.get_info(input_info) + if dtype is None: + raise RuntimeError( + f"Unknown dtype on input '{input_info.name}' is not supported. 
inputs: '{input_info.name}'", + ) + + input_dtypes.update({input_info.name: dtype}) + + return input_dtypes + + +class Timer(object): + def __enter__(self): + self.start = time.perf_counter_ns() + return self + + def __exit__(self, *args): + self.end = time.perf_counter_ns() + self.ms_duration = (self.end - self.start) / 1000 / 1000 + + +def extract_framework_ops(model: onnx.ModelProto) -> List[Dict[str, str]]: + return [] + return [{"name": node.name, "op_type": node.op_type} for node in model.graph.node] + + +def extract_relay_ops( + model: onnx.ModelProto, + framework_ops: List[Dict[str, str]], + shapes: Dict[str, List[int]], +) -> List[str]: + tvm_model = relax.from_onnx(model, shape=shapes) + + ops = [] + for item in tvm_model.functions.keys(): + ops.append( + { + "framework_op_index": -1, + "name": item.name_hint, + "schedule_method": "unknown", + } + ) + + return ops + + +class BaseRunner: + benchmark_config: BenchmarkConfig + + def __init__(self, benchmark_config: BenchmarkConfig): + self.benchmark_config = benchmark_config + + self._model = self.benchmark_config.config.load_model( + verify_sha256=benchmark_config.config.sha256 is not None + ) + + def metadata(self): + raise NotImplementedError + + def run(self, *args, **kwargs): + raise NotImplementedError + + def load_model(self) -> "onnx.ModelProto": + return self._model + + def run_onnx_cpu_inference(self, inputs: Dict[str, "np.ndarray"]) -> List["np.ndarray"]: + import onnxruntime as ort + + sess_opt = ort.SessionOptions() + + # Set up an onnx inference on GPU + sess = ort.InferenceSession( + self._model.SerializeToString(), + sess_options=sess_opt, + providers=["CPUExecutionProvider"], + ) + output_names = [] + output = sess.run(output_names, inputs) + return output + + def generate_inputs(self, n: int) -> List[Dict[str, np.ndarray]]: + all_inputs = [] + + inferred_dtypes = None + + if self.benchmark_config.config.shapes is None: + shapes = infer_shapes(self._model) + else: + axes = self.benchmark_config.config.shapes.get("$axes") + if len(self.benchmark_config.config.shapes) == 1 and axes is not None: + shapes = infer_shapes(self._model, axes=axes) + else: + shapes = self.benchmark_config.config.shapes + + if self.benchmark_config.config.dtypes is None: + if inferred_dtypes is not None: + dtypes = inferred_dtypes + else: + dtypes = infer_dtypes(self._model) + else: + dtypes = self.benchmark_config.config.dtypes + + for _ in range(n): + input_names = list(shapes.keys()) + inputs = {} + for name in input_names: + inputs[name] = ( + np.random.uniform(size=shapes[name]) * self.benchmark_config.config.input_scale + ).astype(dtypes[name]) + + all_inputs.append(inputs) + + return all_inputs diff --git a/scorecard/relax-coverage/runners/cli.py b/scorecard/relax-coverage/runners/cli.py new file mode 100755 index 0000000000..ca885cae99 --- /dev/null +++ b/scorecard/relax-coverage/runners/cli.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +from benchmarking_utils import ( + BenchmarkConfig, + ModelConfig, + eprint, + extract_framework_ops, +) +from base import flush_result +from pathlib import Path +from typing import * + +import numpy as np + +import importlib +import re +import json +import argparse +import warnings +import sys + +np.set_printoptions(threshold=5, precision=4) +# warnings.filterwarnings(action="ignore", category=DeprecationWarning, module=r".*") +# warnings.filterwarnings(action="error", category=UserWarning, module=r".*") + + +def find_inputs_and_outputs(dir: Path): + input_dir = Path(dir) + + all_inputs = [] + 
gold_results = [] + eprint(f"Loading sample inputs from {dir}...") + for input_path in input_dir.glob("sample_input*.npy"): + output_path = input_path.parent / input_path.name.replace("input", "output") + all_inputs.append(np.load(input_path, allow_pickle=True).item()) + gold_results.append(np.load(output_path, allow_pickle=True)) + + if len(all_inputs) == 0: + eprint(f"No sample inputs (e.g. files named sample_input0.npy found in '{dir}')") + exit(1) + elif len(all_inputs) > 1: + eprint(f"Found multiple input files in '{dir}', use --input to choose a specific one") + exit(1) + + return all_inputs, gold_results + + +def run(args, runner): + """ + Run the benchmark as defined by the CLI args + """ + + # Determine where the model inputs should come from + if args.random_inputs: + # No specific input to use, make one + all_inputs = runner.generate_inputs(1) + gold_results = None + elif args.input is not None: + # A specific file has been chosen, use it + all_inputs = [np.load(Path(args.input), allow_pickle=True).item()] + gold_results = None + if args.output is not None: + gold_results = np.load(Path(args.output), allow_pickle=True) + elif args.input_dir is not None: + # Find a file in a particular directory + all_inputs, gold_results = find_inputs_and_outputs(args.input_dir) + else: + # Find an input file in the same directory as the model.onnx file + all_inputs, gold_results = find_inputs_and_outputs( + runner.benchmark_config.config.model_dir() + ) + + # Only one input is used per run, multiple inputs should be specified as + # separate runs + inputs = all_inputs[0] + + # Trigger the onnx.load call + eprint("Loading model...") + try: + onnx_model = runner.load_model() + except Exception as error: + flush_result( + result_directory=None, + run_config=runner.benchmark_config, + runtimes_ms=[], + shapes=[], + import_error="Failed ONNX load", + compile_time_ms=[], + output_deltas=[], + relay_ops=[], + framework_ops=[], + runtime_metadata=runner.metadata(), + ) + raise error + + if gold_results is None: + # Generate the expected results if necessary + eprint("Generating expected results at runtime") + gold_results = runner.run_onnx_cpu_inference(inputs) + + # Run the model a few times and extract timings + error, import_error, compile_time_ms, runtimes_ms, output_deltas = runner.run( + inputs=inputs, + gold_results=gold_results, + ) + + try: + framework_ops = extract_framework_ops(onnx_model) + except Exception as e: + framework_ops = [] + error = e + + # TODO: relay ops + + # Send the output results to a JSON file on disk + flush_result( + result_directory=None, + run_config=runner.benchmark_config, + runtimes_ms=runtimes_ms, + shapes=None, + import_error=import_error, + compile_time_ms=compile_time_ms, + output_deltas=output_deltas, + relay_ops=[], + framework_ops=framework_ops, + runtime_metadata=runner.metadata(), + ) + + # Re-raise any failures + if error is not None: + raise error + + +def generate(args, runner): + """ + Generate pairs of sample inputs and outputs + """ + import numpy as np + + np.random.seed(int(args.seed)) + n = int(args.n) + all_inputs = runner.generate_inputs(n=n) + output_dir = Path(args.result_directory) + + eprint("Loading ONNX model...") + onnx_model = runner.load_model() + + should_generate_outputs = not args.skip_run + + for i, inputs in enumerate(all_inputs): + input_path = output_dir / f"sample_input{i}.npy" + output_path = output_dir / f"sample_output{i}.npy" + + if input_path.exists() and not args.force: + eprint(f"Refusing to overwrite {input_path} since 
--force was not used") + exit(1) + + if output_path.exists() and not args.force: + eprint(f"Refusing to overwrite {output_path} since --force was not used") + exit(1) + + if should_generate_outputs: + desc = f"{input_path.name}, {output_path.name}" + else: + desc = f"{input_path.name}" + + eprint(f"[{i + 1} / {n}] Generating input and output ({desc})") + + if should_generate_outputs: + outputs = runner.run_onnx_cpu_inference(inputs) + + np.save(input_path, inputs) + + if should_generate_outputs: + np.save(output_path, outputs) + + +def parse_args(valid_executors: List[str]): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(help="benchmarking utilities", dest="command") + + def add_shared_arguments(sub): + sub.add_argument( + "--cuda-sm", + type=int, + default=75, + help="CUDA target sm level (default: 75, compute capability for Tesla T4)", + ) + sub.add_argument( + "-m", + "--model", + required=True, + help="the model slug to run (e.g. oss-onnx.t5-encoder-12@1)", + ) + sub.add_argument( + "--sha", + help="the model's sha256 to use to verify file integrity", + ) + sub.add_argument("--input-scale", help="scalar to scale np.random results by", default=1.0) + sub.add_argument( + "--shapes", + help="shapes as JSON (will be inferred if not provided), the $axes key can be used to fill in dynamic shapes by axis name", + ) + sub.add_argument( + "--files", + help="comma separated list of files to download", + ) + sub.add_argument( + "--tuning-steps", + help="if tuning should be used, the number of steps", + ) + sub.add_argument( + "--dtypes", + help="comma separated list of dtypes (will be inferred if not provided)", + ) + + # CLI for running models + run = subparsers.add_parser("run", help="run the benchmark") + run.add_argument("-i", "--input", help=".npy file to use for input") + run.add_argument("-o", "--output", help=".npy file to use for output") + add_shared_arguments(run) + + run.add_argument("--runs", help="number of test runs (default: 1)", default=1) + run.add_argument( + "--warmup-runs", + help="number of warmup runs (default: 0)", + default=0, + ) + run.add_argument( + "--toposort", + action="store_true", + help="toposort nodes in model before running", + ) + run.add_argument( + "--atol", + help="absolute tolerance (default: 0.0001)", + default=0.0001, + ) + run.add_argument( + "--rtol", + help="relative tolerance (default: 0.0001)", + default=0.0001, + ) + run.add_argument( + "--input-dir", + help="directory of sample_inputN.npy and sample_outputN.npy files", + ) + run.add_argument( + "--random-inputs", + action="store_true", + help="generate random values for inputs, execute on CPU to generate expected results at runtime", + ) + run.add_argument( + "-e", + "--executor", + required=True, + help=f"executor to use (options are {', '.join(valid_executors)})", + ) + + # CLI for generating inputs for a model + generate = subparsers.add_parser( + "generate", help="generate new output results for a set of inputs" + ) + generate.add_argument("--shape", help="input shapes as JSON") + generate.add_argument( + "--skip-run", + action="store_true", + help="only generate inputs, skip running the model and generating outputs", + ) + generate.add_argument( + "-f", + "--force", + action="store_true", + help="overwrite existing files", + ) + generate.add_argument("--seed", help="int to use for np.random.seed (default=0)", default=0) + generate.add_argument("-n", help="how many inputs to generate (default=5)", default=5) + generate.add_argument( + "-r", + "--result-directory", + 
required=True, + help="directory to store resulting .npy files in", + ) + + args = parser.parse_args() + + return args + + +if __name__ == "__main__": + # Find the possible values for --executor (i.e. the modules that have a .Runner attribute) + ignored_files = set( + [ + "all.py", + "cli.py", + "base.py", + "benchmarking_utils.py", + ] + ) + executors = [ + x.stem for x in Path(__file__).resolve().parent.glob("*.py") if x.name not in ignored_files + ] + + args = parse_args(executors) + + # Break apart the model slug + m = re.match(pattern=r"(.+)\.(.+)@(\d+)", string=args.model) + if m is None: + eprint( + f"--model must match the pattern '.@' (e.g. 'oss-onnx.t5-encoder-12@1'), found {args.model}" + ) + exit(1) + set, name, version = m.groups() + + # Find what should run the model + if hasattr(args, "executor"): + executor = importlib.import_module(args.executor) + else: + executor = importlib.import_module("onnx-nightly-cpu") + + # Check if shapes or dtypes were provided + shapes = None + if args.shapes is not None: + shapes = json.loads(args.shapes) + + dtypes = None + if args.dtypes is not None: + dtypes = [d.strip() for d in args.dtypes.split(",")] + + files = [] + if args.files is not None: + files = [x.strip() for x in args.files.split(",")] + + # Instantiate the runner + runner_cls = getattr(executor, "Runner") + runner = runner_cls( + benchmark_config=BenchmarkConfig( + config=ModelConfig( + **{ + "set": set, + "name": name, + "sha256": args.sha, + "version": version, + "flow_config": getattr(args, "executor", None), + "requires_toposort": args.toposort, + "tuning_steps": None if args.tuning_steps is None else int(args.tuning_steps), + "input_scale": float(args.input_scale), + "shapes": shapes, + "dtypes": dtypes, + "files": files, + } + ), + warmup_runs=int(args.warmup_runs), + test_runs=int(args.runs), + check_accuracy=True, + atol=float(args.atol), + rtol=float(args.rtol), + cuda_sm=args.cuda_sm, + ), + ) + + # Run the specified CLI command + if args.command == "generate": + generate(args, runner) + elif args.command == "run": + run(args, runner) + else: + eprint("Unknown command") + exit(1) diff --git a/scorecard/relax-coverage/runners/cloud_utils.py b/scorecard/relax-coverage/runners/cloud_utils.py new file mode 100644 index 0000000000..f3481bb908 --- /dev/null +++ b/scorecard/relax-coverage/runners/cloud_utils.py @@ -0,0 +1,111 @@ +import json +import random +import string +import subprocess +import sys +import os + +from typing import * +from pathlib import Path + +import psycopg2 + +from google.cloud import bigquery +from google.oauth2 import service_account + + +IS_IN_CI = os.getenv("IS_IN_CI", "0") == "1" + + +def eprint(*args): + print(*args, file=sys.stderr, flush=True) + + +_bigquery_client_and_config = None + + +def bigquery_client_and_config( + key_path: str = "gcp_auth.json", schema: Optional[List[bigquery.SchemaField]] = None +): + if not Path(key_path).exists(): + raise RuntimeError(f"{key_path} was not found, did you forget to mount it?") + + global _bigquery_client_and_config + if _bigquery_client_and_config is None: + credentials = service_account.Credentials.from_service_account_file( + key_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + client = bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) + + if schema is None: + schema = [ + bigquery.SchemaField("r", "STRING", mode="REQUIRED"), + ] + + job_config = bigquery.LoadJobConfig( + schema=schema, + ) + job_config.source_format = 
bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.autodetect = True + _bigquery_client_and_config = (client, job_config) + + return _bigquery_client_and_config + + +def bigquery_upload(jsonl_file: Path, dataset_id: str, table_id: str) -> int: + client, job_config = bigquery_client_and_config() + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) + with open(jsonl_file, "rb") as source_file: + job = client.load_table_from_file( + source_file, + table_ref, + location="us-west1", # Must match the destination dataset location. + job_config=job_config, + ) # API request + + job.result() + + return job + + +def postgres_upload(jsonl_file: Path, database: str, table_name: str) -> int: + """ + Uploads records in jsonl_file (one JSON document per line) to postgres + """ + rows = [(json.dumps(d["r"]),) for d in load_jsonl(jsonl_file=jsonl_file)] + sql = f"INSERT INTO {table_name} (r) VALUES (%s)" + conn = None + password = os.environ["POSTGRES_PASSWORD"] + ip = os.environ["POSTGRES_IP"] + user = os.getenv("POSTGRES_USER", "ci") + try: + conn = psycopg2.connect( + host=ip, + database=database, + user=user, + password=password, + ) + cur = conn.cursor() + cur.executemany(sql, rows) + conn.commit() + cur.close() + finally: + if conn is not None: + conn.close() + + return len(rows) + + +def aws_download(blob_name: str, out_path: Path, bucket_name: str = "scorecard-models"): + command = ["aws", "s3", "cp", f"s3://{bucket_name}/{blob_name}", out_path] + if IS_IN_CI: + command.append("--no-progress") + command = [str(c) for c in command] + eprint(f"+ {' '.join(command)}") + subprocess.run(command, check=True, stdout=sys.stderr) + return out_path diff --git a/scorecard/relax-coverage/runners/onnx-nightly-cpu.py b/scorecard/relax-coverage/runners/onnx-nightly-cpu.py new file mode 100644 index 0000000000..62c546a03a --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-nightly-cpu.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import * + +import os +import sys + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +# Load the nightly ONNX version from its install directory +sys.path.insert(0, os.environ["ONNX_NIGHTLY_PATH"]) +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): + def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["CPUExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx-nightly-trt.py b/scorecard/relax-coverage/runners/onnx-nightly-trt.py new file mode 100644 index 0000000000..7bbdfe2e94 --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-nightly-trt.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import * + +import os +import sys + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +# Load the nightly ONNX version from its install directory +sys.path.insert(0, os.environ["ONNX_NIGHTLY_PATH"]) +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): 
+ def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["TensorrtExecutionProvider", "CUDAExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx-trt.py b/scorecard/relax-coverage/runners/onnx-trt.py new file mode 100644 index 0000000000..d249d0fa0e --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-trt.py @@ -0,0 +1,24 @@ +from pathlib import Path +from typing import * + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): + def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["TensorrtExecutionProvider", "CUDAExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx_base.py b/scorecard/relax-coverage/runners/onnx_base.py new file mode 100644 index 0000000000..a6aa6e8944 --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx_base.py @@ -0,0 +1,82 @@ +from pathlib import Path +from typing import * +from types import ModuleType + +import numpy as np + +from benchmarking_utils import BenchmarkConfig, Timer, eprint, BaseRunner + + +class OnnxBase(BaseRunner): + def __init__( + self, + benchmark_config: BenchmarkConfig, + providers: List[str], + ort: ModuleType, + ): + self.ort = ort + self.providers = providers + + super().__init__(benchmark_config=benchmark_config) + + def metadata(self) -> Dict[str, Any]: + return { + "ort-version": self.ort.__version__, + } + + def run( + self, + inputs, + gold_results: List[np.ndarray], + ): + """ + Run an onnx `model` with onnxruntime's TensorRT EP + """ + runtimes_ms = [] + output_deltas = [] + sess_opt = self.ort.SessionOptions() + + run_config = self.benchmark_config + + id = run_config.config.id() + eprint(f"[{id}] Running onnx trt") + eprint( + f"[{id}] Running for {run_config.warmup_runs} warmups and {run_config.test_runs} tests" + ) + + # Set up an onnx inference on the specified providers + sess = self.ort.InferenceSession( + self._model.SerializeToString(), + sess_options=sess_opt, + providers=self.providers, + ) + + # Unwrap input if necessary + if isinstance(inputs, list) and len(inputs) == 1: + inputs = inputs[0] + else: + inputs = inputs + + output_names = [] + compile_time_ms = 0 + for i in range(run_config.warmup_runs): + eprint( + f"[{id}][{i + 1} / {run_config.warmup_runs}][onnx] Warmup {run_config.config.id()}" + ) + sess.run(output_names, inputs) + + # Run the model a few times and record the end to end execution time + for i in range(run_config.test_runs): + eprint( + f"[{id}][{i + 1} / {run_config.test_runs}][onnx] Running {run_config.config.id()}" + ) + with Timer() as timer: + output = sess.run(output_names, inputs) + + # Stash the runtime + runtimes_ms.append(timer.ms_duration) + + # Check accuracy + output_deltas.append([gold_results[i] - output[i] for i in range(len(output))]) + + return None, None, compile_time_ms, runtimes_ms, output_deltas diff --git a/scorecard/relax-coverage/runners/relax-cuda.py b/scorecard/relax-coverage/runners/relax-cuda.py new file mode 100644 index 0000000000..ad91bc213f --- /dev/null +++ 
b/scorecard/relax-coverage/runners/relax-cuda.py @@ -0,0 +1,19 @@ +from typing import * + + +from relax_base import RelaxBase + + +class RelaxCuda(RelaxBase): + name = "relax-cuda" + + def __init__(self, *args, **kwargs): + cuda_sm = kwargs["benchmark_config"].cuda_sm + super().__init__( + target=f"cuda -libs=thrust -arch=sm_{cuda_sm} -max_shared_memory_per_block=49152 -max_threads_per_block=1024 -thread_warp_size=32 -registers_per_block=65536", + *args, + **kwargs, + ) + + +Runner = RelaxCuda diff --git a/scorecard/relax-coverage/runners/relax-native.py b/scorecard/relax-coverage/runners/relax-native.py new file mode 100644 index 0000000000..b7f63038d6 --- /dev/null +++ b/scorecard/relax-coverage/runners/relax-native.py @@ -0,0 +1,14 @@ +from typing import * + + +from relax_base import RelaxBase + + +class RelaxNative(RelaxBase): + name = "relax-native" + + def __init__(self, *args, **kwargs): + super().__init__(target="llvm -mcpu=core-avx2", *args, **kwargs) + + +Runner = RelaxNative diff --git a/scorecard/relax-coverage/runners/relax_base.py b/scorecard/relax-coverage/runners/relax_base.py new file mode 100644 index 0000000000..91c1acba4c --- /dev/null +++ b/scorecard/relax-coverage/runners/relax_base.py @@ -0,0 +1,106 @@ +from pathlib import Path +from typing import * +from types import ModuleType + +import numpy as np + +from benchmarking_utils import ( + Timer, + eprint, + ImportError, + BaseRunner, +) + +import tvm +import logging +from tvm import octo +from tvm.relax.frontend.onnx.onnx_frontend import ONNXGraphImporter + + +class RelaxBase(BaseRunner): + def __init__(self, target, *args, **kwargs): + super().__init__(*args, **kwargs) + self.target = target + + def metadata(self) -> Dict[str, Any]: + return { + "relax-version": "unknown", + } + + def load_model(self) -> "onnx.ModelProto": + return self._model + + def run( + self, + inputs, + gold_results: List[np.ndarray], + ): + runtimes_ms = [] + output_deltas = [] + run_config = self.benchmark_config + + tvm_version = tvm.support.libinfo()["GIT_COMMIT_HASH"] + id = run_config.config.id() + eprint( + f"[{id}] Running octo.compile (from {tvm_version}) with shapes={self.benchmark_config.config.shapes}, target={self.target}, and tuning_steps={self.benchmark_config.config.tuning_steps}" + ) + eprint( + f"[{id}] Running for {run_config.warmup_runs} warmups and {run_config.test_runs} tests" + ) + + # Disable tuning logs + ms_logger = logging.getLogger("tvm.meta_schedule") + ms_logger.setLevel(logging.CRITICAL) + for name in logging.root.manager.loggerDict: + if "tvm" in name: + logger = logging.getLogger(name) + logger.setLevel(logging.CRITICAL) + + with Timer() as compile_timer: + try: + tvm_model = octo.compile( + self._model, + shape_dict=self.benchmark_config.config.shapes, + target=tvm.target.Target(self.target), + tuning_steps=self.benchmark_config.config.tuning_steps, + ) + except Exception as e: + return e, ImportError.FAILED_OCTO_COMPILE, 0, [], [] + + compile_time_ms = compile_timer.ms_duration + breakpoint() + + # NOTE: ONNX frontend sanitizes input names. This hack is brittle and presumes Python dict ordering is the same + # between invocations. The real fix should be that OctoModel carries a mapping of framework names to Relax names. 
+ importer = ONNXGraphImporter({}, {}) + for k in list(inputs): + sanitized_k = importer._sanitize_name(k) + if sanitized_k != k: + inputs[sanitized_k] = inputs[k] + del inputs[k] + + for i in range(run_config.warmup_runs): + eprint( + f"[{id}][{i + 1} / {run_config.warmup_runs}][{self.name}] Warmup {run_config.config.id()}" + ) + tvm_model.run(inputs) + + # Run the model a few times and record the end to end execution time + for i in range(run_config.test_runs): + eprint( + f"[{id}][{i + 1} / {run_config.test_runs}][{self.name}] Running {run_config.config.id()}" + ) + try: + with Timer() as timer: + output = tvm_model.run(inputs) + + # Stash the runtime + runtimes_ms.append(timer.ms_duration) + + except Exception as e: + return e, ImportError.FAILED_EXECUTION, 0, [], [] + + # Check accuracy + output_deltas.append([gold_results[i] - output[i] for i in range(len(output))]) + + return None, None, compile_time_ms, runtimes_ms, output_deltas diff --git a/scorecard/relax-coverage/runners/voltaml.py b/scorecard/relax-coverage/runners/voltaml.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scorecard/relax-coverage/test_coverage.py b/scorecard/relax-coverage/test_coverage.py new file mode 100644 index 0000000000..c1c0bf30bb --- /dev/null +++ b/scorecard/relax-coverage/test_coverage.py @@ -0,0 +1,412 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json +import os +import shlex +import tempfile +import shutil +import re +import sys +import datetime + +from pathlib import Path +from typing import * + + +import pytest +import tabulate +import yaml +import numpy as np + +from onnx import hub +from utils import * +from runners.cloud_utils import * + +# Directory to store output JSON files +JSON_SCHEMA = REPO_ROOT / "schema" / "schema.jsonschema" +OUTPUT_DIR = REPO_ROOT / ".coverage_results" +UPLOAD_GCP = os.getenv("UPLOAD_GCP", "0") == "1" +UPLOAD_PG = os.getenv("UPLOAD_PG", "0") == "1" +TEST_RUNS = int(os.getenv("TEST_RUNS", "1")) +WARMUP_RUNS = int(os.getenv("WARMUP_RUNS", "0")) +ONNX_NIGHTLY_PATH = os.environ["ONNX_NIGHTLY_PATH"] + +# Load additional tests from disk +MODELS_YAML = REPO_ROOT / "models.yaml" +HUB_MODELS_YAML = REPO_ROOT / "hub_models.yaml" +MODELS: List[Dict[str, Any]] = [] +CLI = Path(__file__).resolve().parent / "runners" / "cli.py" + + +def generate_configs(item: Dict[str, Any]) -> List[Dict[str, Any]]: + return [{**item, "executor": config} for config in item["configs"]] + + +def get_configs_from_file(path: Path) -> List[Dict[str, Any]]: + configs = [] + if path.exists(): + with open(path) as f: + data = yaml.safe_load(f) + + for item in data: + configs.extend(generate_configs(item)) + return configs + + +# MODELS = get_configs_from_file(HUB_MODELS_YAML) + get_configs_from_file(MODELS_YAML) +MODELS = get_configs_from_file(MODELS_YAML) + + +def flush_print(*args): + print(*args, flush=True) + + +@pytest.fixture(scope="function", autouse=True) +def show_test_name(request): + flush_print(f"Test '{request.node.nodeid}' STARTED") + + def fin(): + flush_print(f"Test '{request.node.nodeid}' COMPLETED") + + request.addfinalizer(fin) + + +@pytest.fixture(scope="session") +def upload_coverage(request): + output_dir = gen_test_output_dir(base=OUTPUT_DIR) + output_dir = output_dir / "coverage" + + def finalizer(): + pass + + if UPLOAD_GCP: + flush_print(f"[GCP] Uploading records from {catted_results_path}") + dataset_id = "contrived_test_results_1" + # prod table + table_id = "sampledate_coverage_name" + + # testing table + # table_id = "gitlab_ci_data" + job = bigquery_upload( + jsonl_file=catted_results_path, dataset_id=dataset_id, table_id=table_id + ) + flush_print( + f"[GCP] Done uploading {job.output_rows} records to {dataset_id}:{table_id}" + ) + else: + flush_print( + f"[GCP] Skipping result upload for {output_dir} since UPLOAD_GCP was not 1 ({os.getenv('UPLOAD_GCP', 'unset')})" + ) + + request.addfinalizer(finalizer) + + yield output_dir + + +@pytest.fixture(scope="session") +def result_directory(request): + output_dir = gen_test_output_dir(base=OUTPUT_DIR) + + def finalizer(): + catted_results_path = output_dir / "concatted_results.jsonl" + + flush_print(f"Preparing files in {output_dir} for upload") + outputs = list(output_dir.glob("*.json")) + if len(outputs) == 0: + flush_print(f"Skipping result upload for {output_dir} there were no result files") + return + + with open(catted_results_path, "w") as f: + concat_test_results( + schema_file=JSON_SCHEMA, + test_results=list(output_dir.glob("*.json")), + output=f, + ) + + if UPLOAD_GCP: + flush_print(f"[GCP] Uploading records from {catted_results_path}") + dataset_id = "contrived_test_results_1" + # prod table + table_id = "contrived-string" + + # testing table + # table_id = "gitlab_ci_data" + job = bigquery_upload( + jsonl_file=catted_results_path, dataset_id=dataset_id, table_id=table_id + ) + flush_print( + f"[GCP] Done uploading {job.output_rows} records 
to {dataset_id}:{table_id}" + ) + else: + flush_print( + f"[GCP] Skipping result upload for {output_dir} since UPLOAD_GCP was not 1 ({os.getenv('UPLOAD_GCP', 'unset')})" + ) + + if UPLOAD_PG: + database = "scorecard" + table_name = "gitlab_data" + rows = postgres_upload( + jsonl_file=catted_results_path, + database=database, + table_name=table_name, + ) + flush_print(f"[PG] Done uploading {rows} records to {database}:{table_name}") + else: + flush_print( + f"[PG] Skipping result upload for {output_dir} since UPLOAD_PG was not 1 ({os.getenv('UPLOAD_PG', 'unset')})" + ) + + # Plain text report + data = load_jsonl(jsonl_file=catted_results_path) + rows = [] + for item in data: + item = json.loads(item["r"]) + name = f"{item['model_set_id']} / {item['model_name']} / {item['config_name']}" + if len(item["raw_stats_ms"]) == 0: + rows.append([name, "no data", "no data", "no data", "no data"]) + continue + + runtimes_s = [x / 1000.0 for x in item["raw_stats_ms"]] + if item["import_error"] is None: + rows.append( + [ + name, + item["inference_stats"]["mean_sec"], + np.min(runtimes_s), + np.var(runtimes_s), + item["inference_stats"]["cov"], + ] + ) + else: + rows.append( + [ + name, + "err", + "err", + "err", + "err", + ] + ) + rows = sorted(rows, key=lambda row: row[0]) + + flush_print(f"benchmark over {TEST_RUNS} runs and {WARMUP_RUNS} warmup runs") + flush_print( + tabulate.tabulate(rows, headers=["model", "mean (s)", "min (s)", "var (s^2)", "cov"]) + ) + + request.addfinalizer(finalizer) + + yield output_dir + + +BAD_WARNINGS = [ + "UserWarning: Specified provider 'TensorrtExecutionProvider' is not in available provider names", + "UserWarning: Specified provider 'CUDAExecutionProvider' is not in available provider names", +] + + +def _test_impl(request, slug, result_directory, run_config: Dict[str, Any]): + DATE_FORMAT = "%Y-%m-%d-%H:%M:%S" + run_at = datetime.datetime.now().strftime(DATE_FORMAT) + executor = run_config["executor"] + cmd = [ + sys.executable, + CLI, + "run", + "--sha", + run_config["sha256"], + "--model", + slug, + "--executor", + executor, + "--random-inputs", + "--runs", + TEST_RUNS, + "--warmup-runs", + WARMUP_RUNS, + ] + if "shapes" in run_config and run_config["shapes"] is not None: + cmd.append("--shapes") + cmd.append(json.dumps(run_config["shapes"])) + + if run_config.get("requires_toposort", False): + cmd.append("--toposort") + + if "cuda-sm" in run_config: + cmd.append("--cuda-sm") + cmd.append(run_config["cuda-sm"]) + + if "tuning-steps" in run_config: + cmd.append("--tuning-steps") + cmd.append(run_config["tuning-steps"]) + + cmd = [shlex.quote(str(c)) for c in cmd] + cmd = " ".join(cmd) + env = os.environ.copy() + env["CUDA_PATH"] = "/usr/local/cuda-11.8" + env["CUDA_MODULE_LOADING"] = "LAZY" + flush_print(f"+ {cmd}") + with tempfile.NamedTemporaryFile() as stderr_file: + full_cmd = cmd + f" 2> >(tee -a {stderr_file.name} >&2)" + proc = subprocess.run( + full_cmd, + check=False, + stdout=subprocess.PIPE, + encoding="utf-8", + shell=True, + env=env, + executable=shutil.which("bash"), + ) + + with open(stderr_file.name) as f: + stderr = f.read().strip() + + stdout = proc.stdout.strip() + + if stdout == "": + raise RuntimeError(f"No stdout found from process. 
stderr: {stderr}")

    try:
        data = json.loads(stdout)
    except json.decoder.JSONDecodeError as e:
        raise RuntimeError(f"Could not decode JSON: {e}\n{stdout}")

    result_directory.mkdir(exist_ok=True, parents=True)
    data["test_run_id"] = f"{result_directory.name}-{run_at}-{slug}"
    data["run_at"] = run_at
    data["test_suite_id"] = result_directory.name
    i = 0
    while True:
        output_path = result_directory / f"{executor}_{i}.json"
        if not output_path.exists():
            break
        i += 1

    flush_print(f"Writing to {output_path}")
    with open(output_path, "w") as f:
        json.dump(data, f, indent=2)

    if proc.returncode != 0 and not stderr.endswith("free(): invalid pointer"):
        raise RuntimeError(f"Process failed: stdout:\n{proc.stdout}\nstderr:{stderr}")

    # Prints from C++ don't get captured by Python at all, so check them after the
    # fact to see if cli.py ran any native code that printed warnings we don't
    # want to see
    for warning in BAD_WARNINGS:
        if warning in stdout:
            raise RuntimeError(f"Found {warning} in stdout:\n{stdout}")
        if warning in stderr:
            raise RuntimeError(f"Found {warning} in stderr:\n{stderr}")


def regex_for_unsupported_ops(ops: List[str]):
    """
    The list of unsupported ops can be in any order
    """
    match_any_op = "|".join(ops)
    match_any_op = f"({match_any_op})"
    op_str = ", ".join([match_any_op for i in range(len(ops))])
    return re.compile(
        f"tvm.error.OpNotImplemented: The following operators are not supported for frontend ONNX: {op_str}"
    )


encoder_ops = regex_for_unsupported_ops(ops=["Range", "Log", "Abs", "Greater", "Less", "Min"])
decoder_ops = regex_for_unsupported_ops(
    ops=["Log", "Max", "LessOrEqual", "Range", "Less", "Min", "Neg"]
)
pt_decoder_ops = regex_for_unsupported_ops(
    ops=["Less", "Min", "Range", "Neg", "Log", "Identity", "LessOrEqual"]
)
gptj_ops = regex_for_unsupported_ops(ops=["Einsum", "Cos", "Sin", "Range", "Neg"])
dynamic_shape = "AttributeError: has no attribute value"
missing_weights = re.compile(r"No such file or directory: '.*/weights.pb'")
missing_trt = "'TensorrtExecutionProvider' is not in available provider"
cuda_initialization = "CUDA initialization failure with error: 35"
missing_shapes_in_models_yaml = "Unspecified dynamic shapes detected"
cutlass_offload_failure = "KeyError: "

# Maps the pytest slug of a model/executor combination (see pytest_slug in
# utils.py) to the error pattern (plain substring or compiled regex) that the
# corresponding test is expected to fail with
EXPECTED_FAILURES: Dict[str, Any] = {}


def matches(pattern, text: str) -> bool:
    if isinstance(pattern, str):
        return pattern in text

    return pattern.search(text) is not None


@parameterize_configs(MODELS)
def test_offload_coverage(request, show_test_name, result_directory, run_config: Dict[str, Any]):
    """ """


@parameterize_configs(MODELS)
def test_mean_runtime(request, show_test_name, result_directory, run_config: Dict[str, Any]):
    """
    Tests end to end mean/p95 runtime for models on available backends
    """
    if run_config["executor"] == "relax-native":
        pytest.skip("relax-native results are slow and not needed")

    cuda_arg = request.config.getoption("--cuda-sm")
    if cuda_arg:
        run_config["cuda-sm"] = cuda_arg

    slug = f"{run_config['set']}.{run_config['name']}@{run_config['version']}"
    pyslug = pytest_slug(run_config)
    xfail_regex = EXPECTED_FAILURES.get(pyslug)

    # The entire test runner is wrapped in this try..except to implement some
    # custom behavior around xfailing, namely that the xfail happens with a
    # specific message in the error output.
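    # (Note that pytest.xfail() raises its own exception, so when the expected
    # error pattern matches below the test stops there and is reported as
    # xfailed; the bare `raise e` only runs for failures that are not listed in
    # EXPECTED_FAILURES.)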
    try:
        _test_impl(request, slug=slug, result_directory=result_directory, run_config=run_config)
    except Exception as e:
        if xfail_regex is not None:
            # This test should xfail, check if the error matches
            if not matches(xfail_regex, str(e)):
                # The error does not match, don't xfail and raise a normal exception
                raise RuntimeError(
                    f"Test {pyslug} is in EXPECTED_FAILURES but the expected error regex {xfail_regex} was not found in {str(e)}"
                )
            else:
                # The test failed and the message matches, xfail
                pytest.xfail(reason=f"{pyslug} is in EXPECTED_FAILURES")

        raise e

    # The test passed, but if the slug is in the xfail list this should be an
    # error (to mimic pytest.mark.xfail(strict=True))
    if xfail_regex is not None:
        raise RuntimeError(f"Expected test {pyslug} to fail but it passed")


if __name__ != "__main__":
    # Running under pytest
    for config in MODELS:
        pass
        # print(
        #     f"Running {config.config.id()} on {config.config.flow_config} ({config.warmup_runs} warmups, {config.test_runs} runs)"
        # )
diff --git a/scorecard/relax-coverage/utils.py b/scorecard/relax-coverage/utils.py new file mode 100644 index 0000000000..9c6f6a5f2d --- /dev/null +++ b/scorecard/relax-coverage/utils.py @@ -0,0 +1,99 @@
+import json
+import random
+import string
+import sys
+import os
+
+from typing import TextIO, List, Dict, Any
+from pathlib import Path
+
+import commentjson
+import jsonschema
+import pytest
+
+
+REPO_ROOT = Path(__file__).parent.parent
+FROM_HUB = object()
+ONNX_REPO = "onnx/models"
+ONNX_REPO_SHA = "8e893eb39b131f6d3970be6ebd525327d3df34ea"
+IS_IN_CI = os.getenv("IS_IN_CI", "0") == "1"
+
+
+def pytest_slug(item: Dict[str, Any]):
+    return f"{item['set']}.{item['name']}v{item['version']}-{item['executor']}"
+
+
+def parameterize_configs(configs: List[Dict[str, Any]]):
+    names = [pytest_slug(c) for c in configs]
+    return pytest.mark.parametrize("run_config", configs, ids=names)
+
+
+_manifest = None
+
+
+def manifest():
+    global _manifest
+    if _manifest is None:
+        with open(Path(__file__).resolve().parent / "ONNX_HUB_MANIFEST.json") as f:
+            _manifest = json.load(f)
+
+    return _manifest
+
+
+def find_model(name):
+    for model in manifest():
+        if model["model_path"] == name:
+            return model
+    raise ValueError(f"{name} not found")
+
+
+def load_jsonl(jsonl_file: Path) -> List[Dict[str, Any]]:
+    with open(jsonl_file) as f:
+        data = [json.loads(line) for line in f.readlines()]
+
+    return data
+
+
+def gen_test_output_dir(base: Path) -> Path:
+    """
+    Creates a 5 character id for the test used to store the result JSONs
+    """
+
+    if "TEST_SUITE_ID" in os.environ:
+        # CI sets this in the docker build
+        test_suite_id = os.environ["TEST_SUITE_ID"]
+        print(f"Using TEST_SUITE_ID from env: {test_suite_id}")
+        return base / test_suite_id
+
+    for _ in range(1000):
+        test_suite_id = "".join([random.choice(string.ascii_lowercase) for _ in range(5)])
+        test_output_dir = base / test_suite_id
+        if not test_output_dir.exists():
+            return test_output_dir
+
+    raise RuntimeError("Unable to generate a unique ID for this test run")
+
+
+def _load_and_strip_comments(f):
+    return commentjson.loads(f.read())
+
+
+def concat_test_results(schema_file: Path, test_results: List[Path], output: TextIO):
+    with open(schema_file) as f:
+        schema = _load_and_strip_comments(f)
+
+    for path in test_results:
+        with open(path, "r") as f:
+            try:
+                data = _load_and_strip_comments(f)
+            except Exception as e:
+                print(f"while loading {path}:", file=sys.stderr, flush=True)
+                raise e
+
+            try:
+                jsonschema.validate(instance=data, schema=schema)
+            except jsonschema.ValidationError as e:
+                print(f"while validating {path}:", file=sys.stderr, flush=True)
+                raise e
+            output.write(json.dumps({"r": json.dumps(data)}))
+            output.write("\n")
diff --git a/scorecard/relax_scorecard/__init__.py b/scorecard/relax_scorecard/__init__.py new file mode 100644 index 0000000000..e69de29bb2
diff --git a/scorecard/relax_scorecard/concat_testdata.py b/scorecard/relax_scorecard/concat_testdata.py new file mode 100644 index 0000000000..171c40620e --- /dev/null +++ b/scorecard/relax_scorecard/concat_testdata.py @@ -0,0 +1,64 @@
+import argparse
+import commentjson
+import json
+import jsonschema
+import pathlib
+import sys
+
+
+def _load_and_strip_comments(f):
+    return commentjson.loads(f.read())
+
+
+def concat_test_results(schema, test_results, output):
+    for path in test_results:
+        with open(path, "r") as f:
+            try:
+                data = _load_and_strip_comments(f)
+            except Exception as e:
+                print(f"while loading {path}:", file=sys.stderr)
+                raise e
+
+            try:
+                jsonschema.validate(instance=data, schema=schema)
+            except jsonschema.ValidationError as e:
+                print(f"while validating {path}:", file=sys.stderr)
+                raise e
+            output.write(json.dumps({"r": json.dumps(data)}))
+            output.write("\n")
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--schema",
+        type=pathlib.Path,
+        required=True,
+        help="Path to the JSON schema describing the test data",
+    )
+    parser.add_argument(
+        "test_results",
+        type=pathlib.Path,
+        nargs="+",
+        help="Path to test results which should be concatenated",
+    )
+
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    args = parse_args(argv)
+
+    with open(args.schema) as f:
+        schema = _load_and_strip_comments(f)
+
+    concat_test_results(schema, args.test_results, sys.stdout)
+    print(
+        f"Prepared {len(args.test_results)} test results for upload to BigQuery",
+        file=sys.stderr,
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/scorecard/schema/schema.jsonschema b/scorecard/schema/schema.jsonschema new file mode 100644 index 0000000000..2366fe5806 --- /dev/null +++ b/scorecard/schema/schema.jsonschema @@ -0,0 +1,68 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "$id": "https://example.com/product.schema.json",
+
+    "type": "object",
+    "properties": {
+        // Identifying fields
+        "test_run_id": {"type": "string", "description": "Uniquely identifies this benchmarking run"},
+        "model_set_id": {"type": "string", "description": "Identifies the group of models to which this one belongs"},
+        "model_name": {"type": "string", "description": "Uniquely identifies the model within the model set"},
+
+        // Operators
+        "framework_ops": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Name that uniquely identifies the per-framework operator"},
+                    "op_type": {"type": "string", "description": "Name of the operator"},
+                },
+            },
+        },
+        "relay_ops": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Name of the operator"},
+                    "framework_op_index": {"type": "integer", "description": "Index into framework_ops of the framework operator that generated this one"},
+                    "schedule_method": {"type": "string", "description": "one of cutlass or native"},
+                },
+            },
+        },
+        "relay_fusion_groups": {
+            "type": "array",
+            "items": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "description": "Index into relay_ops of an operator in this fusion group"
+                },
+            },
+        },
+
+        // Test results
+        "tvm_latency": {
+            "type": "object",
+            "properties": {
+                "config_name": {
+                    "type": "string",
+                    "description": "Describes the configuration of the test runner to the extent needed to differentiate between configurations."
+                },
+                "num_iterations": {
+                    "type": "integer",
+                    "description": "Number of inference iterations as part of the mean"
+                },
+                "mean_sec": {
+                    "type": "number",
+                    "description": "Mean inference latency, in seconds. Excludes time spent copying data to and from the input and output tensor memory."
+                },
+                "p95_sec": {
+                    "type": "number",
+                    "description": "95th percentile inference latency, in seconds. Excludes time spent copying data to and from the input and output tensor memory."
+                },
+            },
+        },
+    },
+}
diff --git a/scorecard/scripts/show_node_info.sh b/scorecard/scripts/show_node_info.sh new file mode 100755 index 0000000000..cf836d8e13 --- /dev/null +++ b/scorecard/scripts/show_node_info.sh @@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euxo pipefail
+
+apt update
+apt install -y curl
+
+echo "===== EC2 INFO ====="
+function ec2_metadata() {
+    # See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+    curl -w '\n' -fsSL "http://169.254.169.254/latest/meta-data/$1" || echo failed
+}
+
+ec2_metadata ami-id
+ec2_metadata instance-id
+ec2_metadata instance-type
+ec2_metadata hostname
+ec2_metadata public-hostname
+
+echo "===== RUNNER INFO ====="
+df --human-readable
+nvidia-smi || true
+lscpu
+free
diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py index 7753961c17..7ed0720893 100644 --- a/tests/lint/check_file_type.py +++ b/tests/lint/check_file_type.py @@ -93,6 +93,8 @@
     "groovy",
     # Python-parseable config files
     "ini",
+    # for scorecard
+    "jsonschema",
 }

 # List of file names allowed