diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index e7c6d95a68..ca01225313 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,5 +1,15 @@ +variables: + GCP_AUTH_JSON: $gcpAuthJson + AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID + AWS_DEFAULT_REGION: $AWS_DEFAULT_REGION + AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY + IS_IN_CI: "1" + stages: - prepare-auth + - build-docker + - test + update_token: image: @@ -9,5 +19,76 @@ update_token: before_script: - aws --version script: - - echo hello + - USER=AWS + - TOKEN=$(aws ecr get-login-password) + - AUTH=$(echo -n "$USER:$TOKEN" | base64 | tr -d "\n") + - | + set -eux + curl --request PUT --header "PRIVATE-TOKEN: $GITLAB_PERSONAL_ACCESS_TOKEN" \ + --silent --output /dev/null --show-error --fail \ + "https://gitlab.com/api/v4/projects/$CI_PROJECT_ID/variables/AWS_ECR_AUTH" --form "value=$AUTH" + only: + variables: + - $CI_PIPELINE_SOURCE == "web" + - $CI_COMMIT_REF_NAME == "main" + +docker_build: + stage: build-docker + image: docker:20 + dependencies: + - update_token + tags: + - cpu-sole-tenant + script: + - apk add --update py-pip + - pip install awscli + - NO_CACHE=1 sh ./scorecard/docker/build.sh + # Generate a test suite ID to be used in later runs so all the concurrent results can be grouped together + - sh -c 'TEST_SUITE_ID=$(tr -dc a-z > output.env' + variables: + DOCKER_HOST: dind-service.kube-system.svc.cluster.local:2375 + PUSH_TO_ECR: 1 + GIT_COMMIT_SHA: $CI_COMMIT_SHA + artifacts: + reports: + dotenv: output.env + +.benchmark_template: &benchmark_template + tags: + - gpu-triton + stage: test + dependencies: + - docker_build + image: + # name: 186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:2023-03-10-b4fb5b6 + name: 186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:$TAG + script: | + set -eux + ls + ./scorecard/scripts/show_node_info.sh + mkdir model-data + echo "$GCP_AUTH_JSON" > gcp_auth.json + export UPLOAD_GCP=1 + export TEST_RUNS=10 + export WARMUP_RUNS=3 + pytest --tb=native -rA -v -s -q scorecard/relax-coverage/ -k "$PYTEST_FILTER" + +benchmarks-baseline: + <<: *benchmark_template + variables: + PYTEST_FILTER: onnx-trt + +benchmarks-relax: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and not stable-diffusion + +benchmarks-relax-sd-unet: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and stable-diffusion and unet +benchmarks-relax-sd-vae: + <<: *benchmark_template + variables: + PYTEST_FILTER: relax-cuda and stable-diffusion and vae diff --git a/scorecard/Makefile b/scorecard/Makefile new file mode 100644 index 0000000000..53d5375cc6 --- /dev/null +++ b/scorecard/Makefile @@ -0,0 +1,17 @@ +TEST_DATA ?= build/testdata.jsonl + +clean: + rm -rf build + +build/venv-created.touch: poetry.lock pyproject.toml + poetry install + mkdir -p build + touch build/venv-created.touch + +prepare_testdata: build/venv-created.touch testdata/*.jsonc schema/schema.jsonschema relax_scorecard/*.py + mkdir -p build + poetry run python3 -m relax_scorecard.concat_testdata --schema schema/schema.jsonschema testdata/*.jsonc >"${TEST_DATA}" + @echo "Prepared testdata in ${TEST_DATA}" + + +.DEFAULT_GOAL = prepare_testdata diff --git a/scorecard/bashrc.sh b/scorecard/bashrc.sh new file mode 100644 index 0000000000..7719d4e159 --- /dev/null +++ b/scorecard/bashrc.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +echo "scorecard Docker image + +ensure S3 credentials are set up (ask the team to get a new set): +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... 
+ +run tests with: +pytest --tb=native -v -s -q relax-coverage +" diff --git a/scorecard/docker/Dockerfile.auth-test b/scorecard/docker/Dockerfile.auth-test new file mode 100644 index 0000000000..5589580043 --- /dev/null +++ b/scorecard/docker/Dockerfile.auth-test @@ -0,0 +1,5 @@ +FROM scratch + +COPY docker/Dockerfile.auth-test / + +ARG TVM_BUILT_AT diff --git a/scorecard/docker/Dockerfile.scorecard b/scorecard/docker/Dockerfile.scorecard new file mode 100644 index 0000000000..26039890e3 --- /dev/null +++ b/scorecard/docker/Dockerfile.scorecard @@ -0,0 +1,93 @@ +# TensorRT image +# uses CUDA 11.7 +FROM nvcr.io/nvidia/tensorrt:22.12-py3 +# FROM nvcr.io/nvidia/tensorrt:23.02-py3 # uses CUDA 12.0, not supported on the gpu-triton runners' CUDA driver + +# CUDA images (requires us to manually install tensorrt) +# FROM nvidia/cuda:11.7.1-devel-ubuntu22.04 +# FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 + +WORKDIR /opt/scorecard + +RUN apt update && DEBIAN_FRONTEND=noninteractive apt install -y \ + build-essential \ + curl \ + fish \ + git \ + python3 \ + libpq-dev \ + postgresql \ + postgresql-contrib \ + python3-dev \ + python3-pip \ + sudo \ + vim \ + wget \ + ; + +# llvm +RUN echo "deb http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" >> /etc/apt/sources.list +RUN echo "deb-src http://apt.llvm.org/focal/ llvm-toolchain-focal-15 main" >> /etc/apt/sources.list +RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add +RUN apt update && apt install -y libllvm-15-ocaml-dev \ + libllvm15 \ + llvm-15 \ + llvm-15-dev \ + llvm-15-runtime + +# python dependencies +RUN python3 -m pip install --no-cache-dir \ + cmake \ + commentjson==0.9.0 \ + google-cloud-bigquery==3.5.0 \ + jinja2 \ + jsonschema==4.17.3 \ + ninja \ + nvidia-tensorrt \ + onnx \ + onnxruntime-gpu \ + psycopg2==2.9.5 \ + pytest \ + pytest-xdist \ + pyyaml \ + tabulate==0.9.0 \ + torch \ + typing_extensions \ + xgboost \ + ; + +RUN python3 -m pip --no-cache-dir install onnx_graphsurgeon==0.3.26 --index-url https://pypi.ngc.nvidia.com --no-deps + +# onnx nightly +RUN mkdir /opt/onnx_nightly +RUN PYTHONUSERBASE=/opt/onnx_nightly pip install --user \ + --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ \ + # ort-nightly \ + ort-nightly-gpu +ENV ONNX_NIGHTLY_PATH /opt/onnx_nightly/lib/python3.10/site-packages + +# Build TVM +ARG TVM_BUILT_AT +RUN git clone https://github.com/octoml/relax --recursive +RUN cd relax && git fetch origin && git config user.name test && git config user.email test@example.com + +# Add this line to build in an un-merged PR +# RUN PR_NUMBER=NN bash -c 'cd relax && curl -L "https://github.com/octoml/relax/pull/$PR_NUMBER.diff" | patch -p1 -N -d . && git add . && git commit -m"PR #$PR_NUMBER"' +RUN bash -c 'cd relax && curl -L "https://github.com/octoml/relax/compare/TUZ-145.diff" | patch -p1 -N -d . && git add . && git commit -m"Add TUZ-145"' + +RUN rm -rf relax/build +COPY docker/build_relax.sh docker/build_relax.sh +RUN bash docker/build_relax.sh +RUN cd relax/python && python3 -m pip install --no-cache-dir -e . 
+ +# aws CLI +RUN pip install awscli + +# testbench code +COPY relax-coverage relax-coverage +COPY schema schema +COPY models.yaml models.yaml +COPY hub_models.yaml hub_models.yaml + +ENV ORT_TENSORRT_FP16_ENABLE 1 +ENV AWS_DEFAULT_REGION us-west-2 diff --git a/scorecard/docker/build.sh b/scorecard/docker/build.sh new file mode 100755 index 0000000000..1e0458b637 --- /dev/null +++ b/scorecard/docker/build.sh @@ -0,0 +1,47 @@ +#!/bin/bash +set -eux + +set +x +source docker/retry.sh +set -x + +PUSH_TO_ECR="${PUSH_TO_ECR:=0}" +NO_CACHE="${NO_CACHE:=0}" +TVM_BUILT_AT="${TVM_BUILT_AT:=0}" +RETRIES="${RETRIES:=5}" +IMAGE_NAME="${IMAGE_NAME:=scorecard}" + +CACHE_ARG="" +if [ "$NO_CACHE" == "1" ]; then + CACHE_ARG="--no-cache" +fi + +retry $RETRIES docker build . --build-arg TVM_BUILT_AT=$TVM_BUILT_AT -f docker/Dockerfile.${IMAGE_NAME} $CACHE_ARG --tag ${IMAGE_NAME}:latest + +# # testing code to skip the docker build but still have an image to work with +# docker pull hello-world +# docker tag hello-world scorecard:latest + +if [ "$PUSH_TO_ECR" == "1" ]; then + DATE=$(date '+%Y-%m-%d') + HASH=${GIT_COMMIT_SHA:0:7} + TAG="$DATE-$HASH" + + REGION="us-west-2" + ACCOUNT_ID="186900524924" + + # Make 'docker push' authenticated with ECR + aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com + + # Push to ECR registry (latest) + retry 5 docker tag ${IMAGE_NAME}:latest $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:latest + retry 5 docker push $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:latest + + # Push to ECR registry (fixed tag) + retry 5 docker tag ${IMAGE_NAME}:latest $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG + retry 5 docker push $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG + + # Save the tag so it can be used later + echo "TAG=$TAG" >> output.env + echo "ECR_IMAGE=$ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com/${IMAGE_NAME}:$TAG" >> output.env +fi diff --git a/scorecard/docker/build_relax.sh b/scorecard/docker/build_relax.sh new file mode 100755 index 0000000000..216ac83ec9 --- /dev/null +++ b/scorecard/docker/build_relax.sh @@ -0,0 +1,17 @@ +#!/bin/bash +set -euxo pipefail +cd relax +mkdir -p build +cd build +cmake -GNinja \ + -DCMAKE_LINKER=/usr/bin/lld-15 \ + -DCMAKE_CUDA_ARCHITECTURES=75 \ + -DUSE_LLVM=llvm-config-15 \ + -DSUMMARIZE=1 \ + -DUSE_CUDA=1 \ + -DUSE_MICRO=1 \ + -DCMAKE_BUILD_TYPE=Release \ + -DUSE_CUTLASS=1 \ + -DUSE_THRUST=1 \ + .. +cmake --build . 
-- diff --git a/scorecard/docker/dev.sh b/scorecard/docker/dev.sh new file mode 100755 index 0000000000..dc40ea27aa --- /dev/null +++ b/scorecard/docker/dev.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +set -euxo pipefail + +# NB: Also source MODEL_DATA_DIR and GCP_AUTH_JSON from a .env or whatever +# is relevant for the running platform +set +x +UPLOAD_GCP="${UPLOAD_GCP:=0}" +UPLOAD_PG="${UPLOAD_PG:=0}" +TEST_RUNS="${TEST_RUNS:=1}" +WARMUP_RUNS="${WARMUP_RUNS:=0}" +IMAGE="${IMAGE:=scorecard}" +MODEL_DATA_DIR="${MODEL_DATA_DIR:=model-data}" +GCP_AUTH_JSON="${GCP_AUTH_JSON:=none.json}" +PWD=$(pwd) + +touch .fish_history +sudo rm -rf doc-relax +mkdir -p doc-relax +mkdir -p onnx-hub-cache +mkdir -p model-data + +set -x + +docker run \ + --gpus all \ + --env TEST_RUNS=$TEST_RUNS \ + --env WARMUP_RUNS=$WARMUP_RUNS \ + --env UPLOAD_GCP=$UPLOAD_GCP \ + --env UPLOAD_PG=$UPLOAD_PG \ + -v $PWD/$MODEL_DATA_DIR:/opt/scorecard/model-data \ + -v $GCP_AUTH_JSON:/opt/scorecard/gcp_auth.json:ro \ + -v $PWD/.coverage_results:/opt/scorecard/.coverage_results \ + -v $PWD/.tuning_records:/opt/scorecard/.tuning_records \ + -v $PWD/.fish_history:/root/.local/share/fish/fish_history \ + -v $PWD/relax-coverage:/opt/scorecard/relax-coverage \ + -v $PWD/schema:/opt/scorecard/schema \ + -v $PWD/scripts:/opt/scorecard/scripts \ + -v $PWD/models.yaml:/opt/scorecard/models.yaml \ + -v $PWD/hub_models.yaml:/opt/scorecard/hub_models.yaml \ + --mount type=volume,dst=/opt/scorecard/relax,volume-driver=local,volume-opt=type=none,volume-opt=o=bind,volume-opt=device=$PWD/doc-relax \ + --mount type=volume,dst=/root/.cache/onnx/hub,volume-driver=local,volume-opt=type=none,volume-opt=o=bind,volume-opt=device=$PWD/onnx-hub-cache \ + -it $IMAGE \ + fish diff --git a/scorecard/docker/output_login.sh b/scorecard/docker/output_login.sh new file mode 100755 index 0000000000..6b6997202f --- /dev/null +++ b/scorecard/docker/output_login.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -euxo pipefail + +REGION="us-west-2" +ACCOUNT_ID="186900524924" +aws ecr get-login-password --region $REGION | docker login --username AWS --password-stdin $ACCOUNT_ID.dkr.ecr.$REGION.amazonaws.com +AUTH_JSON=$(cat ~/.docker/config.json | tr '\n' ' ') +echo "$AUTH_JSON" >> output.env diff --git a/scorecard/docker/retry.sh b/scorecard/docker/retry.sh new file mode 100644 index 0000000000..5fca5e74f8 --- /dev/null +++ b/scorecard/docker/retry.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -eux + +retry() { + local max_retries=$1 + shift + local n=0 + until [ "$n" -ge "$max_retries" ] + do + "$@" && break + n=$((n+1)) + if [ "$n" -eq "$max_retries" ]; then + echo "failed to update after attempt $n / $max_retries, giving up" + exit 1 + fi + + WAIT=$(( ( RANDOM % 200 ) + 30 )) + echo "failed to update $n / $max_retries, waiting $WAIT to try again" + sleep "$WAIT" + done +} diff --git a/scorecard/docker/run.sh b/scorecard/docker/run.sh new file mode 100755 index 0000000000..532a03b106 --- /dev/null +++ b/scorecard/docker/run.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -euxo pipefail + +# NB: Also source MODEL_DATA_DIR and GCP_AUTH_JSON from a .env or whatever +# is relevant for the running platform +export UPLOAD_GCP=1 +export UPLOAD_PG=0 +mkdir -p model-data + +IMAGE="${IMAGE:-186900524924.dkr.ecr.us-west-2.amazonaws.com/scorecard:latest}" + +docker run \ + --gpus all \ + --env TEST_RUNS=10 \ + --env WARMUP_RUNS=3 \ + --env UPLOAD_GCP=1 \ + --env AWS_ACCESS_KEY_ID \ + --env AWS_SECRET_ACCESS_KEY \ + --env AWS_DEFAULT_REGION=us-west-2 \ + -v $(pwd)/model-data:/opt/scorecard/model-data \ + -v $GCP_AUTH_JSON:/opt/scorecard/gcp_auth.json:ro \ + -v $(pwd)/.coverage_results:/opt/scorecard/.coverage_results \ + $IMAGE \ + pytest --tb=native -v -s -q relax-coverage/ diff --git a/scorecard/docker/with_the_same_user b/scorecard/docker/with_the_same_user new file mode 100755 index 0000000000..4cde94d6cf --- /dev/null +++ b/scorecard/docker/with_the_same_user @@ -0,0 +1,95 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# This script is a wrapper creating the same user inside container as the one +# running the docker/build.sh outside the container. It also set the home directory +# for the user inside container to match the same absolute path as the workspace +# outside of container. Do not run this manually. It does not make sense. It is +# intended to be called by ci_build.sh only. + +set -ex + +# NOTE: sudo uses the env_reset option to reset environment variables to a secure bare minimum. +# The --preserve-env option below passes those variables through to the invoked process; however, +# this appears not to affect the environment used with execve, so we resolve the binary to run +# in this file using the $PATH specified in the Dockerfile. +COMMAND=( "$(which "$1")" ) +shift +COMMAND=( "${COMMAND[@]}" "$@" ) + +if ! touch /this_is_writable_file_system; then + echo "You can't write to your filesystem!" + echo "If you are in Docker you should check you do not have too many images" \ + "with too many files in them. Docker has some issue with it." + exit 1 +else + rm /this_is_writable_file_system +fi + +getent group "${CI_BUILD_GID}" || ( + # Ensure "${CI_BUILD_GROUP}" is not already some other gid inside container. 
+ if grep -q "^${CI_BUILD_GROUP}:" /etc/group; then + CI_BUILD_GROUP="${CI_BUILD_GROUP}2" + fi + addgroup --force-badname --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" >/dev/null) + +getent group tvm-venv || (addgroup tvm-venv >/dev/null) +getent passwd "${CI_BUILD_UID}" || adduser --force-badname --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" \ + --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" \ + --disabled-password --home "${CI_BUILD_HOME}" --quiet "${CI_BUILD_USER}" +usermod -a -G sudo -G tvm-venv "${CI_BUILD_USER}" +usermod -a -G sudo -G dialout "${CI_BUILD_USER}" + +# Add user to video group for ROCm +if [[ ! -z "${ROCM_ENABLED-}" ]]; then + usermod -a -G video "${CI_BUILD_USER}" +fi + +# This is a grotesque hack to get PYTEST_ADD_OPTS available to all task scripts. +echo "${CI_BUILD_USER} ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/90-nopasswd-sudo +sudo chown "${CI_BUILD_USER}:${CI_BUILD_GID}" /opt/scorecard +cp /root/.bashrc . +chown "${CI_BUILD_UID}:${CI_BUILD_GID}" .bashrc + +if [ -e /root/.aws ]; then + cp -r /root/.aws /opt/scorecard + chown -R "${CI_BUILD_USER}:${CI_BUILD_GID}" /opt/scorecard/.aws +fi + +if [[ ! -z "${CUDA_VISIBLE_DEVICES-}" ]]; then + CUDA_ENV="CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" +else + CUDA_ENV="" +fi + +if [[ "$CI_IMAGE_NAME" == *"hexagon"* ]] && [[ ${CI:-false} != "true" ]]; then + PATH=$(echo "$PATH" | sed 's/\/opt\/sccache://g') +fi + +sudo -u "#${CI_BUILD_UID}" --preserve-env \ +${CUDA_ENV} \ +PATH=${PATH} \ +JAVA_HOME=${JAVA_HOME} \ +LD_LIBRARY_PATH="${LD_LIBRARY_PATH-}" \ +PYTHONPATH="${PYTHONPATH-}" \ +HOME="$(pwd)" \ +CI_IMAGE_NAME="${CI_IMAGE_NAME-}" \ +"${COMMAND[@]}" + +#HOME="${CI_BUILD_HOME-}" \ diff --git a/scorecard/relax-coverage/.gitignore b/scorecard/relax-coverage/.gitignore new file mode 100644 index 0000000000..70a9e1982b --- /dev/null +++ b/scorecard/relax-coverage/.gitignore @@ -0,0 +1 @@ +old*.py diff --git a/scorecard/relax-coverage/conftest.py b/scorecard/relax-coverage/conftest.py new file mode 100644 index 0000000000..a78ad282b7 --- /dev/null +++ b/scorecard/relax-coverage/conftest.py @@ -0,0 +1,2 @@ +def pytest_addoption(parser): + parser.addoption("--cuda-sm", help="--cuda-sm arg to cli.py") diff --git a/scorecard/relax-coverage/runners/base.py b/scorecard/relax-coverage/runners/base.py new file mode 100644 index 0000000000..f295d41de9 --- /dev/null +++ b/scorecard/relax-coverage/runners/base.py @@ -0,0 +1,103 @@ +import datetime +import json + +from pathlib import Path +from typing import List, Optional, Dict, Any + +import numpy as np +from benchmarking_utils import BenchmarkConfig, FROM_HUB, sha256sum + + +def flush_result( + result_directory: Path, + compile_time_ms: float, + import_error: Optional[str], + runtimes_ms: List[float], + shapes: Dict[str, List[int]], + output_deltas: List[List[np.ndarray]], + run_config: BenchmarkConfig, + framework_ops: List[str], + runtime_metadata: Dict[str, Any], + relay_ops: List[Dict[str, Any]], +) -> None: + """ + Print the results of a run to stdout + """ + model_config = run_config.config + + outputs_match_onnx = True + for deltas in output_deltas: + for delta in deltas: + if not np.allclose( + np.zeros_like(delta), delta, atol=run_config.atol, rtol=run_config.rtol + ): + outputs_match_onnx = False + + end_to_end_runtimes_ms = np.array(runtimes_ms) + relay_fusion_groups: List[List[int]] = [] + # branch, sha = git_info() + branch = "tbd" + sha = "tbd" + + if model_config.file() == FROM_HUB: + # TODO: implement this for hub models + model_sha = "unknown" + 
else: + model_sha = sha256sum(model_config.file()) + + if import_error is not None or len(runtimes_ms) == 0: + mean_sec = 0 + p95_sec = 0 + std_dev_sec = 0 + variance_sec2 = 0 + cov = 0 + else: + runtimes_s = np.array(end_to_end_runtimes_ms) / 1000.0 + mean_s = np.mean(runtimes_s) + std_dev_s = np.std(runtimes_s) + mean_sec = mean_s + p95_sec = np.percentile(runtimes_s, 95) + std_dev_sec = std_dev_s + variance_sec2 = np.var(runtimes_s) + cov = std_dev_s / mean_s + + data = { + # identifying fields + "test_run_id": "to be filled in", + "run_at": "to be filled in", + "test_suite_id": "to be filled in", + "model_set_id": model_config.set, + "model_name": model_config.name, + "config_name": model_config.flow_config, + # info to reproduce results + "model_hash": model_sha, + "repo": { + "owner": "octoml", + "repo": "relax", + "sha": sha, + "branch": branch, + }, + "runtime_metadata": runtime_metadata, + "import_error": import_error, + "warmup_runs": run_config.warmup_runs, + "test_runs": run_config.test_runs, + "input_shapes": shapes, + # coarse grained timings + "inference_stats": { + "mean_sec": mean_sec, + "p95_sec": p95_sec, + "std_dev_sec": std_dev_sec, + "variance_sec2": variance_sec2, + "cov": cov, + }, + "compile_time_ms": compile_time_ms, + "raw_stats_ms": runtimes_ms, + "outputs_match_onnx": outputs_match_onnx, + # model details + "framework_ops": framework_ops, + "relay_ops": relay_ops, + "relay_fusion_groups": relay_fusion_groups, + # coverage results + } + + print(json.dumps(data, indent=2), flush=True) diff --git a/scorecard/relax-coverage/runners/benchmarking_utils.py b/scorecard/relax-coverage/runners/benchmarking_utils.py new file mode 100644 index 0000000000..2f21ecc7fe --- /dev/null +++ b/scorecard/relax-coverage/runners/benchmarking_utils.py @@ -0,0 +1,372 @@ +import json +import random +import string +import subprocess +import sys +import time +import os +import argparse +import collections +import functools + +from typing import * +from pathlib import Path +from dataclasses import dataclass + +import psycopg2 +import commentjson +import jsonschema +import onnx +import pytest + +from cloud_utils import aws_download, IS_IN_CI + +import numpy as np +from google.cloud import bigquery +from google.oauth2 import service_account +from onnx import hub +from tvm import relax + + +REPO_ROOT = Path(__file__).parent.parent.parent +FROM_HUB = object() +ONNX_REPO = "onnx/models" +ONNX_REPO_SHA = "8e893eb39b131f6d3970be6ebd525327d3df34ea" +MODELS_DIR = REPO_ROOT / "model-data" + + +class ImportError: + FAILED_ONNX_IMPORT = "failed_onnx_import" + FAILED_RELAX_BUILD = "failed_relax_build" + FAILED_EXECUTION = "failed_execution" + FAILED_OCTO_COMPILE = "failed_octo_compile" + + +def eprint(*args): + print(*args, file=sys.stderr, flush=True) + + +@dataclass +class ModelConfig: + set: str + name: str + sha256: str + version: str + flow_config: str + requires_toposort: bool + input_scale: float + tuning_steps: Optional[int] + shapes: Optional[Dict[str, List[int]]] + dtypes: Optional[Dict[str, str]] + files: Optional[List[str]] + + @staticmethod + def from_json(raw: str) -> "ModelConfig": + data = json.loads(raw) + return ModelConfig( + set=data["set"], + name=data["name"], + sha256=data["sha256"], + version=data["version"], + flow_config=data["flow_config"], + requires_toposort=data["requires_toposort"], + input_scale=data["input_scale"], + shapes=data["shapes"], + dtypes=data["dtypes"], + ) + + def id(self) -> str: + return f"{self.set}-{self.name}" + + def model_dir(self) -> Path: + if 
self.set == "onnx-hub": + return FROM_HUB + return MODELS_DIR / self.set / f"{self.name}@{self.version}" + + def file(self) -> Path: + if self.set == "onnx-hub": + return FROM_HUB + return self.model_dir() / "model.onnx" + + def load_model(self, verify_sha256: bool = True) -> onnx.ModelProto: + path = self.file() + s3_prefix = f"{self.set}/{self.name}@{self.version}" + if path == FROM_HUB: + repo = f"{ONNX_REPO}:{ONNX_REPO_SHA}" + model = hub.load(self.name, repo=repo, silent=True) + if verify_sha256: + eprint(f"Skipping verification for {path} since it was loaded from ONNX Hub") + return model + else: + if not path.exists(): + eprint(f"Model file at {path} not found, trying to download from S3 storage...") + (MODELS_DIR / self.set).mkdir(exist_ok=True, parents=True) + out_path = aws_download( + blob_name=f"{s3_prefix}/model.onnx", + out_path=path, + ) + if not out_path.exists(): + raise RuntimeError(f"Model file at {path} not found, has it been downloaded?") + if verify_sha256: + actual_sha256 = sha256sum(path) + if actual_sha256 != self.sha256: + raise RuntimeError( + f"Model's sha256 ({actual_sha256}) did not match expected sha256 ({self.sha256})" + ) + + eprint(f"Loading model at {path}") + model = onnx.load(path, load_external_data=False) + for external_file in self.files: + external_file_path = self.model_dir() / external_file + if not external_file_path.exists(): + eprint(f"{external_file_path} does not exist, downloading from S3...") + aws_download( + blob_name=f"{s3_prefix}/{external_file}", + out_path=external_file_path, + ) + onnx.load_external_data_for_model(model, self.model_dir()) + + if self.requires_toposort: + import onnx_graphsurgeon as gs + + sorted = gs.import_onnx(model) + sorted.toposort() + model = gs.export_onnx(sorted) + + return model + + +@dataclass +class BenchmarkConfig: + config: ModelConfig + warmup_runs: int + test_runs: int + check_accuracy: bool + atol: float + rtol: float + cuda_sm: int + + @staticmethod + def from_json(raw: str) -> "BenchmarkConfig": + data = json.loads(raw) + return BenchmarkConfig( + config=ModelConfig.from_json(data["config"]), + warmup_runs=data["warmup_runs"], + test_runs=data["test_runs"], + check_accuracy=data["check_accuracy"], + atol=data["atol"], + rtol=data["rtol"], + cuda_sm=data["cuda_sm"], + ) + + def __str__(self): + return f"{self.config.set}.{self.config.name}.{self.config.flow_config}" + + +def sha256sum(model_file_name: str): + proc = subprocess.run( + ["sha256sum", model_file_name], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + return proc.stdout.strip().split()[0] + + +def git_info() -> Tuple[str, str]: + """ + Determine the git branch and sha + """ + proc = subprocess.run( + ["git", "rev-parse", "--abbrev-ref", "HEAD"], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + branch = proc.stdout.strip() + proc = subprocess.run( + ["git", "rev-parse", "--verify", "HEAD"], + stdout=subprocess.PIPE, + check=True, + encoding="utf-8", + ) + sha = proc.stdout.strip() + return branch, sha + + +def infer_shapes( + model: "onnx.ModelProto", axes: Optional[Dict[str, int]] = None +) -> Dict[str, List[int]]: + # N.B. Defer the import so as not to unconditionally require other runtimes. + from tvm import relay + from tvm.tir import Any as Any + + input_shapes = {} + if axes is None: + axes = {} + initializer_names = [n.name for n in model.graph.initializer] + # The inputs contains both the inputs and parameters. 
We are just interested in the + # inputs so skip all parameters listed in graph.initializer + unspecified_dynamic_axes = [] + for input_info in model.graph.input: + if input_info.name not in initializer_names: + name, shape, dtype, axis_names = relay.frontend.onnx.get_info(input_info) + + # Normalize the shape dimensions to integers + assert isinstance(input_shapes, dict) + new_shape = [] + for value, axis_name in zip(shape, axis_names): + if isinstance(value, Any): + lookup_value = axes.get(axis_name) + if lookup_value is None: + unspecified_dynamic_axes.append((axis_name, name)) + value = -1 + else: + value = lookup_value + else: + value = int(value) + + new_shape.append(value) + input_shapes.update({input_info.name: new_shape}) + + if len(unspecified_dynamic_axes) > 0: + axes_to_inputs = collections.defaultdict(list) + for axis_name, input_name in unspecified_dynamic_axes: + axes_to_inputs[axis_name].append(input_name) + + msg = "\n".join( + [ + f" {axis_name} on {', '.join(input_names)}" + for axis_name, input_names in axes_to_inputs.items() + ] + ) + raise RuntimeError( + f"Unspecified dynamic shapes detected, shapes must be manually specified or an $axis entry provided:\n{msg}" + ) + return input_shapes + + +def infer_dtypes(model: "onnx.ModelProto") -> Dict[str, str]: + # N.B. Defer the import so as not to unconditionally require other runtimes. + from tvm import relay + from tvm.tir import Any as Any + + input_dtypes = {} + initializer_names = [n.name for n in model.graph.initializer] + # The inputs contains both the inputs and parameters. We are just interested in the + # inputs so skip all parameters listed in graph.initializer + for input_info in model.graph.input: + if input_info.name not in initializer_names: + name, shape, dtype, axis_names = relay.frontend.onnx.get_info(input_info) + if dtype is None: + raise RuntimeError( + f"Unknown dtype on input '{input_info.name}' is not supported. 
inputs: '{input_info.name}'", + ) + + input_dtypes.update({input_info.name: dtype}) + + return input_dtypes + + +class Timer(object): + def __enter__(self): + self.start = time.perf_counter_ns() + return self + + def __exit__(self, *args): + self.end = time.perf_counter_ns() + self.ms_duration = (self.end - self.start) / 1000 / 1000 + + +def extract_framework_ops(model: onnx.ModelProto) -> List[Dict[str, str]]: + return [] + return [{"name": node.name, "op_type": node.op_type} for node in model.graph.node] + + +def extract_relay_ops( + model: onnx.ModelProto, + framework_ops: List[Dict[str, str]], + shapes: Dict[str, List[int]], +) -> List[str]: + tvm_model = relax.from_onnx(model, shape=shapes) + + ops = [] + for item in tvm_model.functions.keys(): + ops.append( + { + "framework_op_index": -1, + "name": item.name_hint, + "schedule_method": "unknown", + } + ) + + return ops + + +class BaseRunner: + benchmark_config: BenchmarkConfig + + def __init__(self, benchmark_config: BenchmarkConfig): + self.benchmark_config = benchmark_config + + self._model = self.benchmark_config.config.load_model( + verify_sha256=benchmark_config.config.sha256 is not None + ) + + def metadata(self): + raise NotImplementedError + + def run(self, *args, **kwargs): + raise NotImplementedError + + def load_model(self) -> "onnx.ModelProto": + return self._model + + def run_onnx_cpu_inference(self, inputs: Dict[str, "np.ndarray"]) -> List["np.ndarray"]: + import onnxruntime as ort + + sess_opt = ort.SessionOptions() + + # Set up an onnx inference on GPU + sess = ort.InferenceSession( + self._model.SerializeToString(), + sess_options=sess_opt, + providers=["CPUExecutionProvider"], + ) + output_names = [] + output = sess.run(output_names, inputs) + return output + + def generate_inputs(self, n: int) -> List[Dict[str, np.ndarray]]: + all_inputs = [] + + inferred_dtypes = None + + if self.benchmark_config.config.shapes is None: + shapes = infer_shapes(self._model) + else: + axes = self.benchmark_config.config.shapes.get("$axes") + if len(self.benchmark_config.config.shapes) == 1 and axes is not None: + shapes = infer_shapes(self._model, axes=axes) + else: + shapes = self.benchmark_config.config.shapes + + if self.benchmark_config.config.dtypes is None: + if inferred_dtypes is not None: + dtypes = inferred_dtypes + else: + dtypes = infer_dtypes(self._model) + else: + dtypes = self.benchmark_config.config.dtypes + + for _ in range(n): + input_names = list(shapes.keys()) + inputs = {} + for name in input_names: + inputs[name] = ( + np.random.uniform(size=shapes[name]) * self.benchmark_config.config.input_scale + ).astype(dtypes[name]) + + all_inputs.append(inputs) + + return all_inputs diff --git a/scorecard/relax-coverage/runners/cli.py b/scorecard/relax-coverage/runners/cli.py new file mode 100755 index 0000000000..ca885cae99 --- /dev/null +++ b/scorecard/relax-coverage/runners/cli.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python3 +from benchmarking_utils import ( + BenchmarkConfig, + ModelConfig, + eprint, + extract_framework_ops, +) +from base import flush_result +from pathlib import Path +from typing import * + +import numpy as np + +import importlib +import re +import json +import argparse +import warnings +import sys + +np.set_printoptions(threshold=5, precision=4) +# warnings.filterwarnings(action="ignore", category=DeprecationWarning, module=r".*") +# warnings.filterwarnings(action="error", category=UserWarning, module=r".*") + + +def find_inputs_and_outputs(dir: Path): + input_dir = Path(dir) + + all_inputs = [] + 
gold_results = [] + eprint(f"Loading sample inputs from {dir}...") + for input_path in input_dir.glob("sample_input*.npy"): + output_path = input_path.parent / input_path.name.replace("input", "output") + all_inputs.append(np.load(input_path, allow_pickle=True).item()) + gold_results.append(np.load(output_path, allow_pickle=True)) + + if len(all_inputs) == 0: + eprint(f"No sample inputs (e.g. files named sample_input0.npy found in '{dir}')") + exit(1) + elif len(all_inputs) > 1: + eprint(f"Found multiple input files in '{dir}', use --input to choose a specific one") + exit(1) + + return all_inputs, gold_results + + +def run(args, runner): + """ + Run the benchmark as defined by the CLI args + """ + + # Determine where the model inputs should come from + if args.random_inputs: + # No specific input to use, make one + all_inputs = runner.generate_inputs(1) + gold_results = None + elif args.input is not None: + # A specific file has been chosen, use it + all_inputs = [np.load(Path(args.input), allow_pickle=True).item()] + gold_results = None + if args.output is not None: + gold_results = np.load(Path(args.output), allow_pickle=True) + elif args.input_dir is not None: + # Find a file in a particular directory + all_inputs, gold_results = find_inputs_and_outputs(args.input_dir) + else: + # Find an input file in the same directory as the model.onnx file + all_inputs, gold_results = find_inputs_and_outputs( + runner.benchmark_config.config.model_dir() + ) + + # Only one input is used per run, multiple inputs should be specified as + # separate runs + inputs = all_inputs[0] + + # Trigger the onnx.load call + eprint("Loading model...") + try: + onnx_model = runner.load_model() + except Exception as error: + flush_result( + result_directory=None, + run_config=runner.benchmark_config, + runtimes_ms=[], + shapes=[], + import_error="Failed ONNX load", + compile_time_ms=[], + output_deltas=[], + relay_ops=[], + framework_ops=[], + runtime_metadata=runner.metadata(), + ) + raise error + + if gold_results is None: + # Generate the expected results if necessary + eprint("Generating expected results at runtime") + gold_results = runner.run_onnx_cpu_inference(inputs) + + # Run the model a few times and extract timings + error, import_error, compile_time_ms, runtimes_ms, output_deltas = runner.run( + inputs=inputs, + gold_results=gold_results, + ) + + try: + framework_ops = extract_framework_ops(onnx_model) + except Exception as e: + framework_ops = [] + error = e + + # TODO: relay ops + + # Send the output results to a JSON file on disk + flush_result( + result_directory=None, + run_config=runner.benchmark_config, + runtimes_ms=runtimes_ms, + shapes=None, + import_error=import_error, + compile_time_ms=compile_time_ms, + output_deltas=output_deltas, + relay_ops=[], + framework_ops=framework_ops, + runtime_metadata=runner.metadata(), + ) + + # Re-raise any failures + if error is not None: + raise error + + +def generate(args, runner): + """ + Generate pairs of sample inputs and outputs + """ + import numpy as np + + np.random.seed(int(args.seed)) + n = int(args.n) + all_inputs = runner.generate_inputs(n=n) + output_dir = Path(args.result_directory) + + eprint("Loading ONNX model...") + onnx_model = runner.load_model() + + should_generate_outputs = not args.skip_run + + for i, inputs in enumerate(all_inputs): + input_path = output_dir / f"sample_input{i}.npy" + output_path = output_dir / f"sample_output{i}.npy" + + if input_path.exists() and not args.force: + eprint(f"Refusing to overwrite {input_path} since 
--force was not used") + exit(1) + + if output_path.exists() and not args.force: + eprint(f"Refusing to overwrite {output_path} since --force was not used") + exit(1) + + if should_generate_outputs: + desc = f"{input_path.name}, {output_path.name}" + else: + desc = f"{input_path.name}" + + eprint(f"[{i + 1} / {n}] Generating input and output ({desc})") + + if should_generate_outputs: + outputs = runner.run_onnx_cpu_inference(inputs) + + np.save(input_path, inputs) + + if should_generate_outputs: + np.save(output_path, outputs) + + +def parse_args(valid_executors: List[str]): + parser = argparse.ArgumentParser() + subparsers = parser.add_subparsers(help="benchmarking utilities", dest="command") + + def add_shared_arguments(sub): + sub.add_argument( + "--cuda-sm", + type=int, + default=75, + help="CUDA target sm level (default: 75, compute capability for Tesla T4)", + ) + sub.add_argument( + "-m", + "--model", + required=True, + help="the model slug to run (e.g. oss-onnx.t5-encoder-12@1)", + ) + sub.add_argument( + "--sha", + help="the model's sha256 to use to verify file integrity", + ) + sub.add_argument("--input-scale", help="scalar to scale np.random results by", default=1.0) + sub.add_argument( + "--shapes", + help="shapes as JSON (will be inferred if not provided), the $axes key can be used to fill in dynamic shapes by axis name", + ) + sub.add_argument( + "--files", + help="comma separated list of files to download", + ) + sub.add_argument( + "--tuning-steps", + help="if tuning should be used, the number of steps", + ) + sub.add_argument( + "--dtypes", + help="comma separated list of dtypes (will be inferred if not provided)", + ) + + # CLI for running models + run = subparsers.add_parser("run", help="run the benchmark") + run.add_argument("-i", "--input", help=".npy file to use for input") + run.add_argument("-o", "--output", help=".npy file to use for output") + add_shared_arguments(run) + + run.add_argument("--runs", help="number of test runs (default: 1)", default=1) + run.add_argument( + "--warmup-runs", + help="number of warmup runs (default: 0)", + default=0, + ) + run.add_argument( + "--toposort", + action="store_true", + help="toposort nodes in model before running", + ) + run.add_argument( + "--atol", + help="absolute tolerance (default: 0.0001)", + default=0.0001, + ) + run.add_argument( + "--rtol", + help="relative tolerance (default: 0.0001)", + default=0.0001, + ) + run.add_argument( + "--input-dir", + help="directory of sample_inputN.npy and sample_outputN.npy files", + ) + run.add_argument( + "--random-inputs", + action="store_true", + help="generate random values for inputs, execute on CPU to generate expected results at runtime", + ) + run.add_argument( + "-e", + "--executor", + required=True, + help=f"executor to use (options are {', '.join(valid_executors)})", + ) + + # CLI for generating inputs for a model + generate = subparsers.add_parser( + "generate", help="generate new output results for a set of inputs" + ) + generate.add_argument("--shape", help="input shapes as JSON") + generate.add_argument( + "--skip-run", + action="store_true", + help="only generate inputs, skip running the model and generating outputs", + ) + generate.add_argument( + "-f", + "--force", + action="store_true", + help="overwrite existing files", + ) + generate.add_argument("--seed", help="int to use for np.random.seed (default=0)", default=0) + generate.add_argument("-n", help="how many inputs to generate (default=5)", default=5) + generate.add_argument( + "-r", + "--result-directory", + 
required=True, + help="directory to store resulting .npy files in", + ) + + args = parser.parse_args() + + return args + + +if __name__ == "__main__": + # Find the possible values for --executor (i.e. the modules that have a .Runner attribute) + ignored_files = set( + [ + "all.py", + "cli.py", + "base.py", + "benchmarking_utils.py", + ] + ) + executors = [ + x.stem for x in Path(__file__).resolve().parent.glob("*.py") if x.name not in ignored_files + ] + + args = parse_args(executors) + + # Break apart the model slug + m = re.match(pattern=r"(.+)\.(.+)@(\d+)", string=args.model) + if m is None: + eprint( + f"--model must match the pattern '.@' (e.g. 'oss-onnx.t5-encoder-12@1'), found {args.model}" + ) + exit(1) + set, name, version = m.groups() + + # Find what should run the model + if hasattr(args, "executor"): + executor = importlib.import_module(args.executor) + else: + executor = importlib.import_module("onnx-nightly-cpu") + + # Check if shapes or dtypes were provided + shapes = None + if args.shapes is not None: + shapes = json.loads(args.shapes) + + dtypes = None + if args.dtypes is not None: + dtypes = [d.strip() for d in args.dtypes.split(",")] + + files = [] + if args.files is not None: + files = [x.strip() for x in args.files.split(",")] + + # Instantiate the runner + runner_cls = getattr(executor, "Runner") + runner = runner_cls( + benchmark_config=BenchmarkConfig( + config=ModelConfig( + **{ + "set": set, + "name": name, + "sha256": args.sha, + "version": version, + "flow_config": getattr(args, "executor", None), + "requires_toposort": args.toposort, + "tuning_steps": None if args.tuning_steps is None else int(args.tuning_steps), + "input_scale": float(args.input_scale), + "shapes": shapes, + "dtypes": dtypes, + "files": files, + } + ), + warmup_runs=int(args.warmup_runs), + test_runs=int(args.runs), + check_accuracy=True, + atol=float(args.atol), + rtol=float(args.rtol), + cuda_sm=args.cuda_sm, + ), + ) + + # Run the specified CLI command + if args.command == "generate": + generate(args, runner) + elif args.command == "run": + run(args, runner) + else: + eprint("Unknown command") + exit(1) diff --git a/scorecard/relax-coverage/runners/cloud_utils.py b/scorecard/relax-coverage/runners/cloud_utils.py new file mode 100644 index 0000000000..f3481bb908 --- /dev/null +++ b/scorecard/relax-coverage/runners/cloud_utils.py @@ -0,0 +1,111 @@ +import json +import random +import string +import subprocess +import sys +import os + +from typing import * +from pathlib import Path + +import psycopg2 + +from google.cloud import bigquery +from google.oauth2 import service_account + + +IS_IN_CI = os.getenv("IS_IN_CI", "0") == "1" + + +def eprint(*args): + print(*args, file=sys.stderr, flush=True) + + +_bigquery_client_and_config = None + + +def bigquery_client_and_config( + key_path: str = "gcp_auth.json", schema: Optional[List[bigquery.SchemaField]] = None +): + if not Path(key_path).exists(): + raise RuntimeError(f"{key_path} was not found, did you forget to mount it?") + + global _bigquery_client_and_config + if _bigquery_client_and_config is None: + credentials = service_account.Credentials.from_service_account_file( + key_path, + scopes=["https://www.googleapis.com/auth/cloud-platform"], + ) + client = bigquery.Client( + credentials=credentials, + project=credentials.project_id, + ) + + if schema is None: + schema = [ + bigquery.SchemaField("r", "STRING", mode="REQUIRED"), + ] + + job_config = bigquery.LoadJobConfig( + schema=schema, + ) + job_config.source_format = 
bigquery.SourceFormat.NEWLINE_DELIMITED_JSON + job_config.autodetect = True + _bigquery_client_and_config = (client, job_config) + + return _bigquery_client_and_config + + +def bigquery_upload(jsonl_file: Path, dataset_id: str, table_id: str) -> int: + client, job_config = bigquery_client_and_config() + dataset_ref = client.dataset(dataset_id) + table_ref = dataset_ref.table(table_id) + with open(jsonl_file, "rb") as source_file: + job = client.load_table_from_file( + source_file, + table_ref, + location="us-west1", # Must match the destination dataset location. + job_config=job_config, + ) # API request + + job.result() + + return job + + +def postgres_upload(jsonl_file: Path, database: str, table_name: str) -> int: + """ + Uploads records in jsonl_file (one JSON document per line) to postgres + """ + rows = [(json.dumps(d["r"]),) for d in load_jsonl(jsonl_file=jsonl_file)] + sql = f"INSERT INTO {table_name} (r) VALUES (%s)" + conn = None + password = os.environ["POSTGRES_PASSWORD"] + ip = os.environ["POSTGRES_IP"] + user = os.getenv("POSTGRES_USER", "ci") + try: + conn = psycopg2.connect( + host=ip, + database=database, + user=user, + password=password, + ) + cur = conn.cursor() + cur.executemany(sql, rows) + conn.commit() + cur.close() + finally: + if conn is not None: + conn.close() + + return len(rows) + + +def aws_download(blob_name: str, out_path: Path, bucket_name: str = "scorecard-models"): + command = ["aws", "s3", "cp", f"s3://{bucket_name}/{blob_name}", out_path] + if IS_IN_CI: + command.append("--no-progress") + command = [str(c) for c in command] + eprint(f"+ {' '.join(command)}") + subprocess.run(command, check=True, stdout=sys.stderr) + return out_path diff --git a/scorecard/relax-coverage/runners/onnx-nightly-cpu.py b/scorecard/relax-coverage/runners/onnx-nightly-cpu.py new file mode 100644 index 0000000000..62c546a03a --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-nightly-cpu.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import * + +import os +import sys + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +# Load the nightly ONNX version from its install directory +sys.path.insert(0, os.environ["ONNX_NIGHTLY_PATH"]) +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): + def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["CPUExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx-nightly-trt.py b/scorecard/relax-coverage/runners/onnx-nightly-trt.py new file mode 100644 index 0000000000..7bbdfe2e94 --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-nightly-trt.py @@ -0,0 +1,29 @@ +from pathlib import Path +from typing import * + +import os +import sys + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +# Load the nightly ONNX version from its install directory +sys.path.insert(0, os.environ["ONNX_NIGHTLY_PATH"]) +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): 
+ def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["TensorrtExecutionProvider", "CUDAExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx-trt.py b/scorecard/relax-coverage/runners/onnx-trt.py new file mode 100644 index 0000000000..d249d0fa0e --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx-trt.py @@ -0,0 +1,24 @@ +from pathlib import Path +from typing import * + +# unused but needed to get CUDA working in onnx, too lazy to actually fix the +# issue +# https://stackoverflow.com/questions/75267445/why-does-onnxruntime-fail-to-create-cudaexecutionprovider-in-linuxubuntu-20/75267493#75267493 +import torch + +import onnxruntime as ort + +from benchmarking_utils import BenchmarkConfig +from onnx_base import OnnxBase + + +class OnnxTrt(OnnxBase): + def __init__(self, benchmark_config: BenchmarkConfig): + super().__init__( + benchmark_config=benchmark_config, + ort=ort, + providers=["TensorrtExecutionProvider", "CUDAExecutionProvider"], + ) + + +Runner = OnnxTrt diff --git a/scorecard/relax-coverage/runners/onnx_base.py b/scorecard/relax-coverage/runners/onnx_base.py new file mode 100644 index 0000000000..a6aa6e8944 --- /dev/null +++ b/scorecard/relax-coverage/runners/onnx_base.py @@ -0,0 +1,82 @@ +from pathlib import Path +from typing import * +from types import ModuleType + +import numpy as np + +from benchmarking_utils import BenchmarkConfig, Timer, eprint, BaseRunner + + +class OnnxBase(BaseRunner): + def __init__( + self, + benchmark_config: BenchmarkConfig, + providers: List[str], + ort: ModuleType, + ): + self.ort = ort + self.providers = providers + + super().__init__(benchmark_config=benchmark_config) + + def metadata(self) -> Dict[str, Any]: + return { + "ort-version": self.ort.__version__, + } + + def run( + self, + inputs, + gold_results: List[np.ndarray], + ): + """ + Run an onnx `model` with onnxruntime's TensorRT EP + """ + runtimes_ms = [] + output_deltas = [] + sess_opt = self.ort.SessionOptions() + + run_config = self.benchmark_config + + id = run_config.config.id() + eprint(f"[{id}] Running onnx trt") + eprint( + f"[{id}] Running for {run_config.warmup_runs} warmups and {run_config.test_runs} tests" + ) + + # Set up an onnx inference on the specified providers + sess = self.ort.InferenceSession( + self._model.SerializeToString(), + sess_options=sess_opt, + providers=self.providers, + ) + + # Unwrap input if necessary + if isinstance(inputs, list) and len(inputs) == 1: + inputs = inputs[0] + else: + inputs = inputs + + output_names = [] + compile_time_ms = 0 + for i in range(run_config.warmup_runs): + eprint( + f"[{id}][{i + 1} / {run_config.warmup_runs}][onnx] Warmup {run_config.config.id()}" + ) + sess.run(output_names, inputs) + + # Run the model a few times and record the end to end execution time + for i in range(run_config.test_runs): + eprint( + f"[{id}][{i + 1} / {run_config.test_runs}][onnx] Running {run_config.config.id()}" + ) + with Timer() as timer: + output = sess.run(output_names, inputs) + + # Stash the runtime + runtimes_ms.append(timer.ms_duration) + + # Check accuracy + output_deltas.append([gold_results[i] - output[i] for i in range(len(output))]) + + return None, None, compile_time_ms, runtimes_ms, output_deltas diff --git a/scorecard/relax-coverage/runners/relax-cuda.py b/scorecard/relax-coverage/runners/relax-cuda.py new file mode 100644 index 0000000000..ad91bc213f --- /dev/null +++ 
b/scorecard/relax-coverage/runners/relax-cuda.py @@ -0,0 +1,19 @@ +from typing import * + + +from relax_base import RelaxBase + + +class RelaxCuda(RelaxBase): + name = "relax-cuda" + + def __init__(self, *args, **kwargs): + cuda_sm = kwargs["benchmark_config"].cuda_sm + super().__init__( + target=f"cuda -libs=thrust -arch=sm_{cuda_sm} -max_shared_memory_per_block=49152 -max_threads_per_block=1024 -thread_warp_size=32 -registers_per_block=65536", + *args, + **kwargs, + ) + + +Runner = RelaxCuda diff --git a/scorecard/relax-coverage/runners/relax-native.py b/scorecard/relax-coverage/runners/relax-native.py new file mode 100644 index 0000000000..b7f63038d6 --- /dev/null +++ b/scorecard/relax-coverage/runners/relax-native.py @@ -0,0 +1,14 @@ +from typing import * + + +from relax_base import RelaxBase + + +class RelaxNative(RelaxBase): + name = "relax-native" + + def __init__(self, *args, **kwargs): + super().__init__(target="llvm -mcpu=core-avx2", *args, **kwargs) + + +Runner = RelaxNative diff --git a/scorecard/relax-coverage/runners/relax_base.py b/scorecard/relax-coverage/runners/relax_base.py new file mode 100644 index 0000000000..91c1acba4c --- /dev/null +++ b/scorecard/relax-coverage/runners/relax_base.py @@ -0,0 +1,106 @@ +from pathlib import Path +from typing import * +from types import ModuleType + +import numpy as np + +from benchmarking_utils import ( + Timer, + eprint, + ImportError, + BaseRunner, +) + +import tvm +import logging +from tvm import octo +from tvm.relax.frontend.onnx.onnx_frontend import ONNXGraphImporter + + +class RelaxBase(BaseRunner): + def __init__(self, target, *args, **kwargs): + super().__init__(*args, **kwargs) + self.target = target + + def metadata(self) -> Dict[str, Any]: + return { + "relax-version": "unknown", + } + + def load_model(self) -> "onnx.ModelProto": + return self._model + + def run( + self, + inputs, + gold_results: List[np.ndarray], + ): + runtimes_ms = [] + output_deltas = [] + run_config = self.benchmark_config + + tvm_version = tvm.support.libinfo()["GIT_COMMIT_HASH"] + id = run_config.config.id() + eprint( + f"[{id}] Running octo.compile (from {tvm_version}) with shapes={self.benchmark_config.config.shapes}, target={self.target}, and tuning_steps={self.benchmark_config.config.tuning_steps}" + ) + eprint( + f"[{id}] Running for {run_config.warmup_runs} warmups and {run_config.test_runs} tests" + ) + + # Disable tuning logs + ms_logger = logging.getLogger("tvm.meta_schedule") + ms_logger.setLevel(logging.CRITICAL) + for name in logging.root.manager.loggerDict: + if "tvm" in name: + logger = logging.getLogger(name) + logger.setLevel(logging.CRITICAL) + + with Timer() as compile_timer: + try: + tvm_model = octo.compile( + self._model, + shape_dict=self.benchmark_config.config.shapes, + target=tvm.target.Target(self.target), + tuning_steps=self.benchmark_config.config.tuning_steps, + ) + except Exception as e: + return e, ImportError.FAILED_OCTO_COMPILE, 0, [], [] + + compile_time_ms = compile_timer.ms_duration + breakpoint() + + # NOTE: ONNX frontend sanitizes input names. This hack is brittle and presumes Python dict ordering is the same + # between invocations. The real fix should be that OctoModel carries a mapping of framework names to Relax names. 
+ importer = ONNXGraphImporter({}, {}) + for k in list(inputs): + sanitized_k = importer._sanitize_name(k) + if sanitized_k != k: + inputs[sanitized_k] = inputs[k] + del inputs[k] + + for i in range(run_config.warmup_runs): + eprint( + f"[{id}][{i + 1} / {run_config.warmup_runs}][{self.name}] Warmup {run_config.config.id()}" + ) + tvm_model.run(inputs) + + # Run the model a few times and record the end to end execution time + for i in range(run_config.test_runs): + eprint( + f"[{id}][{i + 1} / {run_config.test_runs}][{self.name}] Running {run_config.config.id()}" + ) + try: + with Timer() as timer: + output = tvm_model.run(inputs) + + # Stash the runtime + runtimes_ms.append(timer.ms_duration) + + except Exception as e: + return e, ImportError.FAILED_EXECUTION, 0, [], [] + + # Check accuracy + output_deltas.append([gold_results[i] - output[i] for i in range(len(output))]) + + return None, None, compile_time_ms, runtimes_ms, output_deltas diff --git a/scorecard/relax-coverage/runners/voltaml.py b/scorecard/relax-coverage/runners/voltaml.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/scorecard/relax-coverage/test_coverage.py b/scorecard/relax-coverage/test_coverage.py new file mode 100644 index 0000000000..c1c0bf30bb --- /dev/null +++ b/scorecard/relax-coverage/test_coverage.py @@ -0,0 +1,412 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import json +import os +import shlex +import tempfile +import shutil +import re +import sys +import datetime + +from pathlib import Path +from typing import * + + +import pytest +import tabulate +import yaml +import numpy as np + +from onnx import hub +from utils import * +from runners.cloud_utils import * + +# Directory to store output JSON files +JSON_SCHEMA = REPO_ROOT / "schema" / "schema.jsonschema" +OUTPUT_DIR = REPO_ROOT / ".coverage_results" +UPLOAD_GCP = os.getenv("UPLOAD_GCP", "0") == "1" +UPLOAD_PG = os.getenv("UPLOAD_PG", "0") == "1" +TEST_RUNS = int(os.getenv("TEST_RUNS", "1")) +WARMUP_RUNS = int(os.getenv("WARMUP_RUNS", "0")) +ONNX_NIGHTLY_PATH = os.environ["ONNX_NIGHTLY_PATH"] + +# Load additional tests from disk +MODELS_YAML = REPO_ROOT / "models.yaml" +HUB_MODELS_YAML = REPO_ROOT / "hub_models.yaml" +MODELS: List[Dict[str, Any]] = [] +CLI = Path(__file__).resolve().parent / "runners" / "cli.py" + + +def generate_configs(item: Dict[str, Any]) -> List[Dict[str, Any]]: + return [{**item, "executor": config} for config in item["configs"]] + + +def get_configs_from_file(path: Path) -> List[Dict[str, Any]]: + configs = [] + if path.exists(): + with open(path) as f: + data = yaml.safe_load(f) + + for item in data: + configs.extend(generate_configs(item)) + return configs + + +# MODELS = get_configs_from_file(HUB_MODELS_YAML) + get_configs_from_file(MODELS_YAML) +MODELS = get_configs_from_file(MODELS_YAML) + + +def flush_print(*args): + print(*args, flush=True) + + +@pytest.fixture(scope="function", autouse=True) +def show_test_name(request): + flush_print(f"Test '{request.node.nodeid}' STARTED") + + def fin(): + flush_print(f"Test '{request.node.nodeid}' COMPLETED") + + request.addfinalizer(fin) + + +@pytest.fixture(scope="session") +def upload_coverage(request): + output_dir = gen_test_output_dir(base=OUTPUT_DIR) + output_dir = output_dir / "coverage" + + def finalizer(): + pass + + if UPLOAD_GCP: + flush_print(f"[GCP] Uploading records from {catted_results_path}") + dataset_id = "contrived_test_results_1" + # prod table + table_id = "sampledate_coverage_name" + + # testing table + # table_id = "gitlab_ci_data" + job = bigquery_upload( + jsonl_file=catted_results_path, dataset_id=dataset_id, table_id=table_id + ) + flush_print( + f"[GCP] Done uploading {job.output_rows} records to {dataset_id}:{table_id}" + ) + else: + flush_print( + f"[GCP] Skipping result upload for {output_dir} since UPLOAD_GCP was not 1 ({os.getenv('UPLOAD_GCP', 'unset')})" + ) + + request.addfinalizer(finalizer) + + yield output_dir + + +@pytest.fixture(scope="session") +def result_directory(request): + output_dir = gen_test_output_dir(base=OUTPUT_DIR) + + def finalizer(): + catted_results_path = output_dir / "concatted_results.jsonl" + + flush_print(f"Preparing files in {output_dir} for upload") + outputs = list(output_dir.glob("*.json")) + if len(outputs) == 0: + flush_print(f"Skipping result upload for {output_dir} there were no result files") + return + + with open(catted_results_path, "w") as f: + concat_test_results( + schema_file=JSON_SCHEMA, + test_results=list(output_dir.glob("*.json")), + output=f, + ) + + if UPLOAD_GCP: + flush_print(f"[GCP] Uploading records from {catted_results_path}") + dataset_id = "contrived_test_results_1" + # prod table + table_id = "contrived-string" + + # testing table + # table_id = "gitlab_ci_data" + job = bigquery_upload( + jsonl_file=catted_results_path, dataset_id=dataset_id, table_id=table_id + ) + flush_print( + f"[GCP] Done uploading {job.output_rows} records 
to {dataset_id}:{table_id}" + ) + else: + flush_print( + f"[GCP] Skipping result upload for {output_dir} since UPLOAD_GCP was not 1 ({os.getenv('UPLOAD_GCP', 'unset')})" + ) + + if UPLOAD_PG: + database = "scorecard" + table_name = "gitlab_data" + rows = postgres_upload( + jsonl_file=catted_results_path, + database=database, + table_name=table_name, + ) + flush_print(f"[PG] Done uploading {rows} records to {database}:{table_name}") + else: + flush_print( + f"[PG] Skipping result upload for {output_dir} since UPLOAD_PG was not 1 ({os.getenv('UPLOAD_PG', 'unset')})" + ) + + # Plain text report + data = load_jsonl(jsonl_file=catted_results_path) + rows = [] + for item in data: + item = json.loads(item["r"]) + name = f"{item['model_set_id']} / {item['model_name']} / {item['config_name']}" + if len(item["raw_stats_ms"]) == 0: + rows.append([name, "no data", "no data", "no data", "no data"]) + continue + + runtimes_s = [x / 1000.0 for x in item["raw_stats_ms"]] + if item["import_error"] is None: + rows.append( + [ + name, + item["inference_stats"]["mean_sec"], + np.min(runtimes_s), + np.var(runtimes_s), + item["inference_stats"]["cov"], + ] + ) + else: + rows.append( + [ + name, + "err", + "err", + "err", + "err", + ] + ) + rows = sorted(rows, key=lambda row: row[0]) + + flush_print(f"benchmark over {TEST_RUNS} runs and {WARMUP_RUNS} warmup runs") + flush_print( + tabulate.tabulate(rows, headers=["model", "mean (s)", "min (s)", "var (s^2)", "cov"]) + ) + + request.addfinalizer(finalizer) + + yield output_dir + + +BAD_WARNINGS = [ + "UserWarning: Specified provider 'TensorrtExecutionProvider' is not in available provider names", + "UserWarning: Specified provider 'CUDAExecutionProvider' is not in available provider names", +] + + +def _test_impl(request, slug, result_directory, run_config: Dict[str, Any]): + DATE_FORMAT = "%Y-%m-%d-%H:%M:%S" + run_at = datetime.datetime.now().strftime(DATE_FORMAT) + executor = run_config["executor"] + cmd = [ + sys.executable, + CLI, + "run", + "--sha", + run_config["sha256"], + "--model", + slug, + "--executor", + executor, + "--random-inputs", + "--runs", + TEST_RUNS, + "--warmup-runs", + WARMUP_RUNS, + ] + if "shapes" in run_config and run_config["shapes"] is not None: + cmd.append("--shapes") + cmd.append(json.dumps(run_config["shapes"])) + + if run_config.get("requires_toposort", False): + cmd.append("--toposort") + + if "cuda-sm" in run_config: + cmd.append("--cuda-sm") + cmd.append(run_config["cuda-sm"]) + + if "tuning-steps" in run_config: + cmd.append("--tuning-steps") + cmd.append(run_config["tuning-steps"]) + + cmd = [shlex.quote(str(c)) for c in cmd] + cmd = " ".join(cmd) + env = os.environ.copy() + env["CUDA_PATH"] = "/usr/local/cuda-11.8" + env["CUDA_MODULE_LOADING"] = "LAZY" + flush_print(f"+ {cmd}") + with tempfile.NamedTemporaryFile() as stderr_file: + full_cmd = cmd + f" 2> >(tee -a {stderr_file.name} >&2)" + proc = subprocess.run( + full_cmd, + check=False, + stdout=subprocess.PIPE, + encoding="utf-8", + shell=True, + env=env, + executable=shutil.which("bash"), + ) + + with open(stderr_file.name) as f: + stderr = f.read().strip() + + stdout = proc.stdout.strip() + + if stdout == "": + raise RuntimeError(f"No stdout found from process. 
stderr: {stderr}")

    try:
        data = json.loads(stdout)
    except json.decoder.JSONDecodeError as e:
        raise RuntimeError(f"Could not decode JSON: {e}\n{stdout}")

    result_directory.mkdir(exist_ok=True, parents=True)
    data["test_run_id"] = f"{result_directory.name}-{run_at}-{slug}"
    data["run_at"] = run_at
    data["test_suite_id"] = result_directory.name
    i = 0
    while True:
        output_path = result_directory / f"{executor}_{i}.json"
        if not output_path.exists():
            break
        i += 1

    flush_print(f"Writing to {output_path}")
    with open(output_path, "w") as f:
        json.dump(data, f, indent=2)

    if proc.returncode != 0 and not stderr.endswith("free(): invalid pointer"):
        raise RuntimeError(f"Process failed: stdout:\n{proc.stdout}\nstderr:{stderr}")

    # Prints from C++ don't get captured by Python at all, so check them after the
    # fact to see if cli.py ran any native code that printed warnings we don't
    # want to see
    for warning in BAD_WARNINGS:
        if warning in stdout:
            raise RuntimeError(f"Found {warning} in stdout:\n{stdout}")
        if warning in stderr:
            raise RuntimeError(f"Found {warning} in stderr:\n{stderr}")


def regex_for_unsupported_ops(ops: List[str]):
    """
    The list of unsupported ops can be in any order
    """
    match_any_op = "|".join(ops)
    match_any_op = f"({match_any_op})"
    op_str = ", ".join([match_any_op for i in range(len(ops))])
    return re.compile(
        f"tvm.error.OpNotImplemented: The following operators are not supported for frontend ONNX: {op_str}"
    )


encoder_ops = regex_for_unsupported_ops(ops=["Range", "Log", "Abs", "Greater", "Less", "Min"])
decoder_ops = regex_for_unsupported_ops(
    ops=["Log", "Max", "LessOrEqual", "Range", "Less", "Min", "Neg"]
)
pt_decoder_ops = regex_for_unsupported_ops(
    ops=["Less", "Min", "Range", "Neg", "Log", "Identity", "LessOrEqual"]
)
gptj_ops = regex_for_unsupported_ops(ops=["Einsum", "Cos", "Sin", "Range", "Neg"])
dynamic_shape = "AttributeError: has no attribute value"
missing_weights = re.compile(r"No such file or directory: '.*/weights.pb'")
missing_trt = "'TensorrtExecutionProvider' is not in available provider"
cuda_initialization = "CUDA initialization failure with error: 35"
missing_shapes_in_models_yaml = "Unspecified dynamic shapes detected"
cutlass_offload_failure = "KeyError: "

# Maps the pytest slug of a model/executor combination (see pytest_slug in
# utils.py) to the error pattern (plain substring or compiled regex) that the
# corresponding test is expected to fail with
EXPECTED_FAILURES: Dict[str, Any] = {}


def matches(pattern, text: str) -> bool:
    if isinstance(pattern, str):
        return pattern in text

    return pattern.search(text) is not None


@parameterize_configs(MODELS)
def test_offload_coverage(request, show_test_name, result_directory, run_config: Dict[str, Any]):
    """ """


@parameterize_configs(MODELS)
def test_mean_runtime(request, show_test_name, result_directory, run_config: Dict[str, Any]):
    """
    Tests end to end mean/p95 runtime for models on available backends
    """
    if run_config["executor"] == "relax-native":
        pytest.skip("relax-native results are slow and not needed")

    cuda_arg = request.config.getoption("--cuda-sm")
    if cuda_arg:
        run_config["cuda-sm"] = cuda_arg

    slug = f"{run_config['set']}.{run_config['name']}@{run_config['version']}"
    pyslug = pytest_slug(run_config)
    xfail_regex = EXPECTED_FAILURES.get(pyslug)

    # The entire test runner is wrapped in this try..except to implement some
    # custom behavior around xfailing, namely that the xfail happens with a
    # specific message in the error output.
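    # (Note that pytest.xfail() raises its own exception, so when the expected
    # error pattern matches below the test stops there and is reported as
    # xfailed; the bare `raise e` only runs for failures that are not listed in
    # EXPECTED_FAILURES.)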
    try:
        _test_impl(request, slug=slug, result_directory=result_directory, run_config=run_config)
    except Exception as e:
        if xfail_regex is not None:
            # This test should xfail, check if the error matches
            if not matches(xfail_regex, str(e)):
                # The error does not match, don't xfail and raise a normal exception
                raise RuntimeError(
                    f"Test {pyslug} is in EXPECTED_FAILURES but the expected error regex {xfail_regex} was not found in {str(e)}"
                )
            else:
                # The test failed and the message matches, xfail
                pytest.xfail(reason=f"{pyslug} is in EXPECTED_FAILURES")

        raise e

    # The test passed, but if the slug is in the xfail list this should be an
    # error (to mimic pytest.mark.xfail(strict=True))
    if xfail_regex is not None:
        raise RuntimeError(f"Expected test {pyslug} to fail but it passed")


if __name__ != "__main__":
    # Running under pytest
    for config in MODELS:
        pass
        # print(
        #     f"Running {config.config.id()} on {config.config.flow_config} ({config.warmup_runs} warmups, {config.test_runs} runs)"
        # )
diff --git a/scorecard/relax-coverage/utils.py b/scorecard/relax-coverage/utils.py new file mode 100644 index 0000000000..9c6f6a5f2d --- /dev/null +++ b/scorecard/relax-coverage/utils.py @@ -0,0 +1,99 @@
+import json
+import random
+import string
+import sys
+import os
+
+from typing import TextIO, List, Dict, Any
+from pathlib import Path
+
+import commentjson
+import jsonschema
+import pytest
+
+
+REPO_ROOT = Path(__file__).parent.parent
+FROM_HUB = object()
+ONNX_REPO = "onnx/models"
+ONNX_REPO_SHA = "8e893eb39b131f6d3970be6ebd525327d3df34ea"
+IS_IN_CI = os.getenv("IS_IN_CI", "0") == "1"
+
+
+def pytest_slug(item: Dict[str, Any]):
+    return f"{item['set']}.{item['name']}v{item['version']}-{item['executor']}"
+
+
+def parameterize_configs(configs: List[Dict[str, Any]]):
+    names = [pytest_slug(c) for c in configs]
+    return pytest.mark.parametrize("run_config", configs, ids=names)
+
+
+_manifest = None
+
+
+def manifest():
+    global _manifest
+    if _manifest is None:
+        with open(Path(__file__).resolve().parent / "ONNX_HUB_MANIFEST.json") as f:
+            _manifest = json.load(f)
+
+    return _manifest
+
+
+def find_model(name):
+    for model in manifest():
+        if model["model_path"] == name:
+            return model
+    raise ValueError(f"{name} not found")
+
+
+def load_jsonl(jsonl_file: Path) -> List[Dict[str, Any]]:
+    with open(jsonl_file) as f:
+        data = [json.loads(line) for line in f.readlines()]
+
+    return data
+
+
+def gen_test_output_dir(base: Path) -> Path:
+    """
+    Creates a 5 character id for the test used to store the result JSONs
+    """
+
+    if "TEST_SUITE_ID" in os.environ:
+        # CI sets this in the docker build
+        test_suite_id = os.environ["TEST_SUITE_ID"]
+        print(f"Using TEST_SUITE_ID from env: {test_suite_id}")
+        return base / test_suite_id
+
+    for _ in range(1000):
+        test_suite_id = "".join([random.choice(string.ascii_lowercase) for _ in range(5)])
+        test_output_dir = base / test_suite_id
+        if not test_output_dir.exists():
+            return test_output_dir
+
+    raise RuntimeError("Unable to generate a unique ID for this test run")
+
+
+def _load_and_strip_comments(f):
+    return commentjson.loads(f.read())
+
+
+def concat_test_results(schema_file: Path, test_results: List[Path], output: TextIO):
+    with open(schema_file) as f:
+        schema = _load_and_strip_comments(f)
+
+    for path in test_results:
+        with open(path, "r") as f:
+            try:
+                data = _load_and_strip_comments(f)
+            except Exception as e:
+                print(f"while loading {path}:", file=sys.stderr, flush=True)
+                raise e
+
+            try:
+                jsonschema.validate(instance=data, schema=schema)
+            except jsonschema.ValidationError as e:
+                print(f"while validating {path}:", file=sys.stderr, flush=True)
+                raise e
+            output.write(json.dumps({"r": json.dumps(data)}))
+            output.write("\n")
diff --git a/scorecard/relax_scorecard/__init__.py b/scorecard/relax_scorecard/__init__.py new file mode 100644 index 0000000000..e69de29bb2
diff --git a/scorecard/relax_scorecard/concat_testdata.py b/scorecard/relax_scorecard/concat_testdata.py new file mode 100644 index 0000000000..171c40620e --- /dev/null +++ b/scorecard/relax_scorecard/concat_testdata.py @@ -0,0 +1,64 @@
+import argparse
+import commentjson
+import json
+import jsonschema
+import pathlib
+import sys
+
+
+def _load_and_strip_comments(f):
+    return commentjson.loads(f.read())
+
+
+def concat_test_results(schema, test_results, output):
+    for path in test_results:
+        with open(path, "r") as f:
+            try:
+                data = _load_and_strip_comments(f)
+            except Exception as e:
+                print(f"while loading {path}:", file=sys.stderr)
+                raise e
+
+            try:
+                jsonschema.validate(instance=data, schema=schema)
+            except jsonschema.ValidationError as e:
+                print(f"while validating {path}:", file=sys.stderr)
+                raise e
+            output.write(json.dumps({"r": json.dumps(data)}))
+            output.write("\n")
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--schema",
+        type=pathlib.Path,
+        required=True,
+        help="Path to the JSON schema describing the test data",
+    )
+    parser.add_argument(
+        "test_results",
+        type=pathlib.Path,
+        nargs="+",
+        help="Path to test results which should be concatenated",
+    )
+
+    return parser.parse_args(argv)
+
+
+def main(argv):
+    args = parse_args(argv)
+
+    with open(args.schema) as f:
+        schema = _load_and_strip_comments(f)
+
+    concat_test_results(schema, args.test_results, sys.stdout)
+    print(
+        f"Prepared {len(args.test_results)} test results for upload to BigQuery",
+        file=sys.stderr,
+    )
+
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/scorecard/schema/schema.jsonschema b/scorecard/schema/schema.jsonschema new file mode 100644 index 0000000000..2366fe5806 --- /dev/null +++ b/scorecard/schema/schema.jsonschema @@ -0,0 +1,68 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "$id": "https://example.com/product.schema.json",
+
+    "type": "object",
+    "properties": {
+        // Identifying fields
+        "test_run_id": {"type": "string", "description": "Uniquely identifies this benchmarking run"},
+        "model_set_id": {"type": "string", "description": "Identifies the group of models to which this one belongs"},
+        "model_name": {"type": "string", "description": "Uniquely identifies the model within the model set"},
+
+        // Operators
+        "framework_ops": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Name that uniquely identifies the per-framework operator"},
+                    "op_type": {"type": "string", "description": "Name of the operator"},
+                },
+            },
+        },
+        "relay_ops": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "Name of the operator"},
+                    "framework_op_index": {"type": "integer", "description": "Index into framework_ops of the framework operator that generated this one"},
+                    "schedule_method": {"type": "string", "description": "one of cutlass or native"},
+                },
+            },
+        },
+        "relay_fusion_groups": {
+            "type": "array",
+            "items": {
+                "type": "array",
+                "items": {
+                    "type": "integer",
+                    "description": "Index into relay_ops of an operator in this fusion group"
+                },
+            },
+        },
+
+        // Test results
+        "tvm_latency": {
+            "type": "object",
+            "properties": {
+                "config_name": {
+                    "type": "string",
+                    "description": "Describes the configuration of the test runner to the extent needed to differentiate between configurations."
+                },
+                "num_iterations": {
+                    "type": "integer",
+                    "description": "Number of inference iterations as part of the mean"
+                },
+                "mean_sec": {
+                    "type": "number",
+                    "description": "Mean inference latency, in seconds. Excludes time spent copying data to and from the input and output tensor memory."
+                },
+                "p95_sec": {
+                    "type": "number",
+                    "description": "95th percentile inference latency, in seconds. Excludes time spent copying data to and from the input and output tensor memory."
+                },
+            },
+        },
+    },
+}
diff --git a/scorecard/scripts/show_node_info.sh b/scorecard/scripts/show_node_info.sh new file mode 100755 index 0000000000..cf836d8e13 --- /dev/null +++ b/scorecard/scripts/show_node_info.sh @@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -euxo pipefail
+
+apt update
+apt install -y curl
+
+echo "===== EC2 INFO ====="
+function ec2_metadata() {
+    # See https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
+    curl -w '\n' -fsSL "http://169.254.169.254/latest/meta-data/$1" || echo failed
+}
+
+ec2_metadata ami-id
+ec2_metadata instance-id
+ec2_metadata instance-type
+ec2_metadata hostname
+ec2_metadata public-hostname
+
+echo "===== RUNNER INFO ====="
+df --human-readable
+nvidia-smi || true
+lscpu
+free
diff --git a/tests/lint/check_file_type.py b/tests/lint/check_file_type.py index 7753961c17..7ed0720893 100644 --- a/tests/lint/check_file_type.py +++ b/tests/lint/check_file_type.py @@ -93,6 +93,8 @@
     "groovy",
     # Python-parseable config files
     "ini",
+    # for scorecard
+    "jsonschema",
 }

 # List of file names allowed