diff --git a/cmake/tpp-mlir.cmake b/cmake/tpp-mlir.cmake
index 300ac99df3b734..d988ac7a919271 100644
--- a/cmake/tpp-mlir.cmake
+++ b/cmake/tpp-mlir.cmake
@@ -36,6 +36,8 @@ if (TPP_MLIR_DIR)
         -Wl,--no-as-needed
         -L${TPP_MLIR_DIR}/lib
         -ltpp_xsmm_runner_utils
+        -L${LLVM_LIBRARY_DIR}
+        -lmlir_c_runner_utils
         -Wl,--as-needed
     )
     #FIXME: Provide platform-independent way of doing that:
diff --git a/tools/mlir_bench/README.md b/tools/mlir_bench/README.md
new file mode 100644
index 00000000000000..85dcdb65dccdb3
--- /dev/null
+++ b/tools/mlir_bench/README.md
@@ -0,0 +1,72 @@
+# MLP benchmarks
+
+A collection of MLP benchmarks.
+This document describes the usage of the `*_bench.sh` scripts.
+
+## LIBXSMM
+- F32:
+```bash
+libxsmm_bench.sh
+```
+- BF16:
+```bash
+libxsmm_bench.sh -B
+```
+
+## Pure MLIR
+- F32:
+```bash
+tpp_mlir_bench.sh -t f32
+```
+- BF16:
+```bash
+tpp_mlir_bench.sh -t bf16
+```
+
+## OV - no MLIR
+Default model:\
+`matmul_transpose_b + bias broadcast`
+
+Alternative model - script flag `-b mlp`:\
+`matmul + bias (no broadcast)`
+
+- F32:
+```bash
+OV_MLIR=0 mlp_bench.sh -t f32
+```
+- BF16:
+```bash
+OV_MLIR=0 mlp_bench.sh -t bf16
+```
+
+## OV + MLIR - full
+Default model:\
+`matmul_transpose_b + bias broadcast`
+
+Alternative model - script flag `-b mlp`:\
+`matmul + bias (no broadcast)`
+
+- F32:
+```bash
+OV_MLIR=1 mlp_bench.sh -t f32
+```
+- BF16:
+```bash
+OV_MLIR=1 mlp_bench.sh -t bf16
+```
+
+## OV + MLIR - kernel only
+Default model:\
+`matmul_transpose_b + bias broadcast`
+
+Alternative model - script flag `-b mlp`:\
+`matmul + bias (no broadcast)`
+
+- F32:
+```bash
+ov_raw_mlir_bench.sh -t f32
+```
+- BF16:
+```bash
+ov_raw_mlir_bench.sh -t bf16
+```
diff --git a/tools/mlir_bench/libxsmm_bench.sh b/tools/mlir_bench/libxsmm_bench.sh
new file mode 100755
index 00000000000000..b39da1d187ceae
--- /dev/null
+++ b/tools/mlir_bench/libxsmm_bench.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs MLP benchmarks using libxsmm.
+
+die_syntax() {
+  echo "Syntax: $0 [-B] [-D]"
+  echo ""
+  echo "  -B: Use bf16 data type"
+  echo "  -D: Set model shapes to dynamic"
+  exit 1
+}
+
+# Cmd-line opts
+while getopts "BD" arg; do
+  case ${arg} in
+    B)
+      DATA_TYPE="bf16"
+      ;;
+    D)
+      IS_DYNAMIC=true
+      ;;
+    ?)
+      echo "Invalid option: ${OPTARG}"
+      die_syntax
+      ;;
+  esac
+done
+
+BENCH_RUNNER=xsmm_dnn_mlp
+
+# Initial validation.
+if ! [ "$(command -v ${BENCH_RUNNER})" ]; then
+  echo "Missing benchmark runner ${BENCH_RUNNER}"
+  exit 1
+fi
+if [ ${IS_DYNAMIC} ]; then
+  echo "Dynamic shapes are not supported by ${BENCH_RUNNER}"
+  exit 1
+fi
+
+# Kernel config.
+INPUT_SIZES=( 1024 2048 4096 8192 )
+OUTPUT_SIZES=( 128 256 512 )
+if [ ! "${DATA_TYPE}" ]; then
+  DATA_TYPE="f32"
+fi
+
+echo "Result type: GFLOPS"
+for OUT_SIZE in "${OUTPUT_SIZES[@]}"; do
+  echo "MLP - OUT: ${OUT_SIZE} INS: ${INPUT_SIZES[@]}"
+  for IN_SIZE in "${INPUT_SIZES[@]}"; do
+    # Run benchmark.
+    NUM_ITER=10000
+    FUSE_TYPE=5
+    TYPE=F
+    TILES=(64 64 64)
+    LAYOUT=(0 0)
+    if [ "${DATA_TYPE}" = "bf16" ]; then
+      LAYOUT=(1 1)
+    fi
+    # Disable parallelism.
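+    # OMP_NUM_THREADS=1 pins the runner's OpenMP pool to a single thread,
+    # so the reported GFLOPS reflect single-core performance.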
+    ENV_FLAGS=OMP_NUM_THREADS=1
+    env ${ENV_FLAGS} ${BENCH_RUNNER} ${NUM_ITER} ${OUT_SIZE} ${FUSE_TYPE} ${TYPE} ${TILES[@]} \
+        ${LAYOUT[@]} ${IN_SIZE} ${OUT_SIZE} \
+      | sed -nE "s/.*GFLOPS\s+=\s*([0-9.]+).*/\\1/p"
+  done
+done
diff --git a/tools/mlir_bench/mlp_bench.sh b/tools/mlir_bench/mlp_bench.sh
new file mode 100755
index 00000000000000..ca52ee995d01cd
--- /dev/null
+++ b/tools/mlir_bench/mlp_bench.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs OV MLP benchmarks.
+
+die_syntax() {
+  echo "Syntax: $0 [-t (f32|f16|bf16|...)] [-b (mlp)] [-D]"
+  echo ""
+  echo "  -t: Optional data type"
+  echo "  -b: Optional baseline model"
+  echo "  -D: Set model shapes to dynamic"
+  exit 1
+}
+
+# Cmd-line opts
+while getopts "t:b:D" arg; do
+  case ${arg} in
+    t)
+      DATA_TYPE=${OPTARG}
+      ;;
+    b)
+      BASELINE_MODEL=${OPTARG}
+      ;;
+    D)
+      IS_DYNAMIC=true
+      ;;
+    ?)
+      echo "Invalid option: ${OPTARG}"
+      die_syntax
+      ;;
+  esac
+done
+
+OV_ROOT=$(git rev-parse --show-toplevel)
+BENCH_ROOT=$(realpath ${OV_ROOT}/tools/mlir_bench)
+
+MODEL_GEN=$(realpath ${BENCH_ROOT}/ov_model_gen.py)
+BENCH_RUNNER=benchmark_app
+
+# Initial validation.
+if ! [ -d ${OV_ROOT} ]; then
+  echo "Missing OV repo"
+  exit 1
+fi
+if ! [ -d ${BENCH_ROOT} ]; then
+  echo "Missing MLIR benchmark directory"
+  exit 1
+fi
+if ! [ -f ${MODEL_GEN} ]; then
+  echo "Missing model generator"
+  exit 1
+fi
+if ! [ "$(command -v ${BENCH_RUNNER})" ]; then
+  echo "Missing benchmark runner ${BENCH_RUNNER}"
+  exit 1
+fi
+if [ "${BASELINE_MODEL}" ] && [ ${IS_DYNAMIC} ]; then
+  echo "Baseline models with dynamic shapes are not supported"
+  exit 1
+fi
+
+# Kernel config.
+INPUT_SIZES=( 1024 2048 4096 8192 )
+OUTPUT_SIZES=( 128 256 512 )
+if [ ! "${DATA_TYPE}" ]; then
+  DATA_TYPE="f32"
+fi
+MODEL_NAME="MLIR_MLP_BENCH.xml"
+
+echo "Result type: time [ms]"
+for OUT_SIZE in "${OUTPUT_SIZES[@]}"; do
+  echo "MLP - OUT: ${OUT_SIZE} INS: ${INPUT_SIZES[@]}"
+  for IN_SIZE in "${INPUT_SIZES[@]}"; do
+    # Generate model.
+    if [ "${BASELINE_MODEL}" ]; then
+      # Enable baseline model flag.
+      MODEL_CONFIG=(-b="${BASELINE_MODEL}[${OUT_SIZE},${OUT_SIZE},${IN_SIZE}]")
+    else
+      # Generate default PyTorch MLP.
+      MODEL_CONFIG=(-l="linear[${IN_SIZE},${OUT_SIZE}] relu[]")
+    fi
+    GEN_FLAGS=(-t ${DATA_TYPE} -n ${MODEL_NAME})
+    if [ ${IS_DYNAMIC} ]; then
+      GEN_FLAGS+=(--dynamic)
+    fi
+    python3 ${MODEL_GEN} "${MODEL_CONFIG[@]}" "${GEN_FLAGS[@]}"
+    if [ $? != 0 ]; then
+      echo "Failed to generate model"
+      exit 1
+    fi
+    # Run benchmark.
+    PRECISION=${DATA_TYPE}
+    if [ "${DATA_TYPE}" = "bf16" ]; then
+      # benchmark_app has no native bf16 input precision; use f16 instead.
+      PRECISION="f16"
+    fi
+    if [ ${IS_DYNAMIC} ]; then
+      DATA_SHAPE=(-data_shape [${OUT_SIZE},${IN_SIZE}])
+    fi
+    # Benchmark config. Disable parallelism.
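+    # -hint none disables benchmark_app's built-in performance hints so the
+    # explicit -nstreams 1 -nthreads 1 settings take effect: one inference
+    # stream on a single thread.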
+    PERF_FLAGS="-niter 10000 -hint none -nstreams 1 -nthreads 1"
+    BENCH_FLAGS="-m ${MODEL_NAME} -d CPU \
+        -ip ${PRECISION} ${DATA_SHAPE[@]} ${PERF_FLAGS}"
+    ${BENCH_RUNNER} ${BENCH_FLAGS} 2>/dev/null | \
+      sed -nE "s/.*\[ INFO \]\s*Median:\s*([0-9.]+).*/\\1/p"
+  done
+done
diff --git a/tools/mlir_bench/ov_model_gen.py b/tools/mlir_bench/ov_model_gen.py
new file mode 100644
index 00000000000000..f0b4fd0a7f1e28
--- /dev/null
+++ b/tools/mlir_bench/ov_model_gen.py
@@ -0,0 +1,247 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+from __future__ import annotations
+import argparse
+import sys
+import os
+
+import torch
+import torch.nn as nn
+import openvino as ov
+
+
+class TorchAdd(nn.Module):
+    def __init__(self, sizes, type=None):
+        super().__init__()
+        # Generate random data
+        self.tensor = torch.empty(*sizes, dtype=type).data.normal_(0, 0.01)
+    def forward(self, a):
+        return a + self.tensor
+
+
+class TorchSub(nn.Module):
+    def __init__(self, sizes, type=None):
+        super().__init__()
+        # Generate random data
+        self.tensor = torch.empty(*sizes, dtype=type).data.normal_(0, 0.01)
+    def forward(self, a):
+        return a - self.tensor
+
+
+class TorchMul(nn.Module):
+    def __init__(self, sizes, type=None):
+        super().__init__()
+        # Generate random data
+        self.tensor = torch.empty(*sizes, dtype=type).data.normal_(0, 0.01)
+    def forward(self, a):
+        return a * self.tensor
+
+
+class TorchMatmul(nn.Module):
+    def __init__(self, sizes_mnk, type=None):
+        super().__init__()
+        k = sizes_mnk[2]
+        n = sizes_mnk[1]
+        # Generate random weights
+        self.weights = torch.empty(k, n, dtype=type).data.normal_(0, 0.01)
+    def forward(self, a):
+        return torch.matmul(a, self.weights)
+
+
+class TorchDiv(nn.Module):
+    def __init__(self, sizes, type=None):
+        super().__init__()
+        # Generate random data
+        self.tensor = torch.empty(*sizes, dtype=type).data.normal_(1, 10)
+    def forward(self, a):
+        return a / self.tensor
+
+
+class TorchSequential(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = nn.Sequential()
+    def forward(self, a):
+        return self.model(a)
+    def append(self, module: nn.Module):
+        self.model.append(module)
+
+
+def get_torch_type(type: str) -> torch.dtype:
+    if type == 'f32':
+        return torch.float32
+    if type == 'f16':
+        return torch.float16
+    if type == 'bf16':
+        return torch.bfloat16
+    assert False, f"Unsupported torch data type {type}"
+
+
+def get_torch_layer(layer: str, sizes: list[int], type: str) -> nn.Module:
+    data_type = get_torch_type(type)
+    if layer == 'linear':
+        assert len(sizes) == 2, "invalid sizes for linear"
+        linear = nn.Linear(*sizes, dtype=data_type)
+        # Generate random weights
+        linear.weight.data.normal_(0, 0.01)
+        linear.bias.data.fill_(0.01)
+        return linear
+    if layer == 'relu':
+        return nn.ReLU()
+    if layer == 'gelu':
+        return nn.GELU()
+    if layer == 'add':
+        return TorchAdd(sizes, data_type)
+    if layer == 'sub':
+        return TorchSub(sizes, data_type)
+    if layer == 'mul':
+        return TorchMul(sizes, data_type)
+    if layer == 'div':
+        return TorchDiv(sizes, data_type)
+    if layer == 'matmul':
+        assert len(sizes) == 3, "invalid sizes for matmul"
+        return TorchMatmul(sizes, data_type)
+    assert False, f"Unsupported torch layer type {layer}"
+
+
+def get_layer_name(layer_desc: str) -> str:
+    return layer_desc[0:layer_desc.find('[')]
+
+
+def get_layer_sizes(layer_desc: str) -> list[int]:
+    desc_sizes = layer_desc[layer_desc.find('[')+1:layer_desc.find(']')]
+    return [int(size) for size in filter(None, desc_sizes.split(','))]
+
+
+def parse_layer(layer_desc: str, type: str) -> nn.Module:
+    layer = get_layer_name(layer_desc)
+    sizes = get_layer_sizes(layer_desc)
+    return get_torch_layer(layer, sizes, type)
+
+
+def get_ov_type(type: str) -> ov.Type:
+    if type == 'f32':
+        return ov.Type.f32
+    if type == 'f16':
+        return ov.Type.f16
+    if type == 'bf16':
+        return ov.Type.bf16
+    assert False, f"Unsupported OV data type {type}"
+
+
+def get_layer_inputs(layer_desc: str, is_dynamic: bool):
+    input_sizes = get_layer_sizes(layer_desc)
+    if is_dynamic:
+        input_sizes = [-1] * len(input_sizes)
+
+    layer = get_layer_name(layer_desc)
+
+    if layer == 'matmul':
+        m = input_sizes[0]
+        k = input_sizes[2]
+        return [[m, k]]
+
+    # nn.Linear is described as [in_features, out_features] but consumes
+    # inputs of shape [batch, in_features], so reverse the sizes.
+    # TODO: check whether other layers need the same treatment.
+    if layer == 'linear':
+        input_sizes.reverse()
+
+    return [input_sizes]
+
+
+def generate_ov_model(layers_desc: str, data_type: str, file_name: str, is_dynamic: bool = False):
+    layers = layers_desc.split()
+    torch_seq = TorchSequential()
+    for layer in layers:
+        module = parse_layer(layer, data_type)
+        torch_seq.append(module)
+
+    input_sizes = get_layer_sizes(layers[0])
+    if len(input_sizes) == 0:
+        print("Invalid input layer sizes")
+        sys.exit(1)
+
+    input_shapes = get_layer_inputs(layers[0], is_dynamic)
+    ov_type = get_ov_type(data_type)
+    inputs = [(ov.PartialShape(shapes), ov_type) for shapes in input_shapes]
+
+    ov_model = ov.convert_model(torch_seq, input=inputs)
+    ov.save_model(ov_model, file_name)
+    return ov_model
+
+
+class BaselineMLP(nn.Module):
+    def __init__(self, sizes_mnk, type=None):
+        super().__init__()
+        m = sizes_mnk[0]
+        n = sizes_mnk[1]
+        self.bias = torch.empty((m, n), dtype=type).data.fill_(0.01)
+        self.relu = nn.ReLU()
+    def forward(self, a, b):
+        c = torch.matmul(a, b)
+        c = torch.add(c, self.bias)
+        return self.relu(c)
+
+
+def baseline_MLP(model_desc: str, data_type: str, is_dynamic: bool) -> tuple[nn.Module, list]:
+    sizes = get_layer_sizes(model_desc)
+    assert len(sizes) == 3, "Invalid baseline MLP sizes"
+    mlp = BaselineMLP(sizes, get_torch_type(data_type))
+    input_shapes = get_layer_inputs(model_desc, is_dynamic)[0]
+    m = input_shapes[0]
+    n = input_shapes[1]
+    k = input_shapes[2]
+    ov_type = get_ov_type(data_type)
+    inputs = [(ov.PartialShape([m, k]), ov_type), (ov.PartialShape([k, n]), ov_type)]
+    return (mlp, inputs)
+
+
+def generate_baseline_model(model_desc: str, data_type: str, file_name: str, is_dynamic: bool = False):
+    model_name = get_layer_name(model_desc)
+
+    if model_name == 'mlp':
+        baseline_tuple = baseline_MLP(model_desc, data_type, is_dynamic)
+    else:
+        assert False, f"Unsupported baseline model {model_name}"
+
+    ov_model = ov.convert_model(baseline_tuple[0], input=baseline_tuple[1])
+    ov.save_model(ov_model, file_name)
+    return ov_model
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        prog='OV Model generator',
+        description='Generate PyTorch model and export as OV .xml')
+    parser.add_argument('-l', '--layers', type=str.lower,
+                        help='Model layers description. For example:'
+                             ' -l="linear[64,32] relu[] linear[32,16] gelu[]"'
+                             ' -l="matmul[128,128,1024] add[128,128] relu[]"'
+                             ' -l="add[8,8] div[8,8]"')
+    parser.add_argument('-t', '--type', default='f32', type=str.lower,
+                        help='Data type: f32|f16|bf16|...')
+    parser.add_argument('--dynamic', action='store_true',
+                        help='Make model shapes dynamic')
+    parser.add_argument('-n', '--name', default='temp.xml',
+                        help='Name for exported XML model')
+    parser.add_argument('-b', '--baseline', default=None, type=str.lower,
+                        help='Baseline pre-made model - overrides layers.'
+                             ' For example: -b=mlp[32,64,16]')
+    parser.add_argument('-p', '--print', action='store_true',
+                        help='Compile the model, printing any MLIR generated'
+                             ' during compilation')
+    args = parser.parse_args()
+
+    if args.baseline is not None:
+        model = generate_baseline_model(args.baseline, args.type, args.name, args.dynamic)
+    else:
+        model = generate_ov_model(args.layers, args.type, args.name, args.dynamic)
+
+    if args.print:
+        ov.compile_model(model, 'CPU')
+
+    return 0
+
+
+if __name__ == '__main__':
+    # Exit immediately, skipping interpreter teardown (atexit handlers, destructors).
+    os._exit(main())
diff --git a/tools/mlir_bench/ov_raw_mlir_bench.sh b/tools/mlir_bench/ov_raw_mlir_bench.sh
new file mode 100755
index 00000000000000..9f12cb50b35d16
--- /dev/null
+++ b/tools/mlir_bench/ov_raw_mlir_bench.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs the pure MLIR part of MLP benchmarks using TPP-MLIR.
+# This assumes that only one MLIR op is generated, i.e. that the
+# whole graph is outlined into a single MLIR kernel.
+
+die_syntax() {
+  echo "Syntax: $0 [-t (f32|f16|bf16|...)] [-b (mlp)] [-D]"
+  echo ""
+  echo "  -t: Optional data type"
+  echo "  -b: Optional baseline model"
+  echo "  -D: Set model shapes to dynamic"
+  exit 1
+}
+
+# Cmd-line opts
+while getopts "t:b:D" arg; do
+  case ${arg} in
+    t)
+      DATA_TYPE=${OPTARG}
+      ;;
+    b)
+      BASELINE_MODEL=${OPTARG}
+      ;;
+    D)
+      IS_DYNAMIC=true
+      ;;
+    ?)
+      echo "Invalid option: ${OPTARG}"
+      die_syntax
+      ;;
+  esac
+done
+
+OV_ROOT=$(git rev-parse --show-toplevel)
+BENCH_ROOT=$(realpath ${OV_ROOT}/tools/mlir_bench)
+
+MODEL_GEN=$(realpath ${BENCH_ROOT}/ov_model_gen.py)
+BENCH_RUNNER=tpp-run
+
+# Initial validation.
+if ! [ -d ${OV_ROOT} ]; then
+  echo "Missing OV repo"
+  exit 1
+fi
+if ! [ -d ${BENCH_ROOT} ]; then
+  echo "Missing MLIR benchmark directory"
+  exit 1
+fi
+if ! [ -f ${MODEL_GEN} ]; then
+  echo "Missing model generator"
+  exit 1
+fi
+if ! [ "$(command -v ${BENCH_RUNNER})" ]; then
+  echo "Missing benchmark runner ${BENCH_RUNNER}"
+  exit 1
+fi
+if [ ${IS_DYNAMIC} ]; then
+  echo "Dynamic shapes are not supported by ${BENCH_RUNNER}"
+  exit 1
+fi
+
+# Kernel config.
+INPUT_SIZES=( 1024 2048 4096 8192 )
+OUTPUT_SIZES=( 128 256 512 )
+if [ ! "${DATA_TYPE}" ]; then
+  DATA_TYPE="f32"
+fi
+MODEL_NAME="TPP_BENCH.xml"
+
+echo "Result type: time [ns]"
+for OUT_SIZE in "${OUTPUT_SIZES[@]}"; do
+  echo "MLP - OUT: ${OUT_SIZE} INS: ${INPUT_SIZES[@]}"
+  for IN_SIZE in "${INPUT_SIZES[@]}"; do
+    # Generate model.
+    if [ "${BASELINE_MODEL}" ]; then
+      # Enable baseline model flag.
+      MODEL_CONFIG=(-b="${BASELINE_MODEL}[${OUT_SIZE},${OUT_SIZE},${IN_SIZE}]")
+    else
+      # Generate default PyTorch MLP.
+      MODEL_CONFIG=(-l="linear[${IN_SIZE},${OUT_SIZE}] relu[]")
+    fi
+    GEN_FLAGS=(-t ${DATA_TYPE} -n ${MODEL_NAME})
+    # Compile the model so the generated MLIR gets printed.
+    GEN_FLAGS+=(-p)
+    ENV_FLAGS=OV_MLIR_TPP=0
+    MODEL_OUT=$(env ${ENV_FLAGS} python3 ${MODEL_GEN} "${MODEL_CONFIG[@]}" "${GEN_FLAGS[@]}" 2>&1)
+    if [ $? != 0 ]; then
+      echo "Failed to generate model"
+      exit 1
+    fi
+    # Run benchmark.
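+    # The generator prints the outlined kernel between the 'Source MLIR:' and
+    # 'Target LLVM:' markers; extract only that IR, dropping separator lines.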
+    MLIR_IR=$(echo "${MODEL_OUT}" \
+      | awk '/Source MLIR:/{flag=1; next} /Target LLVM:/{flag=0} flag' \
+      | grep -vE '^[-]+$')
+    BENCH_FLAGS="-entry-point-result=void -e entry -seed 123 -n 10000"
+    echo "${MLIR_IR}" | ${BENCH_RUNNER} ${BENCH_FLAGS}
+  done
+done
diff --git a/tools/mlir_bench/tpp_mlir_bench.sh b/tools/mlir_bench/tpp_mlir_bench.sh
new file mode 100755
index 00000000000000..fbecd29cd80317
--- /dev/null
+++ b/tools/mlir_bench/tpp_mlir_bench.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+# Runs MLIR-only MLP benchmarks using TPP-MLIR.
+
+die_syntax() {
+  echo "Syntax: $0 [-t (f32|f16|bf16|...)] [-D]"
+  echo ""
+  echo "  -t: Optional data type"
+  echo "  -D: Set model shapes to dynamic"
+  exit 1
+}
+
+# Cmd-line opts
+while getopts "t:D" arg; do
+  case ${arg} in
+    t)
+      DATA_TYPE=${OPTARG}
+      ;;
+    D)
+      IS_DYNAMIC=true
+      ;;
+    ?)
+      echo "Invalid option: ${OPTARG}"
+      die_syntax
+      ;;
+  esac
+done
+
+MODEL_GEN=mlir-gen
+BENCH_RUNNER=tpp-run
+
+# Initial validation.
+if ! [ "$(command -v ${MODEL_GEN})" ]; then
+  echo "Missing model generator ${MODEL_GEN}"
+  exit 1
+fi
+if ! [ "$(command -v ${BENCH_RUNNER})" ]; then
+  echo "Missing benchmark runner ${BENCH_RUNNER}"
+  exit 1
+fi
+if [ ${IS_DYNAMIC} ]; then
+  echo "Dynamic shapes are not supported by ${BENCH_RUNNER}"
+  exit 1
+fi
+
+# Kernel config.
+INPUT_SIZES=( 1024 2048 4096 8192 )
+OUTPUT_SIZES=( 128 256 512 )
+if [ ! "${DATA_TYPE}" ]; then
+  DATA_TYPE="f32"
+fi
+
+echo "Result type: time [ns]"
+for OUT_SIZE in "${OUTPUT_SIZES[@]}"; do
+  echo "MLP - OUT: ${OUT_SIZE} INS: ${INPUT_SIZES[@]}"
+  for IN_SIZE in "${INPUT_SIZES[@]}"; do
+    # Generate model.
+    MODEL_CONFIG=(--batch=${OUT_SIZE} --layers=${IN_SIZE},${OUT_SIZE} -bias -relu)
+    GEN_FLAGS=(--kernel=args --float-type=${DATA_TYPE} --seed=123)
+    MLIR_IR=$(${MODEL_GEN} "${MODEL_CONFIG[@]}" "${GEN_FLAGS[@]}")
+    if [ $? != 0 ]; then
+      echo "Failed to generate model"
+      exit 1
+    fi
+    # Run benchmark.
+    BENCH_FLAGS="-entry-point-result=void -e entry -seed 123 -n 10000"
+    echo "${MLIR_IR}" | ${BENCH_RUNNER} ${BENCH_FLAGS}
+  done
+done