Skip to content

Commit

Permalink
Add NNPA level compatibility check (#2533)
Browse files Browse the repository at this point in the history
* Add NNPA level compatibility check

Signed-off-by: Mike Essenmacher <[email protected]>

* Clang format changes

Signed-off-by: Mike Essenmacher <[email protected]>

* Clang format changes

Signed-off-by: Mike Essenmacher <[email protected]>

* Add mcpu to NNPA tests

Signed-off-by: Mike Essenmacher <[email protected]>

* Add link dependency to ONNXToZHigh for OMCompilerOptions

Signed-off-by: Mike Essenmacher <[email protected]>

* Update link dependency

Signed-off-by: Mike Essenmacher <[email protected]>

* Review updates

Signed-off-by: Mike Essenmacher <[email protected]>

* Review updates

Signed-off-by: Mike Essenmacher <[email protected]>

* Resolve conflicts

Signed-off-by: Mike Essenmacher <[email protected]>

* Add mcpu z16 to device_placement_pass_perf_model.mlir

Signed-off-by: Mike Essenmacher <[email protected]>

---------

Signed-off-by: Mike Essenmacher <[email protected]>
Signed-off-by: Mike Essenmacher <[email protected]>
  • Loading branch information
mikeessen authored Oct 3, 2023
1 parent c03bc49 commit 7592edf
Show file tree
Hide file tree
Showing 92 changed files with 219 additions and 93 deletions.
2 changes: 2 additions & 0 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ add_onnx_mlir_library(OMONNXToZHigh
libzdnn

LINK_LIBS PUBLIC
OMCompilerOptions
OMONNXOps
OMONNXToKrnl
OMZHighOps
Expand All @@ -31,6 +32,7 @@ add_onnx_mlir_library(OMRewriteONNXForZHigh
libzdnn

LINK_LIBS PUBLIC
OMCompilerOptions
OMONNXOps
OMONNXToKrnl
OMZHighOps
Expand Down
5 changes: 4 additions & 1 deletion src/Accelerators/NNPA/Conversion/ONNXToZHigh/NNPALimit.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//===----------------------- NNPALimit.h ----------------------------------===//
//
// Copyright 2022 The IBM Research Authors.
// Copyright 2022-2023 The IBM Research Authors.
//
// =============================================================================
//
Expand All @@ -28,3 +28,6 @@ static constexpr int64_t NNPA_MAXIMUM_TENSOR_SIZE = 4294967296;
// See zDNN API doc
static constexpr int64_t MAXIMUM_NUM_HIDDEN_SIZE_LSTM = 8192;
static constexpr int64_t MAXIMUM_NUM_HIDDEN_SIZE_GRU = 10880;

// The NNPA levels.
static constexpr const char *NNPA_Z16 = "z16";
98 changes: 98 additions & 0 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,37 @@

#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/NNPALimit.h"
#include "src/Compiler/CompilerOptions.hpp"
#include "src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp"
#include "src/Dialect/ONNX/ONNXDimAnalysis.hpp"
#include "src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp"

using namespace mlir;
using namespace onnx_mlir;

/// Convert the input NNPA level, e.g. "z16", to a floating point value
/// representing the level, e.g. 16.0.
/// \param inputNNPALevel level string of the form "<letter><number>".
/// \return the numeric part as a float, or 0 when the string is empty or the
///         numeric part is absent/unparsable.
float convertNNPALevel(std::string inputNNPALevel) {
  // A valid level has at least a leading letter and one digit, e.g. "z16".
  // Checking the size up front avoids std::out_of_range from substr(1) on an
  // empty string (the original code relied on try/catch for that case;
  // std::strtof itself never throws).
  if (inputNNPALevel.size() < 2)
    return 0;
  // Skip the leading letter and parse the numeric remainder. strtof returns
  // 0.0f when no conversion can be performed, which matches the documented
  // "unknown level" sentinel.
  return std::strtof(inputNNPALevel.c_str() + 1, nullptr);
}

/// Check whether the input NNPA level, e.g. "z16", is compatible with the
/// NNPA level selected through the compiler's --mcpu option.
/// \param inputNNPALevel the level required by an operation or pattern.
/// \return true when the required level is less than or equal to the level
///         implied by mcpu; false when both levels fail to parse (both 0).
bool isCompatibleWithNNPALevel(std::string inputNNPALevel) {
  float requestedLevel = convertNNPALevel(inputNNPALevel);
  float currentLevel = convertNNPALevel(mcpu);
  // Neither string yields a meaningful level: treat as incompatible rather
  // than accepting 0 <= 0.
  bool bothUnknown = (requestedLevel == 0) && (currentLevel == 0);
  return bothUnknown ? false : (requestedLevel <= currentLevel);
}

/// A function to check whether a value's element type is valid for zAIU or not.
/// zAIU supports only F16, F32 and BFLOAT. Since MLIR does not support BFLOAT,
/// we check F16 and F32 here only. zAIU only supports rank in range of (0, 4].
Expand Down Expand Up @@ -250,6 +274,9 @@ bool isSuitableForZDNN(OP_TYPE op, const DimAnalysis *dimAnalysis) {
template <>
bool isSuitableForZDNN<ONNXAddOp>(
ONNXAddOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getA()))
return false;
if (!isValidElementTypeAndRank(op.getB()))
Expand All @@ -261,6 +288,9 @@ bool isSuitableForZDNN<ONNXAddOp>(
template <>
bool isSuitableForZDNN<ONNXSubOp>(
ONNXSubOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getA()))
return false;
if (!isValidElementTypeAndRank(op.getB()))
Expand All @@ -272,6 +302,9 @@ bool isSuitableForZDNN<ONNXSubOp>(
template <>
bool isSuitableForZDNN<ONNXMulOp>(
ONNXMulOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getA()))
return false;
if (!isValidElementTypeAndRank(op.getB()))
Expand All @@ -283,6 +316,9 @@ bool isSuitableForZDNN<ONNXMulOp>(
template <>
bool isSuitableForZDNN<ONNXDivOp>(
ONNXDivOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getA()))
return false;
if (!isValidElementTypeAndRank(op.getB()))
Expand All @@ -294,6 +330,9 @@ bool isSuitableForZDNN<ONNXDivOp>(
template <>
bool isSuitableForZDNN<ONNXSumOp>(
ONNXSumOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
// Do not support a single input.
if (op.getData_0().size() < 2)
return false;
Expand All @@ -316,6 +355,9 @@ bool isSuitableForZDNN<ONNXSumOp>(
template <>
bool isSuitableForZDNN<ONNXMinOp>(
ONNXMinOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
int64_t opnum = op.getNumOperands();
if (opnum != 2) {
return false;
Expand All @@ -332,6 +374,9 @@ bool isSuitableForZDNN<ONNXMinOp>(
template <>
bool isSuitableForZDNN<ONNXMaxOp>(
ONNXMaxOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
int64_t opnum = op.getNumOperands();
if (opnum != 2) {
return false;
Expand All @@ -349,6 +394,9 @@ bool isSuitableForZDNN<ONNXMaxOp>(
template <>
bool isSuitableForZDNN<ONNXSoftmaxOp>(
ONNXSoftmaxOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getInput()))
return false;
ShapedType inputType = op.getType().cast<ShapedType>();
Expand All @@ -363,6 +411,9 @@ bool isSuitableForZDNN<ONNXSoftmaxOp>(
template <>
bool isSuitableForZDNN<ONNXReluOp>(
ONNXReluOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getX()))
return false;
ShapedType xType = op.getX().getType().cast<ShapedType>();
Expand All @@ -373,6 +424,9 @@ bool isSuitableForZDNN<ONNXReluOp>(
template <>
bool isSuitableForZDNN<ONNXTanhOp>(
ONNXTanhOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getInput()))
return false;
ShapedType inputType = op.getType().cast<ShapedType>();
Expand All @@ -383,6 +437,9 @@ bool isSuitableForZDNN<ONNXTanhOp>(
template <>
bool isSuitableForZDNN<ONNXSigmoidOp>(
ONNXSigmoidOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getX()))
return false;
ShapedType xType = op.getX().getType().cast<ShapedType>();
Expand All @@ -393,6 +450,9 @@ bool isSuitableForZDNN<ONNXSigmoidOp>(
template <>
bool isSuitableForZDNN<ONNXLogOp>(
ONNXLogOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getInput()))
return false;
ShapedType inputType = op.getInput().getType().cast<ShapedType>();
Expand All @@ -403,6 +463,9 @@ bool isSuitableForZDNN<ONNXLogOp>(
template <>
bool isSuitableForZDNN<ONNXExpOp>(
ONNXExpOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
if (!isValidElementTypeAndRank(op.getInput()))
return false;
ShapedType inputType = op.getInput().getType().cast<ShapedType>();
Expand All @@ -413,6 +476,9 @@ bool isSuitableForZDNN<ONNXExpOp>(
template <>
bool isSuitableForZDNN<ONNXMatMulOp>(
ONNXMatMulOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;
int64_t opnum = op.getNumOperands();
if (opnum != 2) {
return false;
Expand Down Expand Up @@ -467,6 +533,10 @@ bool isSuitableForZDNN<ONNXGemmOp>(
Value B = op.getB();
Value C = op.getC();

// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check data type.
if (!isValidElementTypeAndRank(A))
return false;
Expand Down Expand Up @@ -519,6 +589,10 @@ bool isSuitableForZDNN<ONNXGemmOp>(
template <>
bool isSuitableForZDNN<ONNXReduceMeanV13Op>(
ONNXReduceMeanV13Op op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check data type.
if (!isValidElementTypeAndRank(op.getData()))
return false;
Expand Down Expand Up @@ -560,6 +634,10 @@ bool isSuitableForZDNN<ONNXLSTMOp>(
Value R = op.getR();
Value B = op.getB();

// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check direction.
if ((direction != FORWARD) && (direction != REVERSE) &&
(direction != BIDIRECTIONAL))
Expand Down Expand Up @@ -635,6 +713,10 @@ bool isSuitableForZDNN<ONNXGRUOp>(
Value R = op.getR();
Value B = op.getB();

// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check direction.
if ((direction != FORWARD) && (direction != REVERSE) &&
(direction != BIDIRECTIONAL))
Expand Down Expand Up @@ -702,6 +784,10 @@ bool isSuitableForZDNN<ONNXGRUOp>(
template <>
bool isSuitableForZDNN<ONNXMaxPoolSingleOutOp>(
ONNXMaxPoolSingleOutOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check data type.
if (!isValidElementTypeAndRank(op.getX()))
return false;
Expand All @@ -725,6 +811,10 @@ bool isSuitableForZDNN<ONNXMaxPoolSingleOutOp>(
template <>
bool isSuitableForZDNN<ONNXAveragePoolOp>(
ONNXAveragePoolOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check data type.
if (!isValidElementTypeAndRank(op.getX()))
return false;
Expand Down Expand Up @@ -782,6 +872,10 @@ static bool checkConv2DParamRestrictions(int64_t inputDim, int64_t kernelDim,
template <>
bool isSuitableForZDNN<ONNXConvOp>(
ONNXConvOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// Check data type.
if (!isValidElementTypeAndRank(op.getX()))
return false;
Expand Down Expand Up @@ -864,6 +958,10 @@ bool isSuitableForZDNN<ONNXBatchNormalizationInferenceModeOp>(
ArrayRef<int64_t> shapeInput = inputType.getShape();
ArrayRef<int64_t> shapeOutput = outputType.getShape();

// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return false;

// 4D tensors(N x C x H x W) are supported as input and output.
if (shapeInput.size() != 4 || shapeOutput.size() != 4)
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

//===---------- ONNXLegalityCheck.hpp - Check legality for ONNX ops -------===//
//
// Copyright 2019-2020 The IBM Research Authors.
// Copyright 2019-2023 The IBM Research Authors.
//
// =============================================================================
//
Expand All @@ -26,6 +26,10 @@ template <typename OP_TYPE>
bool isSuitableForZDNN(
OP_TYPE op, const onnx_mlir::DimAnalysis *dimAnalysis = nullptr);

/// Check if the input NNPA level is compatible with the current NNPA
/// level.
bool isCompatibleWithNNPALevel(std::string inputNNPALevel);

/// Get padding type using shape helper. This returns
/// `SAME_PADDING`, `VALID_PADDING`, or empty.
template <typename OP, typename OPAdaptor, typename OPShapeHelper>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,9 @@ void getRewriteONNXForZHighDynamicallyLegal(
// broadcasting.
addDynamicallyLegalOpFor<ONNXAddOp>(
target, dimAnalysis, [](ONNXAddOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
return !((isDefinedByONNXConstantOp(op.getA()) &&
isUniBroadcatableFirstToSecond(op.getA(), op.getB())) ||
(isDefinedByONNXConstantOp(op.getB()) &&
Expand All @@ -508,20 +511,29 @@ void getRewriteONNXForZHighDynamicallyLegal(
});
addDynamicallyLegalOpFor<ONNXDivOp>(
target, dimAnalysis, [](ONNXDivOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
return !((isDefinedByONNXConstantOp(op.getA()) &&
isUniBroadcatableFirstToSecond(op.getA(), op.getB())) ||
(isDefinedByONNXConstantOp(op.getB()) &&
isUniBroadcatableFirstToSecond(op.getB(), op.getA())));
});
addDynamicallyLegalOpFor<ONNXMulOp>(
target, dimAnalysis, [](ONNXMulOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
return !((isDefinedByONNXConstantOp(op.getA()) &&
isUniBroadcatableFirstToSecond(op.getA(), op.getB())) ||
(isDefinedByONNXConstantOp(op.getB()) &&
isUniBroadcatableFirstToSecond(op.getB(), op.getA())));
});
addDynamicallyLegalOpFor<ONNXSubOp>(
target, dimAnalysis, [](ONNXSubOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
return !((isDefinedByONNXConstantOp(op.getA()) &&
isUniBroadcatableFirstToSecond(op.getA(), op.getB())) ||
(isDefinedByONNXConstantOp(op.getB()) &&
Expand All @@ -540,6 +552,9 @@ void getRewriteONNXForZHighDynamicallyLegal(
// one where N-D will become 3-D or to split MatMul into smaller MatMuls.
addDynamicallyLegalOpFor<ONNXMatMulOp>(
target, dimAnalysis, [](ONNXMatMulOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
Type aType = op.getA().getType();
Type bType = op.getB().getType();
if (!isRankedShapedType(aType) || !isRankedShapedType(bType))
Expand Down Expand Up @@ -579,10 +594,14 @@ void getRewriteONNXForZHighDynamicallyLegal(
});

// Illegalize SoftmaxOp if
// - the NNPA level is not compatible, or
// - axis is the last dimension.
// This SoftmaxOp will be rewritten in which its input is reshaped to 3D.
addDynamicallyLegalOpFor<ONNXSoftmaxOp>(target, dimAnalysis,
[](ONNXSoftmaxOp op, const DimAnalysis *dimAnalysis) {
// Check NNPA level.
if (!isCompatibleWithNNPALevel(NNPA_Z16))
return true;
Value input = op.getInput();
if (auto shapedType = input.getType().dyn_cast<RankedTensorType>()) {
if ((shapedType.getRank() > 3) &&
Expand Down
2 changes: 1 addition & 1 deletion test/mlir/accelerators/nnpa/analysis/dyn-dim-analysis.mlir
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --maccel=NNPA --onnx-dim-analysis %s -split-input-file | FileCheck %s
// RUN: onnx-mlir-opt --mcpu=z16 --maccel=NNPA --onnx-dim-analysis %s -split-input-file | FileCheck %s

// COM: test zdnn unary operations. Use Relu as a sample.
func.func @test_stick_unary_unstick(%arg0 : tensor<?x3x?xf32>) -> tensor<?x3x?xf32> {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// RUN: onnx-mlir-opt --device-placement --maccel=NNPA --split-input-file %s | FileCheck %s
// RUN: onnx-mlir-opt --device-placement --mcpu=z16 --maccel=NNPA --split-input-file %s | FileCheck %s

module attributes {llvm.data_layout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64", llvm.target_triple = "s390x-ibm-linux", "onnx-mlir.symbol-postfix" = "model"} {
func.func @mnist(%arg0: tensor<1x1x28x28xf32>) -> tensor<1x10xf32> attributes {input_names = ["Input3"], output_names = ["Plus214_Output_0"]} {
Expand Down
Loading

0 comments on commit 7592edf

Please sign in to comment.