Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ONNX to ZHigh guided by cost model #2507

Closed
7 changes: 7 additions & 0 deletions src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,11 @@ llvm::cl::opt<bool> nnpaEnableZHighToOnnx("enable-zhigh-to-onnx",
"level. Default is true."),
llvm::cl::init(true), llvm::cl::cat(OnnxMlirOptions));

// Command-line flag (--enable-zhigh-cost-model) that gates the use of a
// performance cost model when deciding whether to lower an eligible ONNX
// operation to its ZHigh equivalent. Defaults to off (llvm::cl::init(false)),
// so lowering decisions are unchanged unless the user opts in.
llvm::cl::opt<bool> nnpaEnableZHighCostModel("enable-zhigh-cost-model",
llvm::cl::desc(
"Enabling a performance cost model to estimate the benefit of "
"migrating an eligible onnx operation to a ZHigh operation. Default is "
"false."),
llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));

} // namespace onnx_mlir
1 change: 1 addition & 0 deletions src/Accelerators/NNPA/Compiler/NNPACompilerOptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern llvm::cl::opt<onnx_mlir::NNPAEmissionTargetType> nnpaEmissionTarget;
extern llvm::cl::list<std::string> execNodesOnCpu;
extern llvm::cl::opt<bool> nnpaClipToDLFloatRange;
extern llvm::cl::opt<bool> nnpaEnableZHighToOnnx;
// When true, use a performance cost model to decide ONNX -> ZHigh lowering
// (set via --enable-zhigh-cost-model; defined in NNPACompilerOptions.cpp).
extern llvm::cl::opt<bool> nnpaEnableZHighCostModel;
extern llvm::cl::opt<bool> profileZHighIR;

} // namespace onnx_mlir
11 changes: 6 additions & 5 deletions src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,13 @@ using namespace onnx_mlir;

namespace onnx_mlir {

void addONNXToZHighPasses(
mlir::PassManager &pm, ArrayRef<std::string> execNodesOnCpu) {
void addONNXToZHighPasses(mlir::PassManager &pm,
ArrayRef<std::string> execNodesOnCpu, bool useCostModel) {
for (unsigned i = 0; i < 3; i++) {
// Repeat this process so that shape-related ops such as Shape, Expand,
// Gather generated during RewriteONNXForZHigh will become constants.
pm.addPass(onnx_mlir::createRewriteONNXForZHighPass(execNodesOnCpu));
pm.addPass(onnx_mlir::createRewriteONNXForZHighPass(
execNodesOnCpu, false /*useCostModel*/));
// Simplify shape-related ops, including ShapeOp-to-DimOp replacement,
// constant propagation, shape inference and canonicalize.
pm.addPass(onnx_mlir::createSimplifyShapeRelatedOpsPass());
Expand All @@ -75,7 +76,7 @@ void addONNXToZHighPasses(
pm.addNestedPass<func::FuncOp>(
onnx_mlir::createInstrumentPass(instrumentOps, instrumentActions));

pm.addPass(onnx_mlir::createONNXToZHighPass(execNodesOnCpu));
pm.addPass(onnx_mlir::createONNXToZHighPass(execNodesOnCpu, useCostModel));
pm.addNestedPass<func::FuncOp>(onnx_mlir::createShapeInferencePass());
// There are more opportunities for const propagation once all zhigh ops were
// generated.
Expand Down Expand Up @@ -155,7 +156,7 @@ void addPassesNNPA(mlir::OwningOpRef<mlir::ModuleOp> &module,

if (emissionTarget >= EmitMLIR) {
// Lower zAIU-compatible ONNX ops to ZHigh dialect where possible.
addONNXToZHighPasses(pm, execNodesOnCpu);
addONNXToZHighPasses(pm, execNodesOnCpu, nnpaEnableZHighCostModel);

if (nnpaEmissionTarget >= EmitZHighIR)
emissionTarget = EmitMLIR;
Expand Down
2 changes: 2 additions & 0 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_onnx_mlir_library(OMONNXToZHigh
ONNXLegalityCheck.cpp
ONNXToZHigh.cpp
ONNXToZHighCommon.cpp
ZHighPerfModel.cpp

DEPENDS
OMONNXONNXToZHighIncGen
Expand All @@ -25,6 +26,7 @@ add_onnx_mlir_library(OMRewriteONNXForZHigh
ONNXLegalityCheck.cpp
RewriteONNXForZHigh.cpp
ONNXToZHighCommon.cpp
ZHighPerfModel.cpp

DEPENDS
OMONNXRewriteONNXForZHighIncGen
Expand Down
Loading
Loading