From 810f734215cff4a974460affd8368ae484987b48 Mon Sep 17 00:00:00 2001 From: Alexandre Eichenberger Date: Mon, 28 Aug 2023 03:51:13 -0400 Subject: [PATCH] Opt reporting: add signatures to stats (#2464) * Opt reporting: add signatures to stats Signed-off-by: Alexandre Eichenberger --------- Signed-off-by: Alexandre Eichenberger Co-authored-by: Tung D. Le --- include/onnx-mlir/Runtime/OMTensor.h | 11 +- .../ONNXToKrnl/Math/Elementwise.cpp | 4 +- .../ONNXToKrnl/ONNXToKrnlCommon.cpp | 58 +-- .../ONNXToKrnl/Tensor/PrintSignature.cpp | 27 +- src/Dialect/ONNX/ONNXOps/OpHelper.cpp | 54 +++ src/Dialect/ONNX/ONNXOps/OpHelper.hpp | 6 + src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp | 1 + src/Runtime/OMInstrument.inc | 38 +- src/Runtime/OMTensor.inc | 347 ++++++++++-------- .../ONNX/InstrumentONNXSignaturePass.cpp | 27 +- utils/make-report.py | 133 +++++-- 11 files changed, 427 insertions(+), 279 deletions(-) diff --git a/include/onnx-mlir/Runtime/OMTensor.h b/include/onnx-mlir/Runtime/OMTensor.h index 712a5d8ddf..f25e25c523 100644 --- a/include/onnx-mlir/Runtime/OMTensor.h +++ b/include/onnx-mlir/Runtime/OMTensor.h @@ -195,7 +195,8 @@ OM_EXTERNAL_VISIBILITY const int64_t *omTensorGetShape(const OMTensor *tensor); * * Set the data shape array of the OMTensor to the values in the input array. */ -OM_EXTERNAL_VISIBILITY void omTensorSetShape(OMTensor *tensor, const int64_t *shape); +OM_EXTERNAL_VISIBILITY void omTensorSetShape( + OMTensor *tensor, const int64_t *shape); /** * \brief OMTensor data strides getter @@ -209,7 +210,8 @@ OM_EXTERNAL_VISIBILITY void omTensorSetShape(OMTensor *tensor, const int64_t *sh * @param tensor pointer to the OMTensor * @return pointer to the data strides array. */ -OM_EXTERNAL_VISIBILITY const int64_t *omTensorGetStrides(const OMTensor *tensor); +OM_EXTERNAL_VISIBILITY const int64_t *omTensorGetStrides( + const OMTensor *tensor); /** * \brief OMTensor data strides setter @@ -320,7 +322,10 @@ OM_EXTERNAL_VISIBILITY void omTensorSetOwning(OMTensor *tensor, int64_t owning); /** * Print an OMTensor to stdout. * - * @param msg, pointer to descriptive string + * @param msg, pointer to descriptive string. It accepts one of 3 formats: + * '%t' for printing the tensor's type, '%s' for printing an extensive signature + * printout, and '%d' for printing the full data values of the tensor. + * Additionally it recognize '%e' as the end of the message string. 
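 * As an illustration only (not part of this patch), a call such as
 *   omTensorPrint("tensor type %t, values: %d%e", tensor);
 * prints the compact type of 'tensor' followed by all of its data values and
 * stops at '%e'; characters that are not part of a '%' directive are printed
 * verbatim.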
* @param tensor, pointer to the OMTensor to print */ OM_EXTERNAL_VISIBILITY void omTensorPrint( diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp index ffe68c60a8..fa656219ce 100644 --- a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp +++ b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp @@ -2094,7 +2094,7 @@ struct ONNXElementwiseBinaryOpLowering shapeHelper, create, op, outputMemRefType, collapsedInnermostLoops, estimatedSimdLoopTripCount); if (uVL > 0) { - if (collapsedInnermostLoops == outputRank) + if (collapsedInnermostLoops == (int64_t)outputRank) onnxToKrnlSimdReport(op, /*successful*/ true, uVL, estimatedSimdLoopTripCount, "binary fully flattened"); else @@ -2257,7 +2257,7 @@ struct ONNXElementwiseVariadicOpLowering shapeHelper, create, op, outputMemRefType, collapsedInnermostLoops, estimatedSimdLoopTripCount); if (uVL > 0) { - if (collapsedInnermostLoops == outputRank) + if (collapsedInnermostLoops == (int64_t)outputRank) onnxToKrnlSimdReport(op, /*successful*/ true, uVL, estimatedSimdLoopTripCount, "variadic fully flattened"); else diff --git a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp index 01e636786c..a57df06990 100644 --- a/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp +++ b/src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp @@ -16,7 +16,6 @@ #include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp" #include "mlir/Dialect/Utils/ReshapeOpsUtils.h" #include "mlir/IR/BuiltinTypeInterfaces.h" -#include "llvm/Support/Path.h" #include "src/Accelerators/Accelerator.hpp" #include "src/Dialect/Krnl/DialectBuilder.hpp" @@ -664,66 +663,13 @@ bool hasNonIdentityLayout(ValueRange operands) { // Support functions for reporting. //===----------------------------------------------------------------------===// -// We may try to relate the node names generated by the instrumentation -// with the node names printed by opt-report. Thus it is key to keep the -// code that generates these node name in sync. -// -// The code are found here: -// 1) `matchAndRewrite` from `src/Conversion/KrnlToLLVM/KrnlInstrument.cpp` -// 2) `getNodeNameLikeInKrnlInstrument` from -// `src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp` - -static std::string getNodeNameLikeInKrnlInstrument(Operation *op) { - StringAttr nodeName; - // Try with op onnx_node_name attribute. - nodeName = op->getAttrOfType("onnx_node_name"); - if (nodeName) { - LLVM_DEBUG(llvm::dbgs() << "op has node name\n"); - return nodeName.str(); - } - // Try with op location. - Location loc = op->getLoc(); - if (auto nameLoc = loc.dyn_cast()) { - LLVM_DEBUG(llvm::dbgs() << "op has node name\n"); - return nameLoc.getName().str(); - } - if (auto fusedLoc = loc.dyn_cast()) { - // Combine each location name and set it as nodeName. 
- LLVM_DEBUG(llvm::dbgs() << "fuse loc has name\n"); - std::string name; - for (Location locIt : fusedLoc.getLocations()) { - if (auto nameLocIt = locIt.dyn_cast()) - name += nameLocIt.getName().str() + "-"; - else if (auto fileLineColLoc = locIt.dyn_cast()) { - std::string filename = - llvm::sys::path::filename(fileLineColLoc.getFilename().str()).str(); - name += filename + ":" + std::to_string(fileLineColLoc.getLine()) + "-"; - } - } - if (name.empty()) - name = "NOTSET"; - else - name.pop_back(); // remove last "-" - return name; - } - if (auto fileLineColLoc = loc.dyn_cast()) { - LLVM_DEBUG(llvm::dbgs() << "file line col has name\n"); - std::string filename = - llvm::sys::path::filename(fileLineColLoc.getFilename().str()).str(); - std::string name = - filename + ":" + std::to_string(fileLineColLoc.getLine()); - return name; - } - return "NOTSET"; -} - void impl::onnxToKrnlParallelReport(Operation *op, bool successful, int64_t loopLevel, int64_t parallelLoopTripCount, const std::string &comment) { assert(OnnxToKrnlLoweringConfiguration::reportOnParallel && "must report"); assert(comment.find(',') == std::string::npos && "no comma in comments"); StringAttr opName = op->getName().getIdentifier(); - std::string nodeNameStr = getNodeNameLikeInKrnlInstrument(op); + std::string nodeNameStr = getNodeNameInPresenceOfOpt(op); // Print report on this op. printf("==PAR-REPORT==, %s%s, %s, %s, %lld, %lld\n", opName.data(), (successful ? "-parallel" : ""), nodeNameStr.c_str(), comment.c_str(), @@ -736,7 +682,7 @@ void impl::onnxToKrnlSimdReport(Operation *op, bool successful, assert(OnnxToKrnlLoweringConfiguration::reportOnSimd && "must report"); assert(comment.find(',') == std::string::npos && "no comma in comments"); StringAttr opName = op->getName().getIdentifier(); - std::string nodeNameStr = getNodeNameLikeInKrnlInstrument(op); + std::string nodeNameStr = getNodeNameInPresenceOfOpt(op); // Handling message. std::string message = OnnxToKrnlLoweringConfiguration::defaultSimdComment; if (message.empty()) diff --git a/src/Conversion/ONNXToKrnl/Tensor/PrintSignature.cpp b/src/Conversion/ONNXToKrnl/Tensor/PrintSignature.cpp index c7c813b47f..0ad00c12df 100644 --- a/src/Conversion/ONNXToKrnl/Tensor/PrintSignature.cpp +++ b/src/Conversion/ONNXToKrnl/Tensor/PrintSignature.cpp @@ -33,15 +33,30 @@ struct ONNXPrintSignatureLowering Location loc = ONNXLoc(op); MultiDialectBuilder create(rewriter, loc); + // First message. std::string opName(printSignatureOp.getOpName().data()); - create.krnl.printf(opName); - std::string msg = "%t "; + std::string msg = + "%i==SIG-REPORT==, " + opName + ", sig"; // meaningless secondary key. + // Discover the values to print, setting aside the last one. + llvm::SmallVector printVal; for (Value oper : adaptor.getInput()) if (!oper.getType().isa()) - create.krnl.printTensor(msg, oper); - Value noneValue; - rewriter.replaceOpWithNewOp(op, "\n", noneValue); - // For debug, no need to report on SIMD. + printVal.emplace_back(oper); + int64_t printNum = printVal.size(); + Value lastVal = printVal.pop_back_val(); + // Print all but the last one. + for (Value oper : printVal) { + create.krnl.printTensor(msg + ", %t%e", oper); + msg = "%i"; + } + // Print the last one with replace with new op. 
+ if (printNum > 0) { + rewriter.replaceOpWithNewOp( + op, msg + ", %t\n%e", lastVal); + } else { + Value noneVal = nullptr; + rewriter.replaceOpWithNewOp(op, msg + "\n%e", noneVal); + } return success(); } }; diff --git a/src/Dialect/ONNX/ONNXOps/OpHelper.cpp b/src/Dialect/ONNX/ONNXOps/OpHelper.cpp index 0ccb098f5b..809a238b48 100644 --- a/src/Dialect/ONNX/ONNXOps/OpHelper.cpp +++ b/src/Dialect/ONNX/ONNXOps/OpHelper.cpp @@ -14,6 +14,7 @@ #include "mlir/IR/TypeUtilities.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Path.h" #include "src/Dialect/Mlir/IndexExpr.hpp" #include "src/Dialect/ONNX/ONNXLayoutHelper.hpp" @@ -676,4 +677,57 @@ int64_t mlirTypeToOnnxType(Type elemType) { return onnxType; } +//===----------------------------------------------------------------------===// +// Support for location. +//===----------------------------------------------------------------------===// + +// We may try to relate the node names generated by the instrumentation +// with the node names printed by opt-report. Thus it is key to keep the +// code that generates these node name in sync. +// +// The code are found here: +// 1) `matchAndRewrite` from `src/Conversion/KrnlToLLVM/KrnlInstrument.cpp` +// 2) `getNodeNameInPresenceOfOpt` from +// `src/Dialect/ONNX/ONNXOps/OpHelper.cpp` + +std::string getNodeNameInPresenceOfOpt(Operation *op) { + StringAttr nodeName; + // Try with op onnx_node_name attribute. + nodeName = op->getAttrOfType("onnx_node_name"); + if (nodeName) { + return nodeName.str(); + } + // Try with op location. + Location loc = op->getLoc(); + if (auto nameLoc = loc.dyn_cast()) { + return nameLoc.getName().str(); + } + if (auto fusedLoc = loc.dyn_cast()) { + // Combine each location name and set it as nodeName. + std::string name; + for (Location locIt : fusedLoc.getLocations()) { + if (auto nameLocIt = locIt.dyn_cast()) + name += nameLocIt.getName().str() + "-"; + else if (auto fileLineColLoc = locIt.dyn_cast()) { + std::string filename = + llvm::sys::path::filename(fileLineColLoc.getFilename().str()).str(); + name += filename + ":" + std::to_string(fileLineColLoc.getLine()) + "-"; + } + } + if (name.empty()) + name = "NOTSET"; + else + name.pop_back(); // remove last "-" + return name; + } + if (auto fileLineColLoc = loc.dyn_cast()) { + std::string filename = + llvm::sys::path::filename(fileLineColLoc.getFilename().str()).str(); + std::string name = + filename + ":" + std::to_string(fileLineColLoc.getLine()); + return name; + } + return "NOTSET"; +} + } // namespace onnx_mlir diff --git a/src/Dialect/ONNX/ONNXOps/OpHelper.hpp b/src/Dialect/ONNX/ONNXOps/OpHelper.hpp index 06a3479981..2b8aa7d360 100644 --- a/src/Dialect/ONNX/ONNXOps/OpHelper.hpp +++ b/src/Dialect/ONNX/ONNXOps/OpHelper.hpp @@ -261,4 +261,10 @@ bool areDimsFromConcat(mlir::Value val); /// Get all dimensions that are stored by the value. void getDims(mlir::Value val, llvm::SmallVectorImpl &dims); +//===----------------------------------------------------------------------===// +// Support for location. 
+//===----------------------------------------------------------------------===// + +std::string getNodeNameInPresenceOfOpt(mlir::Operation *op); + } // namespace onnx_mlir diff --git a/src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp b/src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp index b642f2602b..beb50392ef 100644 --- a/src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp +++ b/src/Dialect/ONNX/ONNXOps/ShapeHelper.hpp @@ -895,4 +895,5 @@ void SaveOnnxAttrInOp(mlir::Operation *op, OP_TYPE specificOp = llvm::cast(op); setAttr(specificOp, newAttr); } + } // namespace onnx_mlir diff --git a/src/Runtime/OMInstrument.inc b/src/Runtime/OMInstrument.inc index d61cebd2bc..92cae0c656 100644 --- a/src/Runtime/OMInstrument.inc +++ b/src/Runtime/OMInstrument.inc @@ -80,27 +80,27 @@ static FILE *fout = 0; #endif #ifdef _WIN32 -void TimeInit() { +static void TimeInit() { QueryPerformanceFrequency(&perfFrequency); QueryPerformanceCounter(&globalTime); initTime = globalTime; } #else -void TimeInit() { +static void TimeInit() { gettimeofday(&globalTimeVal, NULL); initTimeVal = globalTimeVal; } #endif #ifdef _WIN32 -inline void WinTimerSub(LARGE_INTEGER newTime, LARGE_INTEGER prevTime, +static inline void WinTimerSub(LARGE_INTEGER newTime, LARGE_INTEGER prevTime, LONGLONG *resultSeconds, LONGLONG *resultMicroseconds) { LONGLONG elapsed = newTime.QuadPart - prevTime.QuadPart; *resultSeconds = elapsed / perfFrequency.QuadPart; *resultMicroseconds = ((elapsed * 1000000) / perfFrequency.QuadPart) % 1000000; } -void ReportTime() { +static void ReportTime() { LARGE_INTEGER newTime; LONGLONG resultSeconds, resultMicroseconds; QueryPerformanceCounter(&newTime); @@ -111,7 +111,7 @@ void ReportTime() { globalTime = newTime; } #else -void ReportTime() { +static void ReportTime() { struct timeval newTimeValue, result; gettimeofday(&newTimeValue, NULL); timersub(&newTimeValue, &globalTimeVal, &result); @@ -125,7 +125,7 @@ void ReportTime() { #endif #ifdef _WIN32 -void ReportMemory() { +static void ReportMemory() { PROCESS_MEMORY_COUNTERS_EX pmc; GetProcessMemoryInfo( GetCurrentProcess(), (PROCESS_MEMORY_COUNTERS *)&pmc, sizeof(pmc)); @@ -133,7 +133,7 @@ void ReportMemory() { fprintf(fout, "%zu\n", vMemSizeKB); } #else -void ReportMemory() { +static void ReportMemory() { char memCommand[200]; char memOutput[200]; FILE *memPipe; @@ -159,6 +159,21 @@ void ReportMemory() { } #endif +FILE *getInstrumentFile() { + if (fout) + return fout; + fout = stdout; + if (getenv("ONNX_MLIR_INSTRUMENT_FILE")) { + char *fileName = getenv("ONNX_MLIR_INSTRUMENT_FILE"); + FILE *newFileHandle = fopen(fileName, "w"); + if (newFileHandle) { + fout = newFileHandle; + } + } + assert(fout); + return fout; +} + void OMInstrumentInit() { // Read environment variables. if (getenv("ONNX_MLIR_NO_INSTRUMENT_TIME")) { @@ -172,14 +187,7 @@ void OMInstrumentInit() { instrumentReportDisabled = true; } // Handle redirection to file if requested. - fout = stdout; - if (getenv("ONNX_MLIR_INSTRUMENT_FILE")) { - char *fileName = getenv("ONNX_MLIR_INSTRUMENT_FILE"); - FILE *newFileHandle = fopen(fileName, "w"); - if (newFileHandle) { - fout = newFileHandle; - } - } + getInstrumentFile(); // Init as appropriate. 
if (!instrumentReportDisabled) { diff --git a/src/Runtime/OMTensor.inc b/src/Runtime/OMTensor.inc index c5b15bca69..19eb594b60 100644 --- a/src/Runtime/OMTensor.inc +++ b/src/Runtime/OMTensor.inc @@ -34,9 +34,9 @@ #include #endif +#include #include #include -#include #include "onnx-mlir/Runtime/OMTensor.h" @@ -49,7 +49,7 @@ #if defined(__APPLE__) // __errno_location() is called from krnl::emitErrNo() to set errno in // generated llvm IR, but it somehow doesn't come out of the box on MacOS. -int * __attribute__((__weak__)) __errno_location(void) { return &errno; } +int *__attribute__((__weak__)) __errno_location(void) { return &errno; } #endif // On some platforms LLVM generates f16 conversion code that calls some @@ -141,62 +141,6 @@ static inline int64_t getNumElems(const int64_t *shape, int64_t rank) { return numElem; } -/* Helper function to get the ONNX data type C name string */ -static inline const char *getDataTypeName(OM_DATA_TYPE dataType) { - return OM_DATA_TYPE_NAME[dataType]; -} - -/* Helper function to print an element of a tensor */ -static inline void printElement( - void *dataPtr, int64_t elemOffset, OM_DATA_TYPE dataType) { - assert(dataPtr && "Expecting a valid data pointer"); - assert(elemOffset >= 0 && "Expecting a valid element offset"); - - switch (dataType) { - case ONNX_TYPE_BOOL: - printf("%d", ((bool *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_UINT8: - printf("%hhu", ((uint8_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_INT8: - printf("%hhd", ((int8_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_UINT16: - printf("%hu", ((uint16_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_INT16: - printf("%hd", ((int16_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_UINT32: - printf("%u", ((uint32_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_INT32: - printf("%d", ((int32_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_UINT64: - printf("%llu", (unsigned long long)((uint64_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_INT64: - printf("%lld", (long long)((int64_t *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_FLOAT16: - printf("%f", om_f16_to_f32(((uint16_t *)dataPtr)[elemOffset])); - break; - case ONNX_TYPE_FLOAT: - printf("%f", ((float *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_DOUBLE: - printf("%f", ((double *)dataPtr)[elemOffset]); - break; - case ONNX_TYPE_STRING: - printf("%s", ((const char **)dataPtr)[elemOffset]); - break; - default: - assert(false && "unexpected data type"); - } -} - /* Helper function to compute the inner product of 2 arrays (of equal size) */ static inline int64_t inner_product( const int64_t v[], const int64_t u[], int64_t len, int64_t init) { @@ -345,7 +289,9 @@ void omTensorSetDataPtr( } /* OMTensor data shape getter */ -const int64_t *omTensorGetShape(const OMTensor *tensor) { return tensor->_shape; } +const int64_t *omTensorGetShape(const OMTensor *tensor) { + return tensor->_shape; +} /* OMTensor data shape setter */ void omTensorSetShape(OMTensor *tensor, const int64_t *shape) { @@ -354,7 +300,9 @@ void omTensorSetShape(OMTensor *tensor, const int64_t *shape) { } /* OMTensor data strides getter */ -const int64_t *omTensorGetStrides(const OMTensor *tensor) { return tensor->_strides; } +const int64_t *omTensorGetStrides(const OMTensor *tensor) { + return tensor->_strides; +} /* OMTensor data strides setter */ void omTensorSetStrides(OMTensor *tensor, const int64_t *strides) { @@ -432,131 +380,232 @@ void *omTensorGetAllocatedPtr(const OMTensor *tensor) { return tensor->_allocatedPtr; } 
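Before the rewritten omTensorPrint below, here is a minimal caller-side sketch of the format protocol it implements. This is illustrative only: it assumes the public omTensorCreate/omTensorDestroy constructors declared alongside omTensorPrint in OMTensor.h, and the 2x3 tensor and the exact type-name spelling are made up.

    #include <stdint.h>
    #include "onnx-mlir/Runtime/OMTensor.h"

    int main() {
      float data[6] = {1, 2, 3, 4, 5, 6};
      int64_t shape[2] = {2, 3};
      OMTensor *t = omTensorCreate(data, shape, 2, ONNX_TYPE_FLOAT);
      omTensorPrint("type: %t\n%e", t);     /* compact type, e.g. 2x3xfloat */
      omTensorPrint("signature:\n%s%e", t); /* rank, dataType, numElems, shape,
                                               strides, and type */
      omTensorPrint("data: %d%e", t);       /* nested printout of every element */
      omTensorDestroy(t);
      return 0;
    }

The '%e' terminator exists because, as the implementation notes below, the message strings produced by the lowering are not reliably terminated yet; plain characters in msg are echoed as-is, and '%i' (used by the generated signature code) redirects the whole message to the instrumentation stream.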
-void omTensorPrint(const char *msg, const OMTensor *tensor) { - assert(tensor && "attempt to print a null OMTensor"); +//===----------------------------------------------------------------------===// +/* omTensorPrint helper */ + +/* Helper function to get the ONNX data type C name string */ +static inline const char *getDataTypeName(OM_DATA_TYPE dataType) { + return OM_DATA_TYPE_NAME[dataType]; +} +/* Helper function to print type as 8x16x1xfloat */ +static void printType(FILE *fout, const OMTensor *tensor) { + const OM_DATA_TYPE dataType = omTensorGetDataType(tensor); + const int64_t rank = omTensorGetRank(tensor); + const int64_t *shape = omTensorGetShape(tensor); + for (int64_t i = 0; i < rank; i++) + fprintf(fout, "%lldx", (long long)shape[i]); + fprintf(fout, "%s", getDataTypeName(dataType)); +} + +/* Helper function to print a detailed signature of a vector */ +static void printSignature(FILE *fout, const OMTensor *tensor) { const OM_DATA_TYPE dataType = omTensorGetDataType(tensor); const int64_t rank = omTensorGetRank(tensor); const int64_t *shape = omTensorGetShape(tensor); const int64_t *strides = omTensorGetStrides(tensor); - void *dataPtr = omTensorGetDataPtr(tensor); - // Process format if any (default is signature and data). - bool signatureInfo = false; - bool typeInfo = false; - bool dataInfo = false; - bool hadFormat = false; - while (strlen(msg) >= 2 && msg[0] == '%') { - if (msg[1] == 's') // S for signature. - hadFormat = signatureInfo = true; - else if (msg[1] == 't') // T for type only - hadFormat = typeInfo = true; - else if (msg[1] == 'd') - hadFormat = dataInfo = true; - else - printf("warning: unknown format `%c`\n", msg[1]); - // Skip the format when printing message. - msg += 2; - } - if (!hadFormat) - signatureInfo = dataInfo = true; - - // Print message. - if (msg) - printf("%s", msg); - - if (signatureInfo) { - // Print detailed info. - printf("\trank = %lld\n", (long long)rank); - printf("\tdataType = %s\n", getDataTypeName(dataType)); - printf("\tnumElems = %lld\n", (long long)omTensorGetNumElems(tensor)); - printf("\tshape: "); - for (int64_t i = 0; i < rank; i++) - printf("[%lld]", (long long)shape[i]); - printf("\n"); - printf("\tstrides: "); - for (int64_t i = 0; i < rank; i++) - printf("[%lld]", (long long)strides[i]); - printf("\n"); - } - if (typeInfo) { - // Print compact type like in MLIR, e.g. 32x16xf32. 
- if (signatureInfo) - printf("\ttype: "); - for (int64_t i = 0; i < rank; i++) - printf("%lldx", (long long)shape[i]); - printf("%s", getDataTypeName(dataType)); - if (signatureInfo) - printf("\n"); + fprintf(fout, "\trank = %lld\n", (long long)rank); + fprintf(fout, "\tdataType = %s\n", getDataTypeName(dataType)); + fprintf(fout, "\tnumElems = %lld\n", (long long)omTensorGetNumElems(tensor)); + fprintf(fout, "\tshape: "); + for (int64_t i = 0; i < rank; i++) + fprintf(fout, "[%lld]", (long long)shape[i]); + fprintf(fout, "\n"); + fprintf(fout, "\tstrides: "); + for (int64_t i = 0; i < rank; i++) + fprintf(fout, "[%lld]", (long long)strides[i]); + fprintf(fout, "\n"); + + fprintf(fout, "\ttype: "); + printType(fout, tensor); + fprintf(fout, "\n"); +} + +/* Helper function to print an element of a tensor */ +static void printElement( + FILE *fout, void *dataPtr, int64_t elemOffset, OM_DATA_TYPE dataType) { + assert(dataPtr && "Expecting a valid data pointer"); + assert(elemOffset >= 0 && "Expecting a valid element offset"); + + switch (dataType) { + case ONNX_TYPE_BOOL: + fprintf(fout, "%d", ((bool *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_UINT8: + fprintf(fout, "%hhu", ((uint8_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_INT8: + fprintf(fout, "%hhd", ((int8_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_UINT16: + fprintf(fout, "%hu", ((uint16_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_INT16: + fprintf(fout, "%hd", ((int16_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_UINT32: + fprintf(fout, "%u", ((uint32_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_INT32: + fprintf(fout, "%d", ((int32_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_UINT64: + fprintf( + fout, "%llu", (unsigned long long)((uint64_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_INT64: + fprintf(fout, "%lld", (long long)((int64_t *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_FLOAT16: + fprintf(fout, "%f", om_f16_to_f32(((uint16_t *)dataPtr)[elemOffset])); + break; + case ONNX_TYPE_FLOAT: + fprintf(fout, "%f", ((float *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_DOUBLE: + fprintf(fout, "%f", ((double *)dataPtr)[elemOffset]); + break; + case ONNX_TYPE_STRING: + fprintf(fout, "%s", ((const char **)dataPtr)[elemOffset]); + break; + default: + assert(false && "unexpected data type"); } +} + +/* Helper function to print the data of a tensor type */ +static void printData(FILE *fout, const OMTensor *tensor) { +/* Helper macros to print data for 1-4D tensors */ #define LOOP_1(INDEX, IV, UB) \ - printf("["); \ + fprintf(fout, "["); \ for (int64_t IV = 0; IV < UB; ++IV) { \ if (IV) \ - printf(", "); \ + fprintf(fout, ", "); \ indexes[INDEX] = IV; \ int64_t elemOffset = computeElemOffset(tensor->_strides, indexes, rank); \ - printElement(dataPtr, elemOffset, dataType); \ + printElement(fout, dataPtr, elemOffset, dataType); \ } \ - printf("]"); + fprintf(fout, "]"); #define LOOP_2(INDEX, IV, UB, ...) \ - printf("["); \ + fprintf(fout, "["); \ for (int64_t IV = 0; IV < UB; ++IV) { \ if (IV) \ - printf(", "); \ + fprintf(fout, ", "); \ indexes[INDEX] = IV; \ LOOP_1(INDEX + 1, __VA_ARGS__) \ } \ - printf("]"); + fprintf(fout, "]"); #define LOOP_3(INDEX, IV, UB, ...) \ - printf("["); \ + fprintf(fout, "["); \ for (int64_t IV = 0; IV < UB; ++IV) { \ if (IV) \ - printf(", "); \ + fprintf(fout, ", "); \ indexes[INDEX] = IV; \ LOOP_2(INDEX + 1, __VA_ARGS__) \ } \ - printf("]"); + fprintf(fout, "]"); #define LOOP_4(INDEX, IV, UB, ...) 
\ - printf("["); \ + fprintf(fout, "["); \ for (int64_t IV = 0; IV < UB; ++IV) { \ if (IV) \ - printf(", "); \ + fprintf(fout, ", "); \ indexes[INDEX] = IV; \ LOOP_3(INDEX + 1, __VA_ARGS__) \ } \ - printf("]"); - - if (dataInfo) { - printf("\tdata: ("); - switch (rank) { - case 0: - printElement(dataPtr, 0, dataType); - break; - case 1: { - int64_t indexes[1]; - LOOP_1(0, i, shape[0]) - } break; - case 2: { - int64_t indexes[2]; - LOOP_2(0, i, shape[0], j, shape[1]) - } break; - case 3: { - int64_t indexes[3]; - LOOP_3(0, i, shape[0], j, shape[1], k, shape[2]) - } break; - case 4: { - int64_t indexes[4]; - LOOP_4(0, i, shape[0], j, shape[1], k, shape[2], l, shape[3]) - } break; - default: - assert(false && "not implemented"); + fprintf(fout, "]"); + + const OM_DATA_TYPE dataType = omTensorGetDataType(tensor); + const int64_t rank = omTensorGetRank(tensor); + const int64_t *shape = omTensorGetShape(tensor); + void *dataPtr = omTensorGetDataPtr(tensor); + + fprintf(fout, "\tdata: ("); + switch (rank) { + case 0: + printElement(fout, dataPtr, 0, dataType); + break; + case 1: { + int64_t indexes[1]; + LOOP_1(0, i, shape[0]) + } break; + case 2: { + int64_t indexes[2]; + LOOP_2(0, i, shape[0], j, shape[1]) + } break; + case 3: { + int64_t indexes[3]; + LOOP_3(0, i, shape[0], j, shape[1], k, shape[2]) + } break; + case 4: { + int64_t indexes[4]; + LOOP_4(0, i, shape[0], j, shape[1], k, shape[2], l, shape[3]) + } break; + default: + assert(false && "not implemented"); + } + fprintf(fout, ")\n"); +} + +/* Defined in OMInstrument.inc, return stdout or the FILE handle to where + * instrumentation should be printed. */ +FILE *getInstrumentFile(); + +/* Print a string and possibly some info about a tensor. With `%d`, we print the +tensor data; with `s`, we print an extended signature; with `t`, we print the +type of the tensor (e.g. 32 x 16 x float) + +There are two other unpublished modifiers if the message starts with `%i`, this +signal an instrumentation message that may be redirected to the file name given +by ONNX_MLIR_INSTRUMENT_FILE. Additionally, there is a `%e` modifier that +signals the end of the message string. This is unfortunately needed because of +current issues with the termination of strings. +*/ +void omTensorPrint(const char *msg, const OMTensor *tensor) { + /* Process the message string. */ + int64_t len = strlen(msg); + FILE *fout = stdout; + /* Check if this is an instrumentation print, in which case we use the + * instrument file instead of stdout. */ + if (len >= 2 && msg[0] == '%' && msg[1] == 'i') { + fout = getInstrumentFile(); + /* Skip format. */ + msg += 2; + len -= 2; + } + while (len > 0) { + if (msg[0] == '%') { + if (len < 2) { + fprintf(fout, "omTensorPrint: orphan `%%`, skip.\n"); + return; + } + if (msg[1] == 'd') { /* Letter `d` for data. */ + assert(tensor && "attempt to print a null OMTensor"); + printData(fout, tensor); + } else if (msg[1] == 's') { /* Letter `s` for signature. */ + assert(tensor && "attempt to print a null OMTensor"); + printSignature(fout, tensor); + } else if (msg[1] == 't') { /* Letter `t` for type only. */ + assert(tensor && "attempt to print a null OMTensor"); + printType(fout, tensor); + } else if (msg[1] == 'e') { /* Letter `e` for end. */ + return; + } else { + fprintf(fout, "omTensorPrint: unknown format `%c`, skip.\n", msg[1]); + } + /* Skip the format when printing message. */ + msg += 2; + len -= 2; + continue; } - printf(")\n"); + /* No format, just print letter. 
*/ + fprintf(fout, "%c", msg[0]); + msg++; + len--; } } diff --git a/src/Transform/ONNX/InstrumentONNXSignaturePass.cpp b/src/Transform/ONNX/InstrumentONNXSignaturePass.cpp index 82c4f21a7e..94b4bc8449 100644 --- a/src/Transform/ONNX/InstrumentONNXSignaturePass.cpp +++ b/src/Transform/ONNX/InstrumentONNXSignaturePass.cpp @@ -28,6 +28,7 @@ #include "src/Dialect/Krnl/DialectBuilder.hpp" #include "src/Dialect/Krnl/KrnlOps.hpp" #include "src/Dialect/ONNX/ONNXOps.hpp" +#include "src/Dialect/ONNX/ONNXOps/OpHelper.hpp" #include "src/Interface/ShapeInferenceOpInterface.hpp" #include "src/Pass/Passes.hpp" @@ -70,20 +71,18 @@ class InstrumentONNXSignaturePass if (!isa(op)) { Location loc = op->getLoc(); OpBuilder builder(op); - ValueRange operands = op->getOperands(); - std::string opName( - op->getName().getStringRef().data() + 5); // Skip "onnx.". - StringAttr opNameAttr = builder.getStringAttr(opName); - if (isa(op)) { - // Constant has the type in the output, so use it. - operands = op->getResults(); - // Since we use the result of the constant operation, we must insert - // the print operation after the constant operation. - builder.setInsertionPointAfter(op); - builder.create(loc, opNameAttr, operands); - } else if (operands.size() > 0) { - builder.create(loc, opNameAttr, operands); - } + std::string opName = op->getName().getStringRef().str(); + std::string nodeName = onnx_mlir::getNodeNameInPresenceOfOpt(op); + std::string fullName = opName + ", " + nodeName; + StringAttr fullNameAttr = builder.getStringAttr(fullName); + // Enqueue all input operands, and then the results. + llvm::SmallVector operAndRes(op->getOperands()); + for (Value res : op->getResults()) + operAndRes.emplace_back(res); + // Since we may use the result of an operation, we must insert the + // print operation after the operation. + builder.setInsertionPointAfter(op); + builder.create(loc, fullNameAttr, operAndRes); } } }); diff --git a/utils/make-report.py b/utils/make-report.py index e5908a61a1..cf12ae86a7 100755 --- a/utils/make-report.py +++ b/utils/make-report.py @@ -6,14 +6,26 @@ # # Copyright 2023 The IBM Research Authors. # -################################################################################ +############################################################################# # -# This file scan -opt-report=* and process it +# This file scan --opt-report=*, --profile-ir, --instrument-onnx-signature +# and process it. # -# For patterns, see src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp, +# For patterns, see src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.cpp, # impl::onnxToKrnlParallelReport(...) and impl::onnxToKrnlSimdReport(...) # -################################################################################ +# Format for stats: +# '==''==,' ',' ',' +# {',' } +# +# where may have a '-simd' or '-par' suffix to indicate +# the success of that pass. 
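# For instance (an illustrative line; the op, node name, and shapes are made
# up), a signature stat captured at runtime reads:
#   ==SIG-REPORT==, onnx.Relu, relu_1, sig, 1x64x112x112xfloat, 1x64x112x112xfloat
# that is: op name, node name, the fixed secondary key 'sig', then one type per
# operand and result. SIMD and PAR stats start with the same op-name and
# node-name fields, followed by their pass-specific message and values.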
+# +# Format of perf: +# '==PERF==,' ',' ',' ',' +# ',' +# +############################################################################# import sys import getopt @@ -23,8 +35,8 @@ def print_usage(msg = ""): if msg: print("Error:", msg, "\n") - print("make-report.py -[svh] [-c ] [-r ] [-l ]") - print(" [--sort ] [-u ] [-p ]") + print("make-report.py -[vh] [-c ] [-r ] [-l ]") + print(" [-s ] [--sort ] [--supported] [-u ] [-p ]") print("") print("Usage: Report statistics on compiler and runtime characteristics of onnx ops.") print("") @@ -43,20 +55,29 @@ def print_usage(msg = ""): print(" Use `-l 3` to correlate the node name printed here with compiler output.") print("") print("Parameters:") - print(" -c/--compile : File name containing the compile time statistics.") - print(" -r/--runtime : File name containing the runtime statistics.") - print(" -l/--level : Print statistics:") - print(" 0: Just count successful/unsuccessful ops.") + print(" -c/--compile : File name containing the compile-time statistics") + print(" or runtime signature statistics.") + print(" -r/--runtime : File name containing the runtime time statistics.") + print(" -a/--stats : Print specific statistics:") + print(" simd: Print simd optimization stats.") + print(" Default if a compile time file is given.") + print(" par: Print parallel optimization stats.") + print(" sig: Print signatures of op.") + print(" perf: Print runtime execution time of ops.") + print(" Default if no compile time file is given.") + print(" -l/--level : Print detailed level of statistics:") + print(" 0: Just count successful/unsuccessful ops (default).") print(" 1: Also count reasons for success/failure.") print(" 2: Also list metrics.") print(" 3: Also list node name.") print(" -f/--focus : Focus only on ops that match the regexp pattern.") - print(" -s/supported: Focus only on ops that are supported. Namely, the report") + print(" -supported: Focus only on ops that are supported. Namely, the report") print(" will skip ops for which compile-time statistics list") print(" the 'unsupported' keyword in its printout.") print(" For SIMD/parallel statistics, this include all ops that") print(" have currently no support for it.") - print(" -u/--unit : Time in second ('s'), millisecond ('ms') or microsecond ('us).") + print(" -u/--unit : Time in second ('s', default), millisecond ('ms') or") + print(" microsecond ('us).") print(" --sort : Sort output by op 'name', occurrence 'num' or `time`.") print(" -v/--verbose: Run in verbose mode (see error and warnings).") print(" -h/--help: Print usage.") @@ -92,10 +113,11 @@ def common_report_str(stat_name): return r'^==' + stat_name + r'-REPORT==,\s*([0-9a-zA-Z\.\-]+)\s*,\s*([^,]*),\s*(.*)' # ==SIMD-REPORT==, ..., , , -simd_stat_message = "message, SIMD vector length (in elements), SIMD loop trip count (-1 is runtime)" +simd_legend = "message, SIMD vector length (in elements), SIMD loop trip count (-1 is runtime)" +sig_legend = "comma separated list of shapes with inputs followed by results" # ==PERF-REPORT==, ..., "before" | "after", time since last call, absolute time -perf_stat_message = "(after|before), time for op(s), time since start(s)" +perf_legend = "(after|before), time for op(s), time since start(s)" ################################################################################ # # Support. 
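A usage sketch for the statistics switch documented above (the log file names are hypothetical): 'make-report.py -c compile.log -s simd -l 3' summarizes SIMD successes and failures per op from a compile-time report, 'make-report.py -c sig_run.log -s sig' summarizes runtime signature lines, and 'make-report.py -r run.log -u ms --sort time' reports per-op execution times, perf being the default when no compile-time file is given.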
@@ -307,6 +329,14 @@ def get_percent(n, d): return 0.0 return n * 100 / d +def get_sorting_key(count, name, time): + global sorting_preference + if sorting_preference == "num": + key = - count + if sorting_preference == "name": + return name + return - time + def make_report(stat_message): global op_count_dict, op_detail_count_dict global op_time_dict, op_detail_time_dict, tot_time @@ -326,33 +356,40 @@ def make_report(stat_message): count_time_str += ", {:.7f}".format(time * time_unit) count_time_str += ", {:.1f}%".format(get_percent(time, tot_time)) output = " " + op + ", " + count_time_str - if sorting_preference == "name": - key = op - elif sorting_preference == "num": - key = - count - else: - key = - time if report_level: det_dict = op_detail_count_dict[op] det_time_dict = {} + sorted_det_output = {} if op in op_detail_time_dict: det_time_dict = op_detail_time_dict[op] - for det_key in sorted(det_dict): + for det_key in det_dict: det_count = det_dict[det_key] if det_count == count: count_time_str = "*" else: count_time_str = str(det_count) + det_time = 0 if det_key in det_time_dict: det_time = np.sum(det_time_dict[det_key]) count_time_str += ", {:.7f}".format(det_time * time_unit / det_count) count_time_str += ", {:.7f}".format(det_time * time_unit) count_time_str += ", {:.1f}%".format(get_percent(det_time, time)) - output += "\n " + count_time_str + ": " + det_key - if key in sorted_output: - sorted_output[key] = sorted_output[key] + "\n" + output + + det_output = "\n " + count_time_str + ": " + det_key + det_output_key = get_sorting_key(det_count, det_key, det_time) + if det_output_key in sorted_det_output: + sorted_det_output[det_output_key] += det_output + else: + sorted_det_output[det_output_key] = det_output + for key in sorted(sorted_det_output): + output += sorted_det_output[key] + + # add output to sorted_output + output_key = get_sorting_key(count, op, time) + if output_key in sorted_output: + sorted_output[output_key] += "\n" + output else: - sorted_output[key] = output + sorted_output[output_key] = output # Print legend and stats. num_desc = "num" @@ -409,11 +446,13 @@ def main(argv): compile_file_name = "" runtime_file_name = "" + make_stats = "" + make_legend = "" try: opts, args = getopt.getopt( - argv, "c:f:hl:r:su:v", - ["compile=", "focus=", "help", "level=", "runtime=", "supported", - "sort=", "unit=", "verbose"]) + argv, "c:f:hl:r:s:u:v", + ["compile=", "focus=", "help", "level=", "runtime=", "stats=" + "sort=", "supported", "unit=", "verbose"]) except getopt.GetoptError: print_usage("Failure to parse inputs") for opt, arg in opts: @@ -431,7 +470,22 @@ def main(argv): print_usage("detail levels are 0, 1, 2, or 3") elif opt in ('-r', "--runtime"): runtime_file_name = arg - elif opt in ('-s', "--supported"): + elif opt in ("-s", "--stats"): + if re.match(r'\s*par\s*', arg): + make_stats = "PAR" + make_legend = "none" + elif re.match(r'\s*perf\s*', arg): + make_stats = "PERF" + make_legend = perf_legend + elif re.match(r'\s*sig?\s*', arg): + make_stats = "SIG" + make_legend = sig_legend + elif re.match(r'\s*simd\s*', arg): + make_stats = "SIMD" + make_legend = simd_legend + else: + print_usage("statistics options are 'par', 'signature', or 'simd'") + elif opt in ("--supported"): supported_only = True elif opt in ("--sort"): if re.match(r'\s*name\s*', arg): @@ -454,6 +508,17 @@ def main(argv): elif opt in ('-v', "--verbose"): verbose = True + # Default stats. + if not make_stats: + if compile_file_name: + # Default for compile analysis. 
+ make_stats = "SIMD" + make_legend = simd_legend + else: + # Default for perf only (no compile) + make_stats = "PERF" + make_legend = perf_legend + print("Analyse", make_stats) # Default sorting preference. if not sorting_preference: if runtime_file_name: @@ -462,15 +527,15 @@ def main(argv): sorting_preference = "name" if compile_file_name and runtime_file_name: parse_file_for_perf(runtime_file_name, "PERF") - parse_file_for_stat(compile_file_name, "SIMD") - make_report(simd_stat_message) + parse_file_for_stat(compile_file_name, make_stats) + make_report(make_legend) elif compile_file_name: - parse_file_for_stat(compile_file_name, "SIMD") - make_report(simd_stat_message) + parse_file_for_stat(compile_file_name, make_stats) + make_report(make_legend) elif runtime_file_name: parse_file_for_perf(runtime_file_name, "PERF") parse_file_for_stat(runtime_file_name, "PERF") - make_report(perf_stat_message) + make_report(make_legend) else: print_usage("Command requires an input file name (compile/runtime or both).\n")
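End to end, the pieces in this patch fit together as follows: the signature instrumentation pass tags each ONNXPrintSignatureOp with an 'op name, node name' label, the lowering turns it into an omTensorPrint call whose message starts with '%i==SIG-REPORT==', the runtime routes '%i' messages to the file named by ONNX_MLIR_INSTRUMENT_FILE (stdout by default), and make-report.py aggregates the resulting lines with '-s sig'. The sketch below imitates that protocol by hand; it is illustrative only (onnx.Relu, relu_1, the 2x2 tensor, and signatures.log are made up), assumes the public omTensorCreate/omTensorDestroy API, and uses POSIX setenv.

    #include <stdint.h>
    #include <stdlib.h>
    #include "onnx-mlir/Runtime/OMTensor.h"

    int main() {
      /* Set before the first instrumented print: getInstrumentFile() reads the
       * variable once and caches the resulting FILE handle. */
      setenv("ONNX_MLIR_INSTRUMENT_FILE", "signatures.log", /*overwrite=*/1);

      float data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
      int64_t shape[2] = {2, 2};
      OMTensor *t = omTensorCreate(data, shape, 2, ONNX_TYPE_FLOAT);

      /* Same message shape as the one built by ONNXPrintSignatureLowering:
       * '%i' selects the instrumentation stream, '%t' prints the type of t,
       * '%e' terminates the message. */
      omTensorPrint("%i==SIG-REPORT==, onnx.Relu, relu_1, sig, %t\n%e", t);

      omTensorDestroy(t);
      return 0;
    }

Running 'make-report.py -c signatures.log -s sig' over the produced file then groups these lines per op.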