From e48e094541875d65a7f1b71b7f39bf07cfb77fcb Mon Sep 17 00:00:00 2001
From: SZUwishion <2559916473@qq.com>
Date: Sun, 1 Sep 2024 16:39:21 +0800
Subject: [PATCH 01/16] pnnx print flops memops count

---
 tools/pnnx/src/ir.cpp | 27 +++++++++++++++++++++++++++
 tools/pnnx/src/ir.h | 3 +++
 tools/pnnx/src/main.cpp | 3 +++
 3 files changed, 33 insertions(+)

diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp
index 8b2b6dfd2d7f..994371954d6d 100644
--- a/tools/pnnx/src/ir.cpp
+++ b/tools/pnnx/src/ir.cpp
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include "storezip.h"
 #include "utils.h"
@@ -1441,6 +1442,32 @@ static std::string make_index_expression(const Operator* op)
 return index_expr;
 }
+int Graph::calculate_flops()
+{
+ int flops = 0;
+ for(auto op:ops) {
+ if(expand_expression(op) == "*")
+ {
+ int m = op->inputs[0]->shape[0];
+ int k = op->inputs[0]->shape[1];
+ int n = op->inputs[1]->shape[1];
+ flops += 2 * m * k * n;
+ }
+ else if(expand_expression(op) == "+") {
+ int m = op->inputs[0]->shape[0];
+ int n = op->inputs[0]->shape[1];
+ flops += m * n;
+ }
+ }
+ return flops;
+}
+
+int Graph::calculate_memops()
+{
+ int mem = sizeof(Operator) * ops.size() + sizeof(Operand) * operands.size();
+ return mem;
+}
+
 int Graph::python(const std::string& pypath, const std::string& pnnxbinpath)
 {
 FILE* pyfp = fopen(pypath.c_str(), "wb");
diff --git a/tools/pnnx/src/ir.h b/tools/pnnx/src/ir.h
index 779c2eec9f10..91e0e2a69fe3 100644
--- a/tools/pnnx/src/ir.h
+++ b/tools/pnnx/src/ir.h
@@ -346,6 +346,9 @@ class Graph
 std::vector<Operator*> ops;
 std::vector<Operand*> operands;
+ int calculate_flops();
+ int calculate_memops();
+
 private:
 Graph(const Graph& rhs);
 Graph& operator=(const Graph& rhs);
diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index c25128032d9e..dda54b1932dd 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -361,6 +361,9 @@ int main(int argc, char** argv)
 pnnx_graph.save(pnnxparampath, pnnxbinpath);
+ fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops());
+ fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops());
+
 pnnx_graph.python(pnnxpypath, pnnxbinpath);
 #if BUILD_PNNX2ONNX

From db91abd606ff00d7ebf9bdb2d3349720eb07fd21 Mon Sep 17 00:00:00 2001
From: SZUwishion <2559916473@qq.com>
Date: Sun, 1 Sep 2024 16:57:58 +0800
Subject: [PATCH 02/16] pnnx print flops memops count

---
 tools/pnnx/src/ir.cpp | 28 ++++++++++++++++++++++------
 tools/pnnx/src/ir.h | 4 ++--
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp
index 994371954d6d..7c5923acac1e 100644
--- a/tools/pnnx/src/ir.cpp
+++ b/tools/pnnx/src/ir.cpp
@@ -1442,9 +1442,9 @@ static std::string make_index_expression(const Operator* op)
 return index_expr;
 }
-int Graph::calculate_flops()
+int Graph::calculate_flops_M()
 {
- int flops = 0;
+ long long flops = 0;
 for(auto op:ops) {
 if(expand_expression(op) == "*")
 {
 int m = op->inputs[0]->shape[0];
 int k = op->inputs[0]->shape[1];
 int n = op->inputs[1]->shape[1];
 flops += 2 * m * k * n;
 }
 else if(expand_expression(op) == "+") {
 int m = op->inputs[0]->shape[0];
 int n = op->inputs[0]->shape[1];
 flops += m * n;
 }
 }
- return flops;
+ return int(flops / 1e6);
 }
-int Graph::calculate_memops()
+int Graph::calculate_memops_M()
 {
- int mem = sizeof(Operator) * ops.size() + sizeof(Operand) * operands.size();
- return mem;
+ long long mem = 0;
+ for(auto op : ops)
+ {
+ if(expand_expression(op) == "*")
+ {
+ int m = op->inputs[0]->shape[0];
+ int k = op->inputs[0]->shape[1];
+ int n = op->inputs[1]->shape[1];
+ mem += m * k + k * n + m * n;
+ }
+ else if(expand_expression(op) == "+")
+ {
+ int m = op->inputs[0]->shape[0];
+ int n = op->inputs[0]->shape[1];
+ mem += 3 * m * n;
+ }
+ }
+ return int(mem / 1e6);
 }
 int Graph::python(const std::string& pypath, const std::string& pnnxbinpath)
diff --git a/tools/pnnx/src/ir.h b/tools/pnnx/src/ir.h
index 91e0e2a69fe3..bc1f0089591d 100644
--- a/tools/pnnx/src/ir.h
+++ b/tools/pnnx/src/ir.h
@@ -346,8 +346,8 @@ class Graph
 std::vector<Operator*> ops;
 std::vector<Operand*> operands;
- int calculate_flops();
- int calculate_memops();
+ int calculate_flops_M();
+ int calculate_memops_M();
 private:
 Graph(const Graph& rhs);

From 4af97a8c0cb75e0ed171e264b176d77f099850dc Mon Sep 17 00:00:00 2001
From: SZUwishion <2559916473@qq.com>
Date: Mon, 2 Sep 2024 10:49:01 +0800
Subject: [PATCH 03/16] pnnx print flops memops count

---
 tools/pnnx/src/main.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index dda54b1932dd..32e628be8d6f 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -361,8 +361,8 @@ int main(int argc, char** argv)
 pnnx_graph.save(pnnxparampath, pnnxbinpath);
- fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops());
- fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops());
+ fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
+ fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M());
 pnnx_graph.python(pnnxpypath, pnnxbinpath);

From a4fd3191d66a8d96e679eb7ae8fdd8b8a6e80d49 Mon Sep 17 00:00:00 2001
From: SZUwishion <2559916473@qq.com>
Date: Mon, 2 Sep 2024 17:47:02 +0800
Subject: [PATCH 04/16] pnnx print flops memops count

---
 tools/pnnx/src/main.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index 32e628be8d6f..5ef47b2409ac 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -313,6 +313,8 @@ int main(int argc, char** argv)
 std::string foldable_constants_zippath = ptbase + ".foldable_constants.zip";
 pnnx::Graph pnnx_graph;
+ fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
+ fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M());
 #if BUILD_ONNX2PNNX
 if (!model_file_maybe_torchscript(ptpath))
 {
@@ -361,9 +363,6 @@ int main(int argc, char** argv)
 pnnx_graph.save(pnnxparampath, pnnxbinpath);
- fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
- fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M());
-
 pnnx_graph.python(pnnxpypath, pnnxbinpath);
 #if BUILD_PNNX2ONNX

From 54659e042f843547f18b4b908da300ae2af89f8d Mon Sep 17 00:00:00 2001
From: SZUwishion <2559916473@qq.com>
Date: Mon, 2 Sep 2024 17:53:29 +0800
Subject: [PATCH 05/16] pnnx print flops memops count

---
 tools/pnnx/src/main.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index 5ef47b2409ac..23fdc0102224 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -313,8 +313,6 @@ int main(int argc, char** argv)
 std::string foldable_constants_zippath = ptbase + ".foldable_constants.zip";
 pnnx::Graph pnnx_graph;
- fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
- fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M());
 #if BUILD_ONNX2PNNX
 if (!model_file_maybe_torchscript(ptpath))
 {
@@ -384,6 +382,7 @@ int main(int argc, char** argv)
 // pnnx_graph2.load("pnnx.param", "pnnx.bin");
 // pnnx_graph2.save("pnnx2.param", "pnnx2.bin");
-
+ fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
+ fprintf(stderr, "memory ops = %dM\n",
pnnx_graph.calculate_memops_M()); return 0; } From 2c80f272cbbe9e2e0ae09321bb1af7eac19e01b0 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Tue, 3 Sep 2024 08:38:27 +0800 Subject: [PATCH 06/16] pnnx print flops memops count --- tools/pnnx/src/ir.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 7c5923acac1e..6dd429ebbfb4 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1446,14 +1446,14 @@ int Graph::calculate_flops_M() { long long flops = 0; for(auto op:ops) { - if(expand_expression(op) == "*") + if(op->type == "aten::matmul") { int m = op->inputs[0]->shape[0]; int k = op->inputs[0]->shape[1]; int n = op->inputs[1]->shape[1]; flops += 2 * m * k * n; } - else if(expand_expression(op) == "+") { + else if(op->type == "aten::add") { int m = op->inputs[0]->shape[0]; int n = op->inputs[0]->shape[1]; flops += m * n; @@ -1467,14 +1467,14 @@ int Graph::calculate_memops_M() long long mem = 0; for(auto op : ops) { - if(expand_expression(op) == "*") + if(op->type == "aten::matmul") { int m = op->inputs[0]->shape[0]; int k = op->inputs[0]->shape[1]; int n = op->inputs[1]->shape[1]; mem += m * k + k * n + m * n; } - else if(expand_expression(op) == "+") + else if(op->type == "aten::add") { int m = op->inputs[0]->shape[0]; int n = op->inputs[0]->shape[1]; From a89c0f7ce11c58fdef3c292bb198e6c0c741c8cb Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Tue, 3 Sep 2024 09:39:43 +0800 Subject: [PATCH 07/16] pnnx print flops memops count --- tools/pnnx/src/ir.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 6dd429ebbfb4..985cf47a3b97 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1459,7 +1459,7 @@ int Graph::calculate_flops_M() flops += m * n; } } - return int(flops / 1e6); + return int(flops); } int Graph::calculate_memops_M() @@ -1481,7 +1481,7 @@ int Graph::calculate_memops_M() mem += 3 * m * n; } } - return int(mem / 1e6); + return int(mem); } int Graph::python(const std::string& pypath, const std::string& pnnxbinpath) From d247bb9aba088946b0ea86b0fff0bb62fa0f6c82 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Tue, 3 Sep 2024 09:42:58 +0800 Subject: [PATCH 08/16] pnnx print flops memops count --- tools/pnnx/src/ir.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 985cf47a3b97..59f26b17f054 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -14,6 +14,7 @@ #include "ir.h" +#include #include #include #include @@ -1467,6 +1468,7 @@ int Graph::calculate_memops_M() long long mem = 0; for(auto op : ops) { + fprintf(stderr, "%s\n", op->type.c_str()); if(op->type == "aten::matmul") { int m = op->inputs[0]->shape[0]; From b10faaf661c085179e3a62b33c24cf4826c12e38 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Wed, 11 Sep 2024 17:14:53 +0800 Subject: [PATCH 09/16] test --- tools/pnnx/src/ir.cpp | 148 ++++++++++++++++++++++++++++++++-------- tools/pnnx/src/main.cpp | 5 +- 2 files changed, 123 insertions(+), 30 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 59f26b17f054..6fd139627ad7 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1446,21 +1446,75 @@ static std::string make_index_expression(const Operator* op) int Graph::calculate_flops_M() { long long flops = 0; - for(auto op:ops) { - if(op->type == "aten::matmul") - { - int m = 
op->inputs[0]->shape[0]; - int k = op->inputs[0]->shape[1]; - int n = op->inputs[1]->shape[1]; - flops += 2 * m * k * n; - } - else if(op->type == "aten::add") { - int m = op->inputs[0]->shape[0]; - int n = op->inputs[0]->shape[1]; - flops += m * n; - } - } - return int(flops); + for(auto op:ops) + { + fprintf(stderr, "op->type %s\n", op->type.c_str()); + if(op->type[0] == 'F') + { + std::string sub_type = op->type.substr(2); + if(sub_type == "adaptive_avg_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int o = op->params.at("output_size").ai[0]; + flops += n * c * l * o; + } + else if(sub_type == "adaptive_avg_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int oh = op->params.at("output_size").ai[0]; + int ow = op->params.at("output_size").ai[1]; + flops += n * c * h * w * oh * ow; + } + else if(sub_type == "adaptive_avg_pool3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int od = op->params.at("output_size").ai[0]; + int oh = op->params.at("output_size").ai[1]; + int ow = op->params.at("output_size").ai[2]; + flops += n * c * d * h * w * od * oh * ow; + } + else if(sub_type == "adaptive_max_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int o = op->params.at("output_size").ai[0]; + flops += n * c * l * o; + } + else if(sub_type == "adaptive_max_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int oh = op->params.at("output_size").ai[0]; + int ow = op->params.at("output_size").ai[1]; + flops += n * c * h * w * oh * ow; + } + else if(sub_type == "adaptive_max_pool3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int od = op->params.at("output_size").ai[0]; + int oh = op->params.at("output_size").ai[1]; + int ow = op->params.at("output_size").ai[2]; + flops += n * c * d * h * w * od * oh * ow; + } + } + } + return int(flops / 1e6); } int Graph::calculate_memops_M() @@ -1468,22 +1522,60 @@ int Graph::calculate_memops_M() long long mem = 0; for(auto op : ops) { - fprintf(stderr, "%s\n", op->type.c_str()); - if(op->type == "aten::matmul") + if(op->type[0] == 'F') { - int m = op->inputs[0]->shape[0]; - int k = op->inputs[0]->shape[1]; - int n = op->inputs[1]->shape[1]; - mem += m * k + k * n + m * n; - } - else if(op->type == "aten::add") - { - int m = op->inputs[0]->shape[0]; - int n = op->inputs[0]->shape[1]; - mem += 3 * m * n; + std::string sub_type = op->type.substr(2); + if(sub_type == "adaptive_avg_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int o = op->params.at("output_size").ai[0]; + mem += n * c * l * o; + } + else if(sub_type == "adaptive_avg_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int oh = op->params.at("output_size").ai[0]; + int ow = op->params.at("output_size").ai[1]; + mem += n * c * h * w * oh * ow; + } + else if(sub_type == "adaptive_avg_pool3d") + { + int n = 
op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int od = op->params.at("output_size").ai[0]; + int oh = op->params.at("output_size").ai[1]; + int ow = op->params.at("output_size").ai[2]; + mem += n * c * d * h * w * od * oh * ow; + } + else if(sub_type == "adaptive_max_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int o = op->params.at("output_size").ai[0]; + mem += n * c * l * o; + } + else if(sub_type == "adaptive_max_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int oh = op->params.at("output_size").ai[0]; + int ow = op->params.at("output_size").ai[1]; + mem += n * c * h * w * oh * ow; + } } } - return int(mem); + return int(mem / 1e6); } int Graph::python(const std::string& pypath, const std::string& pnnxbinpath) diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp index 23fdc0102224..5f5cb3aa7fcd 100644 --- a/tools/pnnx/src/main.cpp +++ b/tools/pnnx/src/main.cpp @@ -362,6 +362,9 @@ int main(int argc, char** argv) pnnx_graph.save(pnnxparampath, pnnxbinpath); pnnx_graph.python(pnnxpypath, pnnxbinpath); + + fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M()); + fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M()); #if BUILD_PNNX2ONNX pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16); @@ -382,7 +385,5 @@ int main(int argc, char** argv) // pnnx_graph2.load("pnnx.param", "pnnx.bin"); // pnnx_graph2.save("pnnx2.param", "pnnx2.bin"); - fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M()); - fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M()); return 0; } From ef1e8dfcfd82b201c6c811870577bc309df147d8 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Wed, 11 Sep 2024 20:17:11 +0800 Subject: [PATCH 10/16] test --- tools/pnnx/src/ir.cpp | 66 ++++++++--------------------------------- tools/pnnx/src/ir.h | 5 ++-- tools/pnnx/src/main.cpp | 5 ++-- 3 files changed, 19 insertions(+), 57 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 6fd139627ad7..c01b66b2c31a 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1443,12 +1443,10 @@ static std::string make_index_expression(const Operator* op) return index_expr; } -int Graph::calculate_flops_M() +void Graph::flops_memops_sum() { - long long flops = 0; for(auto op:ops) { - fprintf(stderr, "op->type %s\n", op->type.c_str()); if(op->type[0] == 'F') { std::string sub_type = op->type.substr(2); @@ -1459,6 +1457,7 @@ int Graph::calculate_flops_M() int l = op->inputs[0]->shape[2]; int o = op->params.at("output_size").ai[0]; flops += n * c * l * o; + memops += n * c * l + n * c * o; } else if(sub_type == "adaptive_avg_pool2d") { @@ -1469,6 +1468,7 @@ int Graph::calculate_flops_M() int oh = op->params.at("output_size").ai[0]; int ow = op->params.at("output_size").ai[1]; flops += n * c * h * w * oh * ow; + memops += n * c * h * w + n * c * oh * ow; } else if(sub_type == "adaptive_avg_pool3d") { @@ -1481,6 +1481,7 @@ int Graph::calculate_flops_M() int oh = op->params.at("output_size").ai[1]; int ow = op->params.at("output_size").ai[2]; flops += n * c * d * h * w * od * oh * ow; + memops += n * c * d * h * w + n * c * od * oh * ow; } else if(sub_type == "adaptive_max_pool1d") { @@ -1489,6 +1490,7 @@ int 
Graph::calculate_flops_M()
 int l = op->inputs[0]->shape[2];
 int o = op->params.at("output_size").ai[0];
 flops += n * c * l * o;
+ memops += n * c * l + n * c * o;
 }
 else if(sub_type == "adaptive_max_pool2d")
 {
@@ -1499,6 +1501,7 @@ int Graph::calculate_flops_M()
 int oh = op->params.at("output_size").ai[0];
 int ow = op->params.at("output_size").ai[1];
 flops += n * c * h * w * oh * ow;
+ memops += n * c * h * w + n * c * oh * ow;
 }
 else if(sub_type == "adaptive_max_pool3d")
 {
@@ -1511,71 +1514,28 @@ int Graph::calculate_flops_M()
 int oh = op->params.at("output_size").ai[1];
 int ow = op->params.at("output_size").ai[2];
 flops += n * c * d * h * w * od * oh * ow;
+ memops += n * c * d * h * w + n * c * od * oh * ow;
 }
- }
- return int(flops);
-}
-
-int Graph::calculate_memops_M()
-{
- long long mem = 0;
- for(auto op : ops)
- {
- fprintf(stderr, "%s\n", op->type.c_str());
- if(op->type[0] == 'F')
- {
- std::string sub_type = op->type.substr(2);
- if(sub_type == "adaptive_avg_pool1d")
- {
- int n = op->inputs[0]->shape[0];
- int c = op->inputs[0]->shape[1];
- int l = op->inputs[0]->shape[2];
- int o = op->params.at("output_size").ai[0];
- mem += n * c * l * o;
- }
- else if(sub_type == "adaptive_avg_pool2d")
+ else if(sub_type == "celu")
 {
 int n = op->inputs[0]->shape[0];
 int c = op->inputs[0]->shape[1];
 int h = op->inputs[0]->shape[2];
 int w = op->inputs[0]->shape[3];
- int oh = op->params.at("output_size").ai[0];
- int ow = op->params.at("output_size").ai[1];
- mem += n * c * h * w * oh * ow;
- }
- else if(sub_type == "adaptive_avg_pool3d")
- {
- int n = op->inputs[0]->shape[0];
- int c = op->inputs[0]->shape[1];
- int d = op->inputs[0]->shape[2];
- int h = op->inputs[0]->shape[3];
- int w = op->inputs[0]->shape[4];
- int od = op->params.at("output_size").ai[0];
- int oh = op->params.at("output_size").ai[1];
- int ow = op->params.at("output_size").ai[2];
- mem += n * c * d * h * w * od * oh * ow;
+ flops += n * c * h * w;
+ memops += 2 * n * c * h * w;
 }
- else if(sub_type == "adaptive_max_pool1d")
- {
- int n = op->inputs[0]->shape[0];
- int c = op->inputs[0]->shape[1];
- int l = op->inputs[0]->shape[2];
- int o = op->params.at("output_size").ai[0];
- mem += n * c * l * o;
- }
- else if(sub_type == "adaptive_max_pool2d")
+ else if(sub_type == "elu")
 {
 int n = op->inputs[0]->shape[0];
 int c = op->inputs[0]->shape[1];
 int h = op->inputs[0]->shape[2];
 int w = op->inputs[0]->shape[3];
- int oh = op->params.at("output_size").ai[0];
- int ow = op->params.at("output_size").ai[1];
- mem += n * c * h * w * oh * ow;
+ flops += n * c * h * w;
+ memops += 2 * n * c * h * w;
 }
 }
 }
diff --git a/tools/pnnx/src/ir.h b/tools/pnnx/src/ir.h
index bc1f0089591d..c66141d7324c 100644
--- a/tools/pnnx/src/ir.h
+++ b/tools/pnnx/src/ir.h
@@ -346,8 +346,9 @@ class Graph
 std::vector<Operator*> ops;
 std::vector<Operand*> operands;
- int calculate_flops_M();
- int calculate_memops_M();
+ long long flops = 0;
+ long long memops = 0;
+ void flops_memops_sum();
 private:
 Graph(const Graph& rhs);
diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp
index 5f5cb3aa7fcd..f75af022cdeb 100644
--- a/tools/pnnx/src/main.cpp
+++ b/tools/pnnx/src/main.cpp
@@ -363,8 +363,9 @@ int main(int argc, char** argv)
 pnnx_graph.python(pnnxpypath, pnnxbinpath);
- fprintf(stderr, "float ops = %dM\n", pnnx_graph.calculate_flops_M());
- fprintf(stderr, "memory ops = %dM\n", pnnx_graph.calculate_memops_M());
+ pnnx_graph.flops_memops_sum();
+ fprintf(stderr, "float ops = %.3fM\n",
double(pnnx_graph.flops) / 1e6); + fprintf(stderr, "mem ops = %.3fM\n", double(pnnx_graph.memops) / 1e6); #if BUILD_PNNX2ONNX pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16); From b977f730f5ea119315711d891f22234ec91b3251 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Wed, 11 Sep 2024 20:35:12 +0800 Subject: [PATCH 11/16] test --- tools/pnnx/src/ir.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index c01b66b2c31a..64495974a639 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1447,6 +1447,7 @@ void Graph::flops_memops_sum() { for(auto op:ops) { + fprintf(stderr, "op->type: %s\n", op->type.c_str()); if(op->type[0] == 'F') { std::string sub_type = op->type.substr(2); From 9f4180002f838924958fed329d5831b8b248ba1a Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Thu, 12 Sep 2024 17:35:19 +0800 Subject: [PATCH 12/16] test --- tools/pnnx/src/ir.cpp | 565 ++++++++++++++++++++++++++++++++++++++-- tools/pnnx/src/ir.h | 6 +- tools/pnnx/src/main.cpp | 2 + 3 files changed, 548 insertions(+), 25 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 64495974a639..5125ac1d1ad7 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -23,7 +23,6 @@ #include #include #include -#include #include "storezip.h" #include "utils.h" @@ -1445,13 +1444,13 @@ static std::string make_index_expression(const Operator* op) void Graph::flops_memops_sum() { - for(auto op:ops) + for (auto op : ops) { fprintf(stderr, "op->type: %s\n", op->type.c_str()); - if(op->type[0] == 'F') + if (op->type[0] == 'F') { std::string sub_type = op->type.substr(2); - if(sub_type == "adaptive_avg_pool1d") + if (sub_type == "adaptive_avg_pool1d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1460,7 +1459,7 @@ void Graph::flops_memops_sum() flops += n * c * l * o; memops += n * c * l + n * c * o; } - else if(sub_type == "adaptive_avg_pool2d") + else if (sub_type == "adaptive_avg_pool2d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1471,7 +1470,7 @@ void Graph::flops_memops_sum() flops += n * c * h * w * oh * ow; memops += n * c * h * w + n * c * oh * ow; } - else if(sub_type == "adaptive_avg_pool3d") + else if (sub_type == "adaptive_avg_pool3d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1484,7 +1483,58 @@ void Graph::flops_memops_sum() flops += n * c * d * h * w * od * oh * ow; memops += n * c * d * h * w + n * c * od * oh * ow; } - else if(sub_type == "adaptive_max_pool1d") + else if (sub_type == "avg_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->params.at("kernel_size").ai[0]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int o = (l + 2 * p - k) / s + 1; + flops += n * c * l * k; + memops += n * c * l + n * c * o; + } + else if (sub_type == "avg_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int kh = op->params.at("kernel_size").ai[0]; + int kw = op->params.at("kernel_size").ai[1]; + int sh = op->params.at("stride").ai[0]; + int sw = op->params.at("stride").ai[1]; + int ph = op->params.at("padding").ai[0]; + int pw = op->params.at("padding").ai[1]; + int oh = (h + 2 * ph - kh) / sh + 1; + int ow = (w + 2 * pw - kw) / sw + 1; + flops += n * c * h * w * kh * kw; + memops += n * c * h * 
w + n * c * oh * ow; + } + else if (sub_type == "avg_pool3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->params.at("kernel_size").ai[0]; + int kh = op->params.at("kernel_size").ai[1]; + int kw = op->params.at("kernel_size").ai[2]; + int sd = op->params.at("stride").ai[0]; + int sh = op->params.at("stride").ai[1]; + int sw = op->params.at("stride").ai[2]; + int pd = op->params.at("padding").ai[0]; + int ph = op->params.at("padding").ai[1]; + int pw = op->params.at("padding").ai[2]; + int od = (d + 2 * pd - kd) / sd + 1; + int oh = (h + 2 * ph - kh) / sh + 1; + int ow = (w + 2 * pw - kw) / sw + 1; + flops += n * c * d * h * w * kd * kh * kw; + memops += n * c * d * h * w + n * c * od * oh * ow; + } + else if (sub_type == "adaptive_max_pool1d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1493,7 +1543,7 @@ void Graph::flops_memops_sum() flops += n * c * l * o; memops += n * c * l + n * c * o; } - else if(sub_type == "adaptive_max_pool2d") + else if (sub_type == "adaptive_max_pool2d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1504,7 +1554,7 @@ void Graph::flops_memops_sum() flops += n * c * h * w * oh * ow; memops += n * c * h * w + n * c * oh * ow; } - else if(sub_type == "adaptive_max_pool3d") + else if (sub_type == "adaptive_max_pool3d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; @@ -1517,23 +1567,492 @@ void Graph::flops_memops_sum() flops += n * c * d * h * w * od * oh * ow; memops += n * c * d * h * w + n * c * od * oh * ow; } - else if(sub_type == "celu") + else if (sub_type == "max_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->params.at("kernel_size").ai[0]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int o = (l + 2 * p - k) / s + 1; + flops += n * c * l * k; + memops += n * c * l + n * c * o; + } + else if (sub_type == "max_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int kh = op->params.at("kernel_size").ai[0]; + int kw = op->params.at("kernel_size").ai[1]; + int sh = op->params.at("stride").ai[0]; + int sw = op->params.at("stride").ai[1]; + int ph = op->params.at("padding").ai[0]; + int pw = op->params.at("padding").ai[1]; + int oh = (h + 2 * ph - kh) / sh + 1; + int ow = (w + 2 * pw - kw) / sw + 1; + flops += n * c * h * w * kh * kw; + memops += n * c * h * w + n * c * oh * ow; + } + else if (sub_type == "max_pool3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->params.at("kernel_size").ai[0]; + int kh = op->params.at("kernel_size").ai[1]; + int kw = op->params.at("kernel_size").ai[2]; + int sd = op->params.at("stride").ai[0]; + int sh = op->params.at("stride").ai[1]; + int sw = op->params.at("stride").ai[2]; + int pd = op->params.at("padding").ai[0]; + int ph = op->params.at("padding").ai[1]; + int pw = op->params.at("padding").ai[2]; + int od = (d + 2 * pd - kd) / sd + 1; + int oh = (h + 2 * ph - kh) / sh + 1; + int ow = (w + 2 * pw - kw) / sw + 1; + flops += n * c * d * h * w * kd * kh * kw; + memops += n * c * d * h * w + n * c * od * oh * ow; + } + else if (sub_type == 
"lp_pool1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->params.at("kernel_size").i; + int p = op->params.at("p").i; + if (p == 1) + { + extra_flops += 2 * n * c * l * k; + } + else if (p == 2) + { + extra_flops += 3 * n * c * l * k; + } + extra_memops += 2 * n * c * l; + } + else if (sub_type == "lp_pool2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int kh = op->params.at("kernel_size").ai[0]; + int kw = op->params.at("kernel_size").ai[1]; + int p = op->params.at("p").i; + if (p == 1) + { + extra_flops += 2 * n * c * h * w * kh * kw; + } + else if (p == 2) + { + extra_flops += 3 * n * c * h * w * kh * kw; + } + extra_memops += 2 * n * c * h * w; + } + else if (sub_type == "lp_pool3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->params.at("kernel_size").ai[0]; + int kh = op->params.at("kernel_size").ai[1]; + int kw = op->params.at("kernel_size").ai[2]; + int p = op->params.at("p").i; + if (p == 1) + { + extra_flops += 2 * n * c * d * h * w * kd * kh * kw; + } + else if (p == 2) + { + extra_flops += 3 * n * c * d * h * w * kd * kh * kw; + } + extra_memops += 2 * n * c * d * h * w; + } + else if ( + sub_type == "elu" || + sub_type == "celu" || + sub_type == "gelu" || + sub_type == "glu" || + sub_type == "hardshrink" || + sub_type == "hardsigmoid" || + sub_type == "hardswish" || + sub_type == "hardtanh" || + sub_type == "leaky_relu" || + sub_type == "prelu" || + sub_type == "relu" || + sub_type == "relu6" || + sub_type == "rrelu" || + sub_type == "mish" || + sub_type == "normalize" || + sub_type == "batch_norm" || + sub_type == "group_norm" || + sub_type == "instance_norm" || + sub_type == "layer_norm" + ) + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int num_elements = 1; + for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + { + num_elements *= op->inputs[0]->shape[i]; + } + if(sub_type == "elu") + { + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "celu") + { + extra_flops += 3 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "gelu") + { + extra_flops += 3 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "glu") + { + int l = op->inputs[0]->shape[2]; + int o = op->outputs[0]->shape[2]; + extra_flops += n * c * l * o; + extra_memops += 2 * n * c * l + n * o; + } + else if(sub_type == "hardshrink") + { + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "hardsigmoid") + { + extra_flops += 6 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "hardswish") + { + extra_flops += 5 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "hardtanh") + { + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "leaky_relu") + { + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "prelu") + { + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "relu") + { + extra_flops += n * c * num_elements; + extra_memops += n * c 
* num_elements; + } + else if(sub_type == "relu6") + { + extra_memops += n * c * num_elements; + } + else if(sub_type == "rrelu") + { + extra_flops += n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "mish") + { + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if(sub_type == "normalize") + { + extra_flops += 7 * n * c * num_elements + 3; + extra_memops += 2 * n * c * num_elements; + } + else if( + sub_type == "batch_norm" || + sub_type == "group_norm" || + sub_type == "instance_norm" || + sub_type == "layer_norm" + ) + { + extra_flops += 7 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + } + else if (sub_type == "conv1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->inputs[1]->shape[0]; + int o = op->outputs[0]->shape[2]; + flops += 2 * n * c * l * k * o; + memops += 2 * n * c * l * k + n * o; + } + else if (sub_type == "conv2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int kh = op->inputs[1]->shape[2]; + int kw = op->inputs[1]->shape[3]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * h * w * kh * kw * o / g; + memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; + } + else if (sub_type == "conv3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->inputs[1]->shape[2]; + int kh = op->inputs[1]->shape[3]; + int kw = op->inputs[1]->shape[4]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; + memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; + } + else if (sub_type == "conv_transpose1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->inputs[1]->shape[0]; + int o = op->outputs[0]->shape[2]; + flops += 2 * n * c * l * k * o; + memops += 2 * n * c * l * k + n * o; + } + else if (sub_type == "conv_transpose2d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int h = op->inputs[0]->shape[2]; + int w = op->inputs[0]->shape[3]; + int kh = op->inputs[1]->shape[2]; + int kw = op->inputs[1]->shape[3]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * h * w * kh * kw * o / g; + memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; + } + else if (sub_type == "conv_transpose3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->inputs[1]->shape[2]; + int kh = op->inputs[1]->shape[3]; + int kw = op->inputs[1]->shape[4]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; + memops += 2 * n * c * d * h * w * kd * kh * kw / 
g + n * o * d * h * w; + } + else if (sub_type == "embedding") + { + int n = op->inputs[0]->shape[0]; + int l = op->inputs[0]->shape[1]; + int c = op->params.at("num_embeddings").i; + int e = op->params.at("embedding_dim").i; + extra_flops += n * l * e; + extra_memops += n * l + n * e; + } + else if (sub_type == "linear") + { + int n = op->inputs[0]->shape[0]; + int i = op->inputs[0]->shape[1]; + int o = op->outputs[0]->shape[1]; + flops += 2 * n * i * o; + memops += 2 * n * i + n * o; + } + else if (sub_type == "log_softmax") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + extra_flops += 2 * n * c * l; + extra_memops += 2 * n * c * l; + } + else if (sub_type == "logsigmoid") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + extra_flops += 2 * n * c * l; + extra_memops += 2 * n * c * l; + } + else if (sub_type == "scaled_dot_product_attention") + { + int n = op->inputs[0]->shape[0]; + int l = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + flops += 2 * n * l * l + n * l * d + n * l * l * d; + memops += 2 * n * l * d + 3 * n * l * l + n * l; + } + } + + else if (op->type.substr(0, 2) == "nn") + { + std::string sub_type = op->type.substr(3); + if ( + sub_type == "BatchNorm1d" || + sub_type == "BatchNorm2d" || + sub_type == "BatchNorm3d" + ) + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int num_elements = 1; + for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + { + num_elements *= op->inputs[0]->shape[i]; + } + extra_flops += 7 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if (sub_type == "Conv1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->inputs[1]->shape[0]; + int o = op->outputs[0]->shape[2]; + flops += 2 * n * c * l * k * o; + memops += 2 * n * c * l * k + n * o; + } + else if (sub_type == "Conv2d") { int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; int h = op->inputs[0]->shape[2]; int w = op->inputs[0]->shape[3]; - flops += n * c * h * w; - memops += 2 * n * c * h * w; + int kh = op->inputs[1]->shape[2]; + int kw = op->inputs[1]->shape[3]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * h * w * kh * kw * o / g; + memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; + } + else if (sub_type == "Conv3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->inputs[1]->shape[2]; + int kh = op->inputs[1]->shape[3]; + int kw = op->inputs[1]->shape[4]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; + memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; + } + else if (sub_type == "ConvTranspose1d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int l = op->inputs[0]->shape[2]; + int k = op->inputs[1]->shape[0]; + int o = op->outputs[0]->shape[2]; + flops += 2 * n * c * l * k * o; + memops += 2 * n * c * l * k + n * o; } - else if(sub_type == "elu") + else if (sub_type == "ConvTranspose2d") { int n = 
op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; int h = op->inputs[0]->shape[2]; int w = op->inputs[0]->shape[3]; - flops += n * c * h * w; - memops += 2 * n * c * h * w; + int kh = op->inputs[1]->shape[2]; + int kw = op->inputs[1]->shape[3]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * h * w * kh * kw * o / g; + memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; + } + else if (sub_type == "PReLU") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int num_elements = 1; + for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + { + num_elements *= op->inputs[0]->shape[i]; + } + extra_flops += 2 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; + } + else if (sub_type == "ConvTranspose3d") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int d = op->inputs[0]->shape[2]; + int h = op->inputs[0]->shape[3]; + int w = op->inputs[0]->shape[4]; + int kd = op->inputs[1]->shape[2]; + int kh = op->inputs[1]->shape[3]; + int kw = op->inputs[1]->shape[4]; + int o = op->outputs[0]->shape[2]; + int s = op->params.at("stride").ai[0]; + int p = op->params.at("padding").ai[0]; + int g = op->params.at("groups").i; + flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; + memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; + } + else if (sub_type == "Embedding") + { + int n = op->inputs[0]->shape[0]; + int l = op->inputs[0]->shape[1]; + int c = op->params.at("num_embeddings").i; + int e = op->params.at("embedding_dim").i; + extra_flops += 2 * n * l * e; + extra_memops += 2 * n * l + n * e; + } + else if (sub_type == "GroupNorm" || sub_type == "InstanceNorm" || sub_type == "LayerNorm") + { + int n = op->inputs[0]->shape[0]; + int c = op->inputs[0]->shape[1]; + int num_elements = 1; + for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + { + num_elements *= op->inputs[0]->shape[i]; + } + + extra_flops += 7 * n * c * num_elements; + extra_memops += 2 * n * c * num_elements; } } } @@ -1630,10 +2149,10 @@ int Graph::python(const std::string& pypath, const std::string& pnnxbinpath) for (size_t i = 0; i < param.ai.size(); i++) { if ((op->type == "nn.AdaptiveAvgPool2d" - || op->type == "nn.AdaptiveAvgPool3d" - || op->type == "nn.AdaptiveMaxPool2d" - || op->type == "nn.AdaptiveMaxPool3d") - && it.first == "output_size" && param.ai[i] == 0) + || op->type == "nn.AdaptiveAvgPool3d" + || op->type == "nn.AdaptiveMaxPool2d" + || op->type == "nn.AdaptiveMaxPool3d") + && it.first == "output_size" && param.ai[i] == 0) { fprintf(pyfp, "None"); } @@ -2386,10 +2905,10 @@ int Graph::python(const std::string& pypath, const std::string& pnnxbinpath) for (size_t i = 0; i < param.ai.size(); i++) { if ((op->type == "F.adaptive_avg_pool2d" - || op->type == "F.adaptive_avg_pool3d" - || op->type == "F.adaptive_max_pool2d" - || op->type == "F.adaptive_max_pool3d") - && it.first == "output_size" && param.ai[i] == 0) + || op->type == "F.adaptive_avg_pool3d" + || op->type == "F.adaptive_max_pool2d" + || op->type == "F.adaptive_max_pool3d") + && it.first == "output_size" && param.ai[i] == 0) { fprintf(pyfp, "None"); } diff --git a/tools/pnnx/src/ir.h b/tools/pnnx/src/ir.h index c66141d7324c..37ee81e0a6b5 100644 --- a/tools/pnnx/src/ir.h +++ b/tools/pnnx/src/ir.h @@ -346,8 +346,10 @@ class Graph std::vector ops; std::vector operands; - long long flops = 0; - long long memops = 0; + unsigned long 
long flops = 0; + unsigned long long memops = 0; + unsigned long long extra_flops = 0; + unsigned long long extra_memops = 0; void flops_memops_sum(); private: diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp index f75af022cdeb..949680faab82 100644 --- a/tools/pnnx/src/main.cpp +++ b/tools/pnnx/src/main.cpp @@ -366,6 +366,8 @@ int main(int argc, char** argv) pnnx_graph.flops_memops_sum(); fprintf(stderr, "float ops = %.3fM\n", double(pnnx_graph.flops) / 1e6); fprintf(stderr, "mem ops = %.3fM\n", double(pnnx_graph.memops) / 1e6); + fprintf(stderr, "extra float ops = %.3fM\n", double(pnnx_graph.extra_flops) / 1e6); + fprintf(stderr, "extra mem ops = %.3fM\n", double(pnnx_graph.extra_memops) / 1e6); #if BUILD_PNNX2ONNX pnnx::save_onnx(pnnx_graph, pnnxonnxpath.c_str(), fp16); From a91dc5ce90bcd677a92f92916b70f094dbdfc23b Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Mon, 16 Sep 2024 20:51:36 +0800 Subject: [PATCH 13/16] nn part finished --- tools/pnnx/src/ir.cpp | 722 +++++++++++++++++++++++------------------- 1 file changed, 388 insertions(+), 334 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 5125ac1d1ad7..6cbf320acf41 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -14,8 +14,11 @@ #include "ir.h" +#include #include +#include #include +#include #include #include #include @@ -23,6 +26,7 @@ #include #include #include +#include #include "storezip.h" #include "utils.h" @@ -1488,9 +1492,9 @@ void Graph::flops_memops_sum() int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; int l = op->inputs[0]->shape[2]; - int k = op->params.at("kernel_size").ai[0]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; + int k = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; int o = (l + 2 * p - k) / s + 1; flops += n * c * l * k; memops += n * c * l + n * c * o; @@ -1501,12 +1505,12 @@ void Graph::flops_memops_sum() int c = op->inputs[0]->shape[1]; int h = op->inputs[0]->shape[2]; int w = op->inputs[0]->shape[3]; - int kh = op->params.at("kernel_size").ai[0]; - int kw = op->params.at("kernel_size").ai[1]; - int sh = op->params.at("stride").ai[0]; - int sw = op->params.at("stride").ai[1]; - int ph = op->params.at("padding").ai[0]; - int pw = op->params.at("padding").ai[1]; + int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; + int sh = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int sw = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; + int ph = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int pw = op->has_param("padding") ? 
op->params.at("padding").ai[1] : 0; int oh = (h + 2 * ph - kh) / sh + 1; int ow = (w + 2 * pw - kw) / sw + 1; flops += n * c * h * w * kh * kw; @@ -1519,15 +1523,15 @@ void Graph::flops_memops_sum() int d = op->inputs[0]->shape[2]; int h = op->inputs[0]->shape[3]; int w = op->inputs[0]->shape[4]; - int kd = op->params.at("kernel_size").ai[0]; - int kh = op->params.at("kernel_size").ai[1]; - int kw = op->params.at("kernel_size").ai[2]; - int sd = op->params.at("stride").ai[0]; - int sh = op->params.at("stride").ai[1]; - int sw = op->params.at("stride").ai[2]; - int pd = op->params.at("padding").ai[0]; - int ph = op->params.at("padding").ai[1]; - int pw = op->params.at("padding").ai[2]; + int kd = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; + int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[2] : 1; + int sd = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int sh = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; + int sw = op->has_param("stride") ? op->params.at("stride").ai[2] : 1; + int pd = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int ph = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; + int pw = op->has_param("padding") ? op->params.at("padding").ai[2] : 0; int od = (d + 2 * pd - kd) / sd + 1; int oh = (h + 2 * ph - kh) / sh + 1; int ow = (w + 2 * pw - kw) / sw + 1; @@ -1572,9 +1576,9 @@ void Graph::flops_memops_sum() int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; int l = op->inputs[0]->shape[2]; - int k = op->params.at("kernel_size").ai[0]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; + int k = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; int o = (l + 2 * p - k) / s + 1; flops += n * c * l * k; memops += n * c * l + n * c * o; @@ -1585,12 +1589,12 @@ void Graph::flops_memops_sum() int c = op->inputs[0]->shape[1]; int h = op->inputs[0]->shape[2]; int w = op->inputs[0]->shape[3]; - int kh = op->params.at("kernel_size").ai[0]; - int kw = op->params.at("kernel_size").ai[1]; - int sh = op->params.at("stride").ai[0]; - int sw = op->params.at("stride").ai[1]; - int ph = op->params.at("padding").ai[0]; - int pw = op->params.at("padding").ai[1]; + int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; + int sh = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int sw = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; + int ph = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int pw = op->has_param("padding") ? 
op->params.at("padding").ai[1] : 0; int oh = (h + 2 * ph - kh) / sh + 1; int ow = (w + 2 * pw - kw) / sw + 1; flops += n * c * h * w * kh * kw; @@ -1603,192 +1607,24 @@ void Graph::flops_memops_sum() int d = op->inputs[0]->shape[2]; int h = op->inputs[0]->shape[3]; int w = op->inputs[0]->shape[4]; - int kd = op->params.at("kernel_size").ai[0]; - int kh = op->params.at("kernel_size").ai[1]; - int kw = op->params.at("kernel_size").ai[2]; - int sd = op->params.at("stride").ai[0]; - int sh = op->params.at("stride").ai[1]; - int sw = op->params.at("stride").ai[2]; - int pd = op->params.at("padding").ai[0]; - int ph = op->params.at("padding").ai[1]; - int pw = op->params.at("padding").ai[2]; + int kd = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; + int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; + int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[2] : 1; + int sd = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int sh = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; + int sw = op->has_param("stride") ? op->params.at("stride").ai[2] : 1; + int pd = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int ph = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; + int pw = op->has_param("padding") ? op->params.at("padding").ai[2] : 0; int od = (d + 2 * pd - kd) / sd + 1; int oh = (h + 2 * ph - kh) / sh + 1; int ow = (w + 2 * pw - kw) / sw + 1; flops += n * c * d * h * w * kd * kh * kw; memops += n * c * d * h * w + n * c * od * oh * ow; } - else if (sub_type == "lp_pool1d") + else if (sub_type == "prelu" || sub_type == "leaky_relu") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->params.at("kernel_size").i; - int p = op->params.at("p").i; - if (p == 1) - { - extra_flops += 2 * n * c * l * k; - } - else if (p == 2) - { - extra_flops += 3 * n * c * l * k; - } - extra_memops += 2 * n * c * l; - } - else if (sub_type == "lp_pool2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->params.at("kernel_size").ai[0]; - int kw = op->params.at("kernel_size").ai[1]; - int p = op->params.at("p").i; - if (p == 1) - { - extra_flops += 2 * n * c * h * w * kh * kw; - } - else if (p == 2) - { - extra_flops += 3 * n * c * h * w * kh * kw; - } - extra_memops += 2 * n * c * h * w; - } - else if (sub_type == "lp_pool3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->params.at("kernel_size").ai[0]; - int kh = op->params.at("kernel_size").ai[1]; - int kw = op->params.at("kernel_size").ai[2]; - int p = op->params.at("p").i; - if (p == 1) - { - extra_flops += 2 * n * c * d * h * w * kd * kh * kw; - } - else if (p == 2) - { - extra_flops += 3 * n * c * d * h * w * kd * kh * kw; - } - extra_memops += 2 * n * c * d * h * w; - } - else if ( - sub_type == "elu" || - sub_type == "celu" || - sub_type == "gelu" || - sub_type == "glu" || - sub_type == "hardshrink" || - sub_type == "hardsigmoid" || - sub_type == "hardswish" || - sub_type == "hardtanh" || - sub_type == "leaky_relu" || - sub_type == "prelu" || - sub_type == "relu" || - sub_type == "relu6" || - sub_type == "rrelu" || - sub_type == "mish" || - sub_type == "normalize" || - sub_type == "batch_norm" || - sub_type == 
"group_norm" || - sub_type == "instance_norm" || - sub_type == "layer_norm" - ) - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int num_elements = 1; - for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) - { - num_elements *= op->inputs[0]->shape[i]; - } - if(sub_type == "elu") - { - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "celu") - { - extra_flops += 3 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "gelu") - { - extra_flops += 3 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "glu") - { - int l = op->inputs[0]->shape[2]; - int o = op->outputs[0]->shape[2]; - extra_flops += n * c * l * o; - extra_memops += 2 * n * c * l + n * o; - } - else if(sub_type == "hardshrink") - { - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "hardsigmoid") - { - extra_flops += 6 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "hardswish") - { - extra_flops += 5 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "hardtanh") - { - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "leaky_relu") - { - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "prelu") - { - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "relu") - { - extra_flops += n * c * num_elements; - extra_memops += n * c * num_elements; - } - else if(sub_type == "relu6") - { - extra_memops += n * c * num_elements; - } - else if(sub_type == "rrelu") - { - extra_flops += n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "mish") - { - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if(sub_type == "normalize") - { - extra_flops += 7 * n * c * num_elements + 3; - extra_memops += 2 * n * c * num_elements; - } - else if( - sub_type == "batch_norm" || - sub_type == "group_norm" || - sub_type == "instance_norm" || - sub_type == "layer_norm" - ) - { - extra_flops += 7 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } + } else if (sub_type == "conv1d") { @@ -1809,9 +1645,9 @@ void Graph::flops_memops_sum() int kh = op->inputs[1]->shape[2]; int kw = op->inputs[1]->shape[3]; int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int g = op->has_param("groups") ? op->params.at("groups").i : 1; flops += 2 * n * c * h * w * kh * kw * o / g; memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; } @@ -1826,9 +1662,9 @@ void Graph::flops_memops_sum() int kh = op->inputs[1]->shape[3]; int kw = op->inputs[1]->shape[4]; int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int g = op->has_param("groups") ? 
op->params.at("groups").i : 1; flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; } @@ -1851,9 +1687,9 @@ void Graph::flops_memops_sum() int kh = op->inputs[1]->shape[2]; int kw = op->inputs[1]->shape[3]; int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int g = op->has_param("groups") ? op->params.at("groups").i : 1; flops += 2 * n * c * h * w * kh * kw * o / g; memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; } @@ -1868,20 +1704,15 @@ void Graph::flops_memops_sum() int kh = op->inputs[1]->shape[3]; int kw = op->inputs[1]->shape[4]; int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; + int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; + int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; + int g = op->has_param("groups") ? op->params.at("groups").i : 1; flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; } else if (sub_type == "embedding") { - int n = op->inputs[0]->shape[0]; - int l = op->inputs[0]->shape[1]; - int c = op->params.at("num_embeddings").i; - int e = op->params.at("embedding_dim").i; - extra_flops += n * l * e; - extra_memops += n * l + n * e; + /*todo*/ } else if (sub_type == "linear") { @@ -1920,139 +1751,362 @@ void Graph::flops_memops_sum() else if (op->type.substr(0, 2) == "nn") { std::string sub_type = op->type.substr(3); - if ( - sub_type == "BatchNorm1d" || - sub_type == "BatchNorm2d" || - sub_type == "BatchNorm3d" - ) - { + if (sub_type == "BatchNorm1d" + || sub_type == "BatchNorm2d" + || sub_type == "BatchNorm3d" + || sub_type == "GroupNorm" + || sub_type == "LayerNorm" + || sub_type == "InstanceNorm1d" + || sub_type == "InstanceNorm2d" + || sub_type == "InstanceNorm3d") + { + std::vector shape = op->inputs[0]->shape; int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; - int num_elements = 1; - for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + if((op->has_param("affine") && op->params.at("affine").b) + || (op->has_param("elementwise_affine") && op->params.at("elementwise_affine").b)) { - num_elements *= op->inputs[0]->shape[i]; + extra_flops += 2 * num_elements; + extra_memops += 2 * (num_elements + n * c); + } + else + { + extra_flops += num_elements; + extra_memops += num_elements; + } + } + else if (sub_type == "Conv1d" + || sub_type == "Conv2d" + || sub_type == "Conv3d" + || sub_type == "ConvTranspose1d" + || sub_type == "ConvTranspose2d" + || sub_type == "ConvTranspose3d") + { + int c = op->params.at("in_channels").i; + std::vector k = op->params.at("kernel_size").ai; + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int g = op->params["groups"].i; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + int kernel_size = std::accumulate(k.begin() + 2, k.end(), 1, std::multiplies()); + flops += output_size * c * 
kernel_size / g; + memops += input_size + output_size + std::accumulate(k.begin(), k.end(), 1, std::multiplies()) * c / g; + if(op->has_param("bias")) + { + flops += output_size; + memops += output_size; + } + } + else if (sub_type == "AvgPool1d" + || sub_type == "AvgPool2d" + || sub_type == "AvgPool3d") + { + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + flops += input_size; + memops += input_size + output_size; + } + else if (sub_type == "AdaptiveAvgPool1d" + || sub_type == "AdaptiveAvgPool2d" + || sub_type == "AdaptiveAvgPool3d") + { + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + std::vector kernel_size; + for(size_t i = 2; i < input_shape.size(); i++) + { + kernel_size.emplace_back(input_shape[i] / output_shape[i]); + } + flops += (std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()) + 1) * output_size; + memops += input_size + output_size; + } + else if(sub_type == "PReLU" + || sub_type == "ELU" + || sub_type == "LeakyReLU" + || sub_type == "GELU") + { + std::vector shape = op->outputs[0]->shape; + int n = shape[0]; + int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + extra_flops += num_elements; + if(sub_type == "PReLU") + { + extra_memops += 2 * num_elements + n * op->params["num_parameters"].i; + } + else + { + extra_memops += 2 * num_elements; } - extra_flops += 7 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; - } - else if (sub_type == "Conv1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->inputs[1]->shape[0]; - int o = op->outputs[0]->shape[2]; - flops += 2 * n * c * l * k * o; - memops += 2 * n * c * l * k + n * o; - } - else if (sub_type == "Conv2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->inputs[1]->shape[2]; - int kw = op->inputs[1]->shape[3]; - int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; - flops += 2 * n * c * h * w * kh * kw * o / g; - memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; } - else if (sub_type == "Conv3d") + else if(sub_type == "Tanh") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->inputs[1]->shape[2]; - int kh = op->inputs[1]->shape[3]; - int kw = op->inputs[1]->shape[4]; - int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; - flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; - memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; + std::vector shape = op->outputs[0]->shape; + int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); + extra_flops += 2 * num_elements; + 
extra_memops += 2 * num_elements; } - else if (sub_type == "ConvTranspose1d") + else if (sub_type == "Linear") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->inputs[1]->shape[0]; - int o = op->outputs[0]->shape[2]; - flops += 2 * n * c * l * k * o; - memops += 2 * n * c * l * k + n * o; + std::vector input_shape = op->inputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + std::vector output_shape = op->outputs[0]->shape; + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + int in_features = op->params.at("in_features").i; + int out_features = op->params.at("out_features").i; + int bias = (op->has_param("bias") && op->params.at("bias").b) ? out_features : 0; + flops += (in_features * out_features + bias) * input_size / in_features; + memops += input_size + output_size + output_size * (bias ? 1 : 0); } - else if (sub_type == "ConvTranspose2d") + else if (sub_type == "Upsample" + || sub_type == "UpsamplingBilinear2d" + || sub_type == "UpsamplingNearest2d") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->inputs[1]->shape[2]; - int kw = op->inputs[1]->shape[3]; - int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; - flops += 2 * n * c * h * w * kh * kw * o / g; - memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; + std::vector input_shape = op->inputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + std::vector output_shape = op->outputs[0]->shape; + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + std::string mode; + if(sub_type == "Upsample") + { + mode = op->has_param("mode") ? 
op->params.at("mode").s : "nearest"; + } + else if(sub_type == "UpsamplingBilinear2d") + { + mode = "bilinear"; + } + else if(sub_type == "UpsamplingNearest2d") + { + mode = "nearest"; + } + + if(mode == "nearest") + { + extra_flops += input_size; + extra_memops += input_size + output_size; + } + else if(mode == "linear") + { + extra_flops += 5 * output_size; + extra_memops += 2 * input_size + output_size; + } + else if(mode == "bilinear") + { + extra_flops += 11 * output_size; + extra_memops += 4 * input_size + output_size; + } + else if(mode == "bicubic") + { + extra_flops += (224 + 35) * output_size; + extra_memops += 16 * input_size + output_size; + } + else if(mode == "trilinear") + { + extra_flops += (13 * 2 + 5) * input_size; + extra_memops += 8 * input_size + output_size; + } } - else if (sub_type == "PReLU") + else if(sub_type == "RNN") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int num_elements = 1; - for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + bool bi = op->has_param("bidirectional") && op->params.at("bidirectional").b; + bool bias = op->has_param("bias") && op->params.at("bias").b; + int input_size = op->params.at("input_size").i; + int hidden_size = op->params.at("hidden_size").i; + int flops1 = hidden_size * (input_size + hidden_size) + hidden_size; + if(bias) + { + flops1 += 2 * hidden_size; + } + if(bi) + { + flops1 *= 2; + } + + int num_layers = op->params.at("num_layers").i; + int flops2 = 0; + if(bi) + { + flops2 = 3 * hidden_size * hidden_size + hidden_size; + if(bias) + { + flops2 += 2 * hidden_size; + } + flops2 *= 2 * num_layers; + } + else { - num_elements *= op->inputs[0]->shape[i]; + flops2 = 2 * hidden_size * hidden_size + hidden_size; + if(bias) + { + flops2 += 2 * hidden_size; + } + flops2 *= num_layers; + } + bool batch_first = op->has_param("batch_first") && op->params.at("batch_first").b; + int batch_size = batch_first ? op->inputs[0]->shape[0] : op->inputs[0]->shape[1]; + int num_steps = batch_first ? op->inputs[0]->shape[1] : op->inputs[0]->shape[0]; + flops += (flops1 + flops2) * num_steps * batch_size; + memops += num_steps * batch_size * input_size; + memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); + if(bias) + { + memops += 2 * hidden_size * num_layers * (bi ? 
2 : 1); } - extra_flops += 2 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; } - else if (sub_type == "ConvTranspose3d") + else if(sub_type == "LSTM") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->inputs[1]->shape[2]; - int kh = op->inputs[1]->shape[3]; - int kw = op->inputs[1]->shape[4]; - int o = op->outputs[0]->shape[2]; - int s = op->params.at("stride").ai[0]; - int p = op->params.at("padding").ai[0]; - int g = op->params.at("groups").i; - flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; - memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; + bool bi = op->has_param("bidirectional") && op->params.at("bidirectional").b; + bool bias = op->has_param("bias") && op->params.at("bias").b; + int input_size = op->params.at("input_size").i; + int hidden_size = op->params.at("hidden_size").i; + int flops1 = 4 * hidden_size * (input_size + hidden_size) + 4 * hidden_size; + if(bias) + { + flops1 += 8 * hidden_size; + } + if(bi) + { + flops1 *= 2; + } + flops1 += 4 * hidden_size; + + int num_layers = op->params.at("num_layers").i; + int flops2 = 0; + if(bi) + { + flops2 = 12 * hidden_size * hidden_size + 4 * hidden_size; + if(bias) + { + flops2 += 8 * hidden_size; + } + flops2 += 4 * hidden_size; + flops2 *= 2 * num_layers; + } + else + { + flops2 = 4 * hidden_size * hidden_size + 4 * hidden_size; + if(bias) + { + flops2 += 8 * hidden_size; + } + flops2 += 4 * hidden_size; + flops2 *= num_layers; + } + bool batch_first = op->has_param("batch_first") && op->params.at("batch_first").b; + int batch_size = batch_first ? op->inputs[0]->shape[0] : op->inputs[0]->shape[1]; + int num_steps = batch_first ? op->inputs[0]->shape[1] : op->inputs[0]->shape[0]; + flops += (flops1 + flops2) * num_steps * batch_size; + memops += num_steps * batch_size * input_size; + memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); + if(bias) + { + memops += 8 * hidden_size * num_layers * (bi ? 2 : 1); + } } - else if (sub_type == "Embedding") + else if (sub_type == "GRU") { - int n = op->inputs[0]->shape[0]; - int l = op->inputs[0]->shape[1]; - int c = op->params.at("num_embeddings").i; - int e = op->params.at("embedding_dim").i; - extra_flops += 2 * n * l * e; - extra_memops += 2 * n * l + n * e; + bool bi = op->has_param("bidirectional") && op->params.at("bidirectional").b; + bool bias = op->has_param("bias") && op->params.at("bias").b; + int input_size = op->params.at("input_size").i; + int hidden_size = op->params.at("hidden_size").i; + int flops1 = 3 * hidden_size * (input_size + hidden_size) + 3 * hidden_size; + if(bias) + { + flops1 += 6 * hidden_size; + } + flops1 += 4 * hidden_size; + if(bi) + { + flops1 *= 2; + } + + int num_layers = op->params.at("num_layers").i; + int flops2 = 0; + if(bi) + { + flops2 = 9 * hidden_size * hidden_size + 3 * hidden_size; + if(bias) + { + flops2 += 6 * hidden_size; + } + flops2 += 4 * hidden_size; + flops2 *= 2 * num_layers; + } + else + { + flops2 = 6 * hidden_size * hidden_size + 3 * hidden_size; + if(bias) + { + flops2 += 6 * hidden_size; + } + flops2 += 4 * hidden_size; + flops2 *= num_layers; + } + bool batch_first = op->has_param("batch_first") && op->params.at("batch_first").b; + int batch_size = batch_first ? op->inputs[0]->shape[0] : op->inputs[0]->shape[1]; + int num_steps = batch_first ? 
op->inputs[0]->shape[1] : op->inputs[0]->shape[0]; + flops += (flops1 + flops2) * num_steps * batch_size; + memops += num_steps * batch_size * input_size; + memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); + if(bias) + { + memops += 6 * hidden_size * num_layers * (bi ? 2 : 1); + } } - else if (sub_type == "GroupNorm" || sub_type == "InstanceNorm" || sub_type == "LayerNorm") + else if(sub_type == "MultiheadAttention") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int num_elements = 1; - for (size_t i = 2; i < op->inputs[0]->shape.size(); ++i) + bool batch_first = op->has_param("batch_first") && op->params.at("batch_first").b; + int batch_size = batch_first ? op->inputs[0]->shape[0] : op->inputs[0]->shape[1]; + int qlen = batch_first ? op->inputs[0]->shape[1] : op->inputs[0]->shape[0]; + int klen = batch_first ? op->inputs[1]->shape[1] : op->inputs[1]->shape[0]; + int d_model = op->params.at("embed_dim").i; + int num_heads = op->params.at("num_heads").i; + int head_dim = d_model / num_heads; + bool bias = op->params.at("bias").b; + + // Linear transformations for Q, K, V + int flops_qkv = 3 * batch_size * qlen * d_model * d_model; + if (bias) + { + flops_qkv += 3 * batch_size * qlen * d_model; + } + + // Scaled dot-product attention + int flops_attention = batch_size * num_heads * qlen * klen * head_dim; + + // Linear transformation for output + int flops_output = batch_size * qlen * d_model * d_model; + if (bias) + { + flops_output += batch_size * qlen * d_model; + } + + flops += flops_qkv + flops_attention + flops_output; + + // Memory operations for Q, K, V + int memops_qkv = 3 * batch_size * qlen * d_model; + if (bias) + { + memops_qkv += 3 * d_model; + } + + // Memory operations for attention weights + int memops_attention = batch_size * num_heads * qlen * klen; + + // Memory operations for output + int memops_output = batch_size * qlen * d_model; + if (bias) { - num_elements *= op->inputs[0]->shape[i]; + memops_output += d_model; } - extra_flops += 7 * n * c * num_elements; - extra_memops += 2 * n * c * num_elements; + // Total memory operations + memops += memops_qkv + memops_attention + memops_output; } } } From 4adf254c729edc63ef0e7deb86b3bbb3cbf079c9 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Thu, 19 Sep 2024 16:02:54 +0800 Subject: [PATCH 14/16] functional finished --- tools/pnnx/src/ir.cpp | 335 +++++++----------------------------------- 1 file changed, 53 insertions(+), 282 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index 6cbf320acf41..f05ca65a3c59 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1454,297 +1454,68 @@ void Graph::flops_memops_sum() if (op->type[0] == 'F') { std::string sub_type = op->type.substr(2); - if (sub_type == "adaptive_avg_pool1d") + if (sub_type == "linear") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int o = op->params.at("output_size").ai[0]; - flops += n * c * l * o; - memops += n * c * l + n * c * o; - } - else if (sub_type == "adaptive_avg_pool2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int oh = op->params.at("output_size").ai[0]; - int ow = op->params.at("output_size").ai[1]; - flops += n * c * h * w * oh * ow; - memops += n * c * h * w + n * c * oh * ow; - } - else if (sub_type == "adaptive_avg_pool3d") - { - int n = op->inputs[0]->shape[0]; - int c 
= op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int od = op->params.at("output_size").ai[0]; - int oh = op->params.at("output_size").ai[1]; - int ow = op->params.at("output_size").ai[2]; - flops += n * c * d * h * w * od * oh * ow; - memops += n * c * d * h * w + n * c * od * oh * ow; - } - else if (sub_type == "avg_pool1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int o = (l + 2 * p - k) / s + 1; - flops += n * c * l * k; - memops += n * c * l + n * c * o; - } - else if (sub_type == "avg_pool2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; - int sh = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int sw = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; - int ph = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int pw = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; - int oh = (h + 2 * ph - kh) / sh + 1; - int ow = (w + 2 * pw - kw) / sw + 1; - flops += n * c * h * w * kh * kw; - memops += n * c * h * w + n * c * oh * ow; - } - else if (sub_type == "avg_pool3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; - int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[2] : 1; - int sd = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int sh = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; - int sw = op->has_param("stride") ? op->params.at("stride").ai[2] : 1; - int pd = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int ph = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; - int pw = op->has_param("padding") ? 
op->params.at("padding").ai[2] : 0; - int od = (d + 2 * pd - kd) / sd + 1; - int oh = (h + 2 * ph - kh) / sh + 1; - int ow = (w + 2 * pw - kw) / sw + 1; - flops += n * c * d * h * w * kd * kh * kw; - memops += n * c * d * h * w + n * c * od * oh * ow; - } - else if (sub_type == "adaptive_max_pool1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int o = op->params.at("output_size").ai[0]; - flops += n * c * l * o; - memops += n * c * l + n * c * o; - } - else if (sub_type == "adaptive_max_pool2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int oh = op->params.at("output_size").ai[0]; - int ow = op->params.at("output_size").ai[1]; - flops += n * c * h * w * oh * ow; - memops += n * c * h * w + n * c * oh * ow; - } - else if (sub_type == "adaptive_max_pool3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int od = op->params.at("output_size").ai[0]; - int oh = op->params.at("output_size").ai[1]; - int ow = op->params.at("output_size").ai[2]; - flops += n * c * d * h * w * od * oh * ow; - memops += n * c * d * h * w + n * c * od * oh * ow; - } - else if (sub_type == "max_pool1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int o = (l + 2 * p - k) / s + 1; - flops += n * c * l * k; - memops += n * c * l + n * c * o; - } - else if (sub_type == "max_pool2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; - int sh = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int sw = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; - int ph = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int pw = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; - int oh = (h + 2 * ph - kh) / sh + 1; - int ow = (w + 2 * pw - kw) / sw + 1; - flops += n * c * h * w * kh * kw; - memops += n * c * h * w + n * c * oh * ow; - } - else if (sub_type == "max_pool3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[0] : 1; - int kh = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[1] : 1; - int kw = op->has_param("kernel_size") ? op->params.at("kernel_size").ai[2] : 1; - int sd = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int sh = op->has_param("stride") ? op->params.at("stride").ai[1] : 1; - int sw = op->has_param("stride") ? op->params.at("stride").ai[2] : 1; - int pd = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int ph = op->has_param("padding") ? op->params.at("padding").ai[1] : 0; - int pw = op->has_param("padding") ? 
op->params.at("padding").ai[2] : 0; - int od = (d + 2 * pd - kd) / sd + 1; - int oh = (h + 2 * ph - kh) / sh + 1; - int ow = (w + 2 * pw - kw) / sw + 1; - flops += n * c * d * h * w * kd * kh * kw; - memops += n * c * d * h * w + n * c * od * oh * ow; - } - else if (sub_type == "prelu" || sub_type == "leaky_relu") - { - - } - else if (sub_type == "conv1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->inputs[1]->shape[0]; - int o = op->outputs[0]->shape[2]; - flops += 2 * n * c * l * k * o; - memops += 2 * n * c * l * k + n * o; - } - else if (sub_type == "conv2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->inputs[1]->shape[2]; - int kw = op->inputs[1]->shape[3]; - int o = op->outputs[0]->shape[2]; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int g = op->has_param("groups") ? op->params.at("groups").i : 1; - flops += 2 * n * c * h * w * kh * kw * o / g; - memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; - } - else if (sub_type == "conv3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->inputs[1]->shape[2]; - int kh = op->inputs[1]->shape[3]; - int kw = op->inputs[1]->shape[4]; - int o = op->outputs[0]->shape[2]; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int g = op->has_param("groups") ? op->params.at("groups").i : 1; - flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; - memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; - } - else if (sub_type == "conv_transpose1d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - int k = op->inputs[1]->shape[0]; - int o = op->outputs[0]->shape[2]; - flops += 2 * n * c * l * k * o; - memops += 2 * n * c * l * k + n * o; - } - else if (sub_type == "conv_transpose2d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int h = op->inputs[0]->shape[2]; - int w = op->inputs[0]->shape[3]; - int kh = op->inputs[1]->shape[2]; - int kw = op->inputs[1]->shape[3]; - int o = op->outputs[0]->shape[2]; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int g = op->has_param("groups") ? op->params.at("groups").i : 1; - flops += 2 * n * c * h * w * kh * kw * o / g; - memops += 2 * n * c * h * w * kh * kw / g + n * o * h * w; - } - else if (sub_type == "conv_transpose3d") - { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - int h = op->inputs[0]->shape[3]; - int w = op->inputs[0]->shape[4]; - int kd = op->inputs[1]->shape[2]; - int kh = op->inputs[1]->shape[3]; - int kw = op->inputs[1]->shape[4]; - int o = op->outputs[0]->shape[2]; - int s = op->has_param("stride") ? op->params.at("stride").ai[0] : 1; - int p = op->has_param("padding") ? op->params.at("padding").ai[0] : 0; - int g = op->has_param("groups") ? 
op->params.at("groups").i : 1; - flops += 2 * n * c * d * h * w * kd * kh * kw * o / g; - memops += 2 * n * c * d * h * w * kd * kh * kw / g + n * o * d * h * w; - } - else if (sub_type == "embedding") - { - /*todo*/ + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + int out_features = op->attrs.at("data").shape[0]; + flops += input_size * out_features; + if(op->has_param("bias")) + { + flops += out_features; + } + memops += input_size + output_size; } - else if (sub_type == "linear") + else if (sub_type == "avgpool1d" + || sub_type == "avgpool2d" + || sub_type == "avgpool3d" + || sub_type == "adaptive_avgpool1d" + || sub_type == "adaptive_avgpool2d" + || sub_type == "adaptive_avgpool3d") { - int n = op->inputs[0]->shape[0]; - int i = op->inputs[0]->shape[1]; - int o = op->outputs[0]->shape[1]; - flops += 2 * n * i * o; - memops += 2 * n * i + n * o; + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + flops += input_size; + memops += input_size + output_size; } - else if (sub_type == "log_softmax") + else if (sub_type == "prelu" + || sub_type == "elu" + || sub_type == "leaky_relu" + || sub_type == "gelu" + || sub_type == "silu" + || sub_type == "softmax") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - extra_flops += 2 * n * c * l; - extra_memops += 2 * n * c * l; + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + extra_flops += input_size; + extra_memops += input_size + output_size; } - else if (sub_type == "logsigmoid") + else if (sub_type == "upsample" + || sub_type == "upsample_nearest" + || sub_type == "upsample_bilinear") { - int n = op->inputs[0]->shape[0]; - int c = op->inputs[0]->shape[1]; - int l = op->inputs[0]->shape[2]; - extra_flops += 2 * n * c * l; - extra_memops += 2 * n * c * l; + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + extra_flops += output_size; + extra_memops += input_size + output_size; } - else if (sub_type == "scaled_dot_product_attention") + else if (sub_type == "interpolate") { - int n = op->inputs[0]->shape[0]; - int l = op->inputs[0]->shape[1]; - int d = op->inputs[0]->shape[2]; - flops += 2 * n * l * l + n * l * d + n * l * l * d; - memops += 2 * n * l * d + 3 * n * l * l + n * l; + std::vector input_shape = op->inputs[0]->shape; + std::vector output_shape = op->outputs[0]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + std::vector scale_factor = op->params.at("scale_factor").ai; + extra_flops += input_size * std::accumulate(scale_factor.begin(), scale_factor.end(), 1, std::multiplies()); + extra_memops += input_size + output_size; } }
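A side note, not part of the patches themselves: the shape-only rule that patch 14 applies to F.linear can be reproduced in isolation. The sketch below uses made-up placeholder shapes and, like the handler above, counts one multiply-accumulate per weight use, plus out_features extra flops when a bias is present.

// Standalone sketch (hypothetical shapes), mirroring the F.linear counting rule.
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> input_shape = {2, 8};  // placeholder batch x in_features
    std::vector<int> output_shape = {2, 4}; // placeholder batch x out_features
    int out_features = 4;                   // would come from the weight's shape[0]
    long long input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1LL, std::multiplies<long long>());
    long long output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1LL, std::multiplies<long long>());
    long long flops = input_size * out_features; // one MAC per weight use
    long long memops = input_size + output_size; // read input, write output
    printf("flops=%lld memops=%lld\n", flops, memops); // prints flops=64 memops=24
    return 0;
}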
From 296954dee81b2b2bc8c1b87906c121031d538191 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Fri, 20 Sep 2024 12:56:07 +0800 Subject: [PATCH 15/16] all finished --- tools/pnnx/src/ir.cpp | 45 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index f05ca65a3c59..e4974f6b7c87 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1450,7 +1450,6 @@ void Graph::flops_memops_sum() { for (auto op : ops) { - fprintf(stderr, "op->type: %s\n", op->type.c_str()); if (op->type[0] == 'F') { std::string sub_type = op->type.substr(2); @@ -1880,6 +1879,50 @@ void Graph::flops_memops_sum() memops += memops_qkv + memops_attention + memops_output; } } + + else if (op->type.substr(0, 5) == "torch") + { + std::string sub_type = op->type.substr(6); + if(sub_type == "matmul" + || sub_type == "mm" + || sub_type == "bmm") + { + std::vector input_shape_1 = op->inputs[0]->shape; + std::vector input_shape_2 = op->inputs[1]->shape; + int input_size_1 = std::accumulate(input_shape_1.begin(), input_shape_1.end(), 1, std::multiplies()); + int input_size_2 = std::accumulate(input_shape_2.begin(), input_shape_2.end(), 1, std::multiplies()); + std::vector output_shape = op->outputs[0]->shape; + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + flops += input_size_1 * input_shape_2.back(); + memops += input_size_1 + input_size_2 + output_size; + } + else if (sub_type == "addmm" + || sub_type == "baddbmm") + { + std::vector input_shape = op->inputs[0]->shape; + std::vector mat_shape_1 = op->inputs[1]->shape; + std::vector mat_shape_2 = op->inputs[2]->shape; + int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); + int mat_size_1 = std::accumulate(mat_shape_1.begin(), mat_shape_1.end(), 1, std::multiplies()); + int mat_size_2 = std::accumulate(mat_shape_2.begin(), mat_shape_2.end(), 1, std::multiplies()); + std::vector output_shape = op->outputs[0]->shape; + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + flops += input_size + mat_size_1 * mat_shape_2.back(); + memops += input_size + mat_size_1 + mat_size_2 + output_size; + } + else if (sub_type == "mul" + || sub_type == "add") + { + std::vector input_shape_1 = op->inputs[0]->shape; + std::vector input_shape_2 = op->inputs[1]->shape; + int input_size_1 = std::accumulate(input_shape_1.begin(), input_shape_1.end(), 1, std::multiplies()); + int input_size_2 = std::accumulate(input_shape_2.begin(), input_shape_2.end(), 1, std::multiplies()); + std::vector output_shape = op->outputs[0]->shape; + int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); + flops += output_size; + memops += input_size_1 + input_size_2 + output_size; + } + } } }
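A side note, not part of the patches themselves: the torch.matmul/mm/bmm rule in patch 15 counts one multiply-accumulate per output contribution (b*m*k*n for batched (b,m,k) x (b,k,n) operands) rather than counting the multiply and the add separately. A standalone sketch of that rule, with placeholder shapes:

// Standalone sketch (hypothetical shapes), mirroring the torch.matmul rule.
#include <cstdio>
#include <functional>
#include <numeric>
#include <vector>

int main()
{
    std::vector<int> shape_a = {4, 32, 64};   // placeholder b x m x k
    std::vector<int> shape_b = {4, 64, 16};   // placeholder b x k x n
    std::vector<int> shape_out = {4, 32, 16}; // b x m x n
    long long size_a = std::accumulate(shape_a.begin(), shape_a.end(), 1LL, std::multiplies<long long>());
    long long size_b = std::accumulate(shape_b.begin(), shape_b.end(), 1LL, std::multiplies<long long>());
    long long size_out = std::accumulate(shape_out.begin(), shape_out.end(), 1LL, std::multiplies<long long>());
    long long flops = size_a * shape_b.back();     // b*m*k multiplied by n: one MAC each
    long long memops = size_a + size_b + size_out; // read both inputs, write output
    printf("flops=%lld memops=%lld\n", flops, memops); // prints flops=131072 memops=14336
    return 0;
}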
From 5ff6210585d6d8d48a7050245c979501b2829b49 Mon Sep 17 00:00:00 2001 From: SZUwishion <2559916473@qq.com> Date: Tue, 15 Oct 2024 23:09:30 +0800 Subject: [PATCH 16/16] code format fix --- tools/pnnx/src/ir.cpp | 132 ++++++++++++++++++++-------------------- tools/pnnx/src/main.cpp | 2 +- 2 files changed, 67 insertions(+), 67 deletions(-) diff --git a/tools/pnnx/src/ir.cpp b/tools/pnnx/src/ir.cpp index e4974f6b7c87..c81944c12052 100644 --- a/tools/pnnx/src/ir.cpp +++ b/tools/pnnx/src/ir.cpp @@ -1461,18 +1461,18 @@ void Graph::flops_memops_sum() int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); int out_features = op->attrs.at("data").shape[0]; flops += input_size * out_features; - if(op->has_param("bias")) + if (op->has_param("bias")) { flops += out_features; } memops += input_size + output_size; } else if (sub_type == "avgpool1d" - || sub_type == "avgpool2d" - || sub_type == "avgpool3d" - || sub_type == "adaptive_avgpool1d" - || sub_type == "adaptive_avgpool2d" - || sub_type == "adaptive_avgpool3d") + || sub_type == "avgpool2d" + || sub_type == "avgpool3d" + || sub_type == "adaptive_avgpool1d" + || sub_type == "adaptive_avgpool2d" + || sub_type == "adaptive_avgpool3d") { std::vector input_shape = op->inputs[0]->shape; std::vector output_shape = op->outputs[0]->shape; @@ -1482,11 +1482,11 @@ void Graph::flops_memops_sum() memops += input_size + output_size; } else if (sub_type == "prelu" - || sub_type == "elu" - || sub_type == "leaky_relu" - || sub_type == "gelu" - || sub_type == "silu" - || sub_type == "softmax") + || sub_type == "elu" + || sub_type == "leaky_relu" + || sub_type == "gelu" + || sub_type == "silu" + || sub_type == "softmax") { std::vector input_shape = op->inputs[0]->shape; std::vector output_shape = op->outputs[0]->shape; @@ -1496,8 +1496,8 @@ void Graph::flops_memops_sum() extra_memops += input_size + output_size; } else if (sub_type == "upsample" - || sub_type == "upsample_nearest" - || sub_type == "upsample_bilinear" + || sub_type == "upsample_nearest" + || sub_type == "upsample_bilinear") { std::vector input_shape = op->inputs[0]->shape; std::vector output_shape = op->outputs[0]->shape; @@ -1534,7 +1534,7 @@ void Graph::flops_memops_sum() int n = op->inputs[0]->shape[0]; int c = op->inputs[0]->shape[1]; int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); - if((op->has_param("affine") && op->params.at("affine").b) + if ((op->has_param("affine") && op->params.at("affine").b) || (op->has_param("elementwise_affine") && op->params.at("elementwise_affine").b)) { extra_flops += 2 * num_elements; @@ -1547,11 +1547,11 @@ void Graph::flops_memops_sum() } } else if (sub_type == "Conv1d" - || sub_type == "Conv2d" - || sub_type == "Conv3d" - || sub_type == "ConvTranspose1d" - || sub_type == "ConvTranspose2d" - || sub_type == "ConvTranspose3d") + || sub_type == "Conv2d" + || sub_type == "Conv3d" + || sub_type == "ConvTranspose1d" + || sub_type == "ConvTranspose2d" + || sub_type == "ConvTranspose3d") { int c = op->params.at("in_channels").i; std::vector k = op->params.at("kernel_size").ai; @@ -1561,17 +1561,17 @@ void Graph::flops_memops_sum() int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); int kernel_size = std::accumulate(k.begin() + 2, k.end(), 1, std::multiplies()); - flops += output_size * c * kernel_size / g; + flops += output_size * c * kernel_size / g; memops += input_size + output_size + std::accumulate(k.begin(), k.end(), 1, std::multiplies()) * c / g; - if(op->has_param("bias")) + if (op->has_param("bias")) { flops += output_size; memops += output_size; } } else if (sub_type == "AvgPool1d" - || sub_type == "AvgPool2d" - || sub_type == "AvgPool3d") + || sub_type == "AvgPool2d" + || sub_type == "AvgPool3d") { std::vector input_shape = op->inputs[0]->shape; std::vector output_shape = 
op->outputs[0]->shape; @@ -1581,31 +1581,31 @@ void Graph::flops_memops_sum() memops += input_size + output_size; } else if (sub_type == "AdaptiveAvgPool1d" - || sub_type == "AdaptiveAvgPool2d" - || sub_type == "AdaptiveAvgPool3d") + || sub_type == "AdaptiveAvgPool2d" + || sub_type == "AdaptiveAvgPool3d") { std::vector input_shape = op->inputs[0]->shape; std::vector output_shape = op->outputs[0]->shape; int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); std::vector kernel_size; - for(size_t i = 2; i < input_shape.size(); i++) + for (size_t i = 2; i < input_shape.size(); i++) { kernel_size.emplace_back(input_shape[i] / output_shape[i]); } flops += (std::accumulate(kernel_size.begin(), kernel_size.end(), 1, std::multiplies()) + 1) * output_size; memops += input_size + output_size; } - else if(sub_type == "PReLU" - || sub_type == "ELU" - || sub_type == "LeakyReLU" - || sub_type == "GELU" + else if (sub_type == "PReLU" + || sub_type == "ELU" + || sub_type == "LeakyReLU" + || sub_type == "GELU") { std::vector shape = op->outputs[0]->shape; int n = shape[0]; int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); extra_flops += num_elements; - if(sub_type == "PReLU") + if (sub_type == "PReLU") { extra_memops += 2 * num_elements + n * op->params["num_parameters"].i; } @@ -1614,7 +1614,7 @@ void Graph::flops_memops_sum() extra_memops += 2 * num_elements; } } - else if(sub_type == "Tanh") + else if (sub_type == "Tanh") { std::vector shape = op->outputs[0]->shape; int num_elements = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); @@ -1634,75 +1634,75 @@ void Graph::flops_memops_sum() memops += input_size + output_size + output_size * (bias ? 1 : 0); } else if (sub_type == "Upsample" - || sub_type == "UpsamplingBilinear2d" - || sub_type == "UpsamplingNearest2d") + || sub_type == "UpsamplingBilinear2d" + || sub_type == "UpsamplingNearest2d") { std::vector input_shape = op->inputs[0]->shape; int input_size = std::accumulate(input_shape.begin(), input_shape.end(), 1, std::multiplies()); std::vector output_shape = op->outputs[0]->shape; int output_size = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies()); std::string mode; - if(sub_type == "Upsample") + if (sub_type == "Upsample") { mode = op->has_param("mode") ? 
op->params.at("mode").s : "nearest"; } - else if(sub_type == "UpsamplingBilinear2d") + else if (sub_type == "UpsamplingBilinear2d") { mode = "bilinear"; } - else if(sub_type == "UpsamplingNearest2d") + else if (sub_type == "UpsamplingNearest2d") { mode = "nearest"; } - if(mode == "nearest") + if (mode == "nearest") { extra_flops += input_size; extra_memops += input_size + output_size; } - else if(mode == "linear") + else if (mode == "linear") { extra_flops += 5 * output_size; extra_memops += 2 * input_size + output_size; } - else if(mode == "bilinear") + else if (mode == "bilinear") { extra_flops += 11 * output_size; extra_memops += 4 * input_size + output_size; } - else if(mode == "bicubic") + else if (mode == "bicubic") { extra_flops += (224 + 35) * output_size; extra_memops += 16 * input_size + output_size; } - else if(mode == "trilinear") + else if (mode == "trilinear") { extra_flops += (13 * 2 + 5) * input_size; extra_memops += 8 * input_size + output_size; } } - else if(sub_type == "RNN") + else if (sub_type == "RNN") { bool bi = op->has_param("bidirectional") && op->params.at("bidirectional").b; bool bias = op->has_param("bias") && op->params.at("bias").b; int input_size = op->params.at("input_size").i; int hidden_size = op->params.at("hidden_size").i; int flops1 = hidden_size * (input_size + hidden_size) + hidden_size; - if(bias) + if (bias) { flops1 += 2 * hidden_size; } - if(bi) + if (bi) { flops1 *= 2; } int num_layers = op->params.at("num_layers").i; int flops2 = 0; - if(bi) + if (bi) { flops2 = 3 * hidden_size * hidden_size + hidden_size; - if(bias) + if (bias) { flops2 += 2 * hidden_size; } @@ -1711,7 +1711,7 @@ void Graph::flops_memops_sum() else { flops2 = 2 * hidden_size * hidden_size + hidden_size; - if(bias) + if (bias) { flops2 += 2 * hidden_size; } @@ -1723,23 +1723,23 @@ void Graph::flops_memops_sum() flops += (flops1 + flops2) * num_steps * batch_size; memops += num_steps * batch_size * input_size; memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); - if(bias) + if (bias) { memops += 2 * hidden_size * num_layers * (bi ? 2 : 1); } } - else if(sub_type == "LSTM") + else if (sub_type == "LSTM") { bool bi = op->has_param("bidirectional") && op->params.at("bidirectional").b; bool bias = op->has_param("bias") && op->params.at("bias").b; int input_size = op->params.at("input_size").i; int hidden_size = op->params.at("hidden_size").i; int flops1 = 4 * hidden_size * (input_size + hidden_size) + 4 * hidden_size; - if(bias) + if (bias) { flops1 += 8 * hidden_size; } - if(bi) + if (bi) { flops1 *= 2; } @@ -1747,10 +1747,10 @@ void Graph::flops_memops_sum() int num_layers = op->params.at("num_layers").i; int flops2 = 0; - if(bi) + if (bi) { flops2 = 12 * hidden_size * hidden_size + 4 * hidden_size; - if(bias) + if (bias) { flops2 += 8 * hidden_size; } @@ -1760,7 +1760,7 @@ void Graph::flops_memops_sum() else { flops2 = 4 * hidden_size * hidden_size + 4 * hidden_size; - if(bias) + if (bias) { flops2 += 8 * hidden_size; } @@ -1773,7 +1773,7 @@ void Graph::flops_memops_sum() flops += (flops1 + flops2) * num_steps * batch_size; memops += num_steps * batch_size * input_size; memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); - if(bias) + if (bias) { memops += 8 * hidden_size * num_layers * (bi ? 
2 : 1); } @@ -1785,22 +1785,22 @@ void Graph::flops_memops_sum() int input_size = op->params.at("input_size").i; int hidden_size = op->params.at("hidden_size").i; int flops1 = 3 * hidden_size * (input_size + hidden_size) + 3 * hidden_size; - if(bias) + if (bias) { flops1 += 6 * hidden_size; } flops1 += 4 * hidden_size; - if(bi) + if (bi) { flops1 *= 2; } int num_layers = op->params.at("num_layers").i; int flops2 = 0; - if(bi) + if (bi) { flops2 = 9 * hidden_size * hidden_size + 3 * hidden_size; - if(bias) + if (bias) { flops2 += 6 * hidden_size; } @@ -1810,7 +1810,7 @@ void Graph::flops_memops_sum() else { flops2 = 6 * hidden_size * hidden_size + 3 * hidden_size; - if(bias) + if (bias) { flops2 += 6 * hidden_size; } @@ -1823,12 +1823,12 @@ void Graph::flops_memops_sum() flops += (flops1 + flops2) * num_steps * batch_size; memops += num_steps * batch_size * input_size; memops += 2 * num_steps * batch_size * hidden_size * num_layers * (bi ? 2 : 1); - if(bias) + if (bias) { memops += 6 * hidden_size * num_layers * (bi ? 2 : 1); } } - else if(sub_type == "MultiheadAttention") + else if (sub_type == "MultiheadAttention") { bool batch_first = op->has_param("batch_first") && op->params.at("batch_first").b; int batch_size = batch_first ? op->inputs[0]->shape[0] : op->inputs[0]->shape[1]; @@ -1883,7 +1883,7 @@ void Graph::flops_memops_sum() else if (op->type.substr(0, 5) == "torch") { std::string sub_type = op->type.substr(6); - if(sub_type == "matmul" + if (sub_type == "matmul" || sub_type == "mm" || sub_type == "bmm") { @@ -1897,7 +1897,7 @@ void Graph::flops_memops_sum() memops += input_size_1 + input_size_2 + output_size; } else if (sub_type == "addmm" - || sub_type == "baddbmm") + || sub_type == "baddbmm") { std::vector input_shape = op->inputs[0]->shape; std::vector mat_shape_1 = op->inputs[1]->shape; @@ -1911,7 +1911,7 @@ void Graph::flops_memops_sum() memops += input_size + mat_size_1 + mat_size_2 + output_size; } else if (sub_type == "mul" - || sub_type == "add") + || sub_type == "add") { std::vector input_shape_1 = op->inputs[0]->shape; std::vector input_shape_2 = op->inputs[1]->shape; diff --git a/tools/pnnx/src/main.cpp b/tools/pnnx/src/main.cpp index 949680faab82..a50ca679fbc6 100644 --- a/tools/pnnx/src/main.cpp +++ b/tools/pnnx/src/main.cpp @@ -362,7 +362,7 @@ int main(int argc, char** argv) pnnx_graph.save(pnnxparampath, pnnxbinpath); pnnx_graph.python(pnnxpypath, pnnxbinpath); - + pnnx_graph.flops_memops_sum(); fprintf(stderr, "float ops = %.3fM\n", double(pnnx_graph.flops) / 1e6); fprintf(stderr, "mem ops = %.3fM\n", double(pnnx_graph.memops) / 1e6);
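A side note, not part of the patches themselves: once the series is applied, the counters can also be exercised outside of main.cpp by a small standalone driver. This is a minimal sketch assuming the existing pnnx::Graph::load(parampath, binpath) API and the flops/memops members introduced above; the model paths are placeholders.

// Minimal driver sketch; paths are placeholders.
#include <cstdio>
#include "ir.h"

int main()
{
    pnnx::Graph graph;
    if (graph.load("model.pnnx.param", "model.pnnx.bin") != 0)
    {
        fprintf(stderr, "load failed\n");
        return -1;
    }

    // Sum flops/memops across all operators, then report in millions,
    // matching the format used by the pnnx tool itself.
    graph.flops_memops_sum();
    fprintf(stderr, "float ops = %.3fM\n", double(graph.flops) / 1e6);
    fprintf(stderr, "mem ops = %.3fM\n", double(graph.memops) / 1e6);
    return 0;
}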