[mrvl][runtime]: Support Marvell Hardware Runtime #17498

Merged: 1 commit, Nov 2, 2024
1 change: 1 addition & 0 deletions cmake/modules/contrib/Mrvl.cmake
@@ -20,6 +20,7 @@ if(USE_MRVL)
message(STATUS "Build with Mrvl support")
file(GLOB RUNTIME_MRVL_SRCS
src/runtime/contrib/mrvl/mrvl_runtime.cc
src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.cc
)
list(APPEND RUNTIME_SRCS ${RUNTIME_MRVL_SRCS})
33 changes: 29 additions & 4 deletions docs/how_to/deploy/mrvl.rst
@@ -100,11 +100,11 @@ integrated MLIP cn10ka processor, using only 4 tiles in the block.
python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
--target-llvm-mtriple=aarch64-linux-gnu --target-llvm-mcpu=neoverse-n2 \
--target-mrvl-num_tiles=4 \
--target-mrvl-mattr="hw -quantize=fp16 -wb_pin_ocm=1" \
--cross-compiler aarch64-linux-gnu-gcc \
--output model.tar \
mnist-12.onnx

Runtime support for hardware acceleration is a work in progress; it will be added in a future PR.

3.3. TVMC Compiler: mrvl specific Command Line Options
------------------------------------------------------
@@ -125,7 +125,7 @@
Maximum number of tiles that may be used, possible values = {1,2,4,8}, defaults to 8

* mattr:
Attributes for mrvl; possible values = {quantize, wb_pin_ocm}
Attributes for mrvl; possible values = {quantize, wb_pin_ocm, run_mode}

mattr specifies the data type, code generation options and optimizations.

@@ -141,15 +141,23 @@
Optimize runtime by preloading a model's weights and bias into
the on chip memory. Possible values = {0, 1}. Default is 0 (no preload)

4. Compile ONNX model for Simulator + LLVM / x86_64 target
----------------------------------------------------------
**3. run_mode**

Specify whether to compile for the simulator or for the target hardware (Octeon).
Possible values = {sim, hw}. Default is sim (software simulator).
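
The mode selection this option drives can be sketched as a standalone helper. The function below is illustrative only (it is not part of the TVM codebase); it mirrors how the ``sim``/``hw`` token in ``mattr`` picks the run mode, with ``sim`` as the default:

```python
def parse_run_mode(mattr: str):
    """Return ("sim" | "hw", mattr with the mode token removed).

    Illustrative sketch of the run-mode handling; "sim" wins by
    default and when both tokens are somehow present.
    """
    mode = "sim"  # default: software simulator
    if "sim" in mattr:
        mattr = mattr.replace("sim", "")
    elif "hw" in mattr:
        mode = "hw"
        mattr = mattr.replace("hw", "")
    return mode, mattr.strip()
```

For example, ``parse_run_mode("hw -quantize=fp16 -wb_pin_ocm=1")`` selects the hardware runtime, while an ``mattr`` string with no mode token falls back to the simulator.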

4. Compile ONNX model using the TVMC flow
-----------------------------------------

In the TVMC mrvl flow, the model is partitioned into Marvell and LLVM regions.
Building each partitioned Marvell subgraph generates serialized nodes.json and
const.json files. The nodes.json file is a representation of the model graph
suitable for the Marvell compiler (mrvl-tmlc), which compiles the graph into a
model binary containing MLIP instructions.

4.1 Compile and Run ONNX model for Simulator + LLVM / x86_64 target
--------------------------------------------------------------------

**Model Compilation for Simulator + LLVM / x86_64 target**

.. code:: python
@@ -165,6 +173,23 @@ Generated model binary is simulated using Marvell's MLIP Simulator (mrvl-mlsim).

python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz model.tar --number=0
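
The ``--inputs`` argument expects a NumPy ``.npz`` archive mapping input tensor names to arrays. A minimal sketch of preparing one for the mnist-12.onnx example follows; the input name ``Input3`` and shape ``(1, 1, 28, 28)`` are assumptions about that model, so check your model's actual input signature:

```python
import numpy as np

# Build an infer.npz archive for `tvmc run`.  The key must match the
# model's input tensor name ("Input3" is assumed for mnist-12.onnx).
image = np.random.rand(1, 1, 28, 28).astype("float32")
np.savez("infer.npz", Input3=image)

# tvmc writes results to predict.npz, which can be read back with np.load.
```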

4.2 Compile and Run ONNX model for Octeon target
----------------------------------------------------------

**Model Compilation for Octeon target**

Please refer to section 3.2 for the example command line.

**Run TVM models on the Octeon Target**

The cross-compiled binary can be run on the target hardware using the tvmc run command.
Alternatively, the RPC flow enables remote execution on the target device from your
local machine: https://tvm.apache.org/docs/how_to/tutorials/cross_compilation_and_rpc.html

.. code:: python

python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz model.tar

5. Compiling a model using Python APIs
--------------------------------------

17 changes: 17 additions & 0 deletions python/tvm/relay/op/contrib/mrvl.py
@@ -272,6 +272,8 @@ def add_attributes(mod, annotate_target_str, **kwargs):
mod : module with attributes
"""
working_dir = mrvl_contrib.get_working_dir()
sim_attr_found = False
hw_attr_found = False

if "mattr" in kwargs:
base_opts_str = kwargs.get("mattr")
@@ -286,6 +288,14 @@
if "wb_pin_ocm" not in base_opts_str:
base_opts_str = f"{base_opts_str} -wb_pin_ocm=0"

if "sim" in base_opts_str:
sim_attr_found = True
base_opts_str = base_opts_str.replace("sim", "")

if "hw" in base_opts_str:
hw_attr_found = True
base_opts_str = base_opts_str.replace("hw", "")

else:
base_opts_str = "-arch=mlip -quantize=fp16 -wb_pin_ocm=0"

@@ -294,13 +304,20 @@
elif "num_tiles" not in base_opts_str:
base_opts_str = f"{base_opts_str} -num_tiles=8"

mode_string = "sim"
if sim_attr_found:
mode_string = "sim"
elif hw_attr_found:
mode_string = "hw"

for var in mod.get_global_vars():
func_name = var.name_hint
func = mod[func_name]

if annotate_target_str in func_name:
func = func.with_attr("working_dir", working_dir)
func = func.with_attr("compiler_opts_string", base_opts_str)
func = func.with_attr("mode", mode_string)
mod.update_func(var, func)

return mod
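
As a rough standalone sketch of the option handling above (illustrative names, simplified from ``add_attributes``), the default compiler options are filled in like this:

```python
def build_compiler_opts(mattr=None, num_tiles=None):
    """Simplified, illustrative sketch of how default Marvell compiler
    options are assembled when the user omits them."""
    # Fall back to the documented defaults when no mattr is given.
    opts = mattr if mattr else "-arch=mlip -quantize=fp16 -wb_pin_ocm=0"
    if "wb_pin_ocm" not in opts:
        opts += " -wb_pin_ocm=0"
    # num_tiles defaults to 8 unless supplied explicitly.
    if num_tiles is not None:
        opts += f" -num_tiles={num_tiles}"
    elif "num_tiles" not in opts:
        opts += " -num_tiles=8"
    return opts
```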
26 changes: 23 additions & 3 deletions src/relay/backend/contrib/mrvl/codegen.cc
@@ -1467,6 +1467,7 @@ runtime::Module MrvlCompiler(const ObjectRef& ref) {

Function func = Downcast<Function>(ref);
std::string func_name = backend::GetExtSymbol(func);
const std::string mrvl_run_mode = func->GetAttr<String>("mode").value();
runtime::Module runtime_lib;

// Extract attributes from the frontend to be passed to the runtime
@@ -1485,13 +1486,32 @@
std::string modified_json = (*modifyConsts)(nodes_json_string, consts_json_string);
auto json_vec = split(modified_json, '|');

// Extract attributes from the nodes_json by key-value lookup using Python API
// These are passed to hardware runtime module for initialization
const tvm::runtime::PackedFunc* json_lookup;
json_lookup = runtime::Registry::Get("tvm.mrvl.find_value_in_KV_pair");
const std::string string_inp = (*json_lookup)(nodes_json_string, "num_subgraph_inputs");
const int num_inputs = std::stoi(string_inp);
const std::string string_out = (*json_lookup)(nodes_json_string, "num_subgraph_outputs");
const int num_outputs = std::stoi(string_out);
const std::string string_bsize = (*json_lookup)(nodes_json_string, "batch_size");
const int batch_size = std::stoi(string_bsize);

// Invoke Marvell Backend compiler to generate binary for sub graph
const auto* compile = runtime::Registry::Get("tvm.mrvl.CompileModel");
std::string bin = (*compile)(func_name, json_vec[0], json_vec[1], compiler_opt);

const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin);
if (mrvl_run_mode == "sim") {
const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin);
} else if (mrvl_run_mode == "hw") {
const auto* pf = runtime::Registry::Get("runtime.mrvl_hw_runtime_create");
ICHECK(pf != nullptr) << "Cannot find hardware runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin, num_inputs, num_outputs, batch_size);
} else {
ICHECK(0) << "Unrecognized Marvell Run Mode! " << mrvl_run_mode;
}

return runtime_lib;
}
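
The `tvm.mrvl.find_value_in_KV_pair` packed function that the C++ code looks up above is expected to return the value for a key inside the nodes.json document. Its behavior can be sketched in Python as a recursive key lookup; this is an illustrative approximation, not the registered implementation:

```python
import json

def find_value_in_kv_pair(nodes_json: str, key: str) -> str:
    """Return the value for `key` anywhere in a (possibly nested) JSON
    document, as a string.  The hardware runtime needs this for keys
    such as num_subgraph_inputs, num_subgraph_outputs, and batch_size."""
    def walk(obj):
        if isinstance(obj, dict):
            for k, v in obj.items():
                if k == key:
                    return v
                found = walk(v)
                if found is not None:
                    return found
        elif isinstance(obj, list):
            for item in obj:
                found = walk(item)
                if found is not None:
                    return found
        return None
    return str(walk(json.loads(nodes_json)))
```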