Skip to content

Commit

Permalink
Merge branch 'apache:main' into feature/topi.strided_slice-shape-fix-v2
Browse files Browse the repository at this point in the history
  • Loading branch information
PatrikPerssonInceptron authored Nov 3, 2024
2 parents 84d3c5a + 84141c4 commit 1fc0632
Show file tree
Hide file tree
Showing 5 changed files with 555 additions and 7 deletions.
1 change: 1 addition & 0 deletions cmake/modules/contrib/Mrvl.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ if(USE_MRVL)
message(STATUS "Build with Mrvl support")
file(GLOB RUNTIME_MRVL_SRCS
src/runtime/contrib/mrvl/mrvl_runtime.cc
src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.cc
)
list(APPEND RUNTIME_SRCS ${RUNTIME_MRVL_SRCS})
Expand Down
33 changes: 29 additions & 4 deletions docs/how_to/deploy/mrvl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,11 @@ integrated MLIP cn10ka processor, using only 4 tiles in the block.
python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
--target-llvm-mtriple=aarch64-linux-gnu --target-llvm-mcpu=neoverse-n2 \
--target-mrvl-num_tiles=4 \
--target-mrvl-mattr="hw -quantize=fp16 -wb_pin_ocm=1" \
--cross-compiler aarch64-linux-gnu-gcc \
--output model.tar \
mnist-12.onnx
The runtime support for hardware acceleration is a WIP, it will be added in future PR.
3.3. TVMC Compiler: mrvl specific Command Line Options
------------------------------------------------------
Expand All @@ -125,7 +125,7 @@ The runtime support for hardware acceleration is a WIP, it will be added in futu
Maximum number of tiles that may be used, possible values = {1,2,4,8}, defaults to 8

* mattr:
Attributes for mrvl; possible values = {quantize, wb_pin_ocm}
Attributes for mrvl; possible values = {quantize, wb_pin_ocm, run_mode}

mattr specifies the data type, code generation options and optimizations.

Expand All @@ -141,15 +141,23 @@ The runtime support for hardware acceleration is a WIP, it will be added in futu
Optimize runtime by preloading a model's weights and bias into
the on chip memory. Possible values = {0, 1}. Default is 0 (no preload)

4. Compile ONNX model for Simulator + LLVM / x86_64 target
----------------------------------------------------------
**3. run_mode**

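Specify whether to compile for the simulator or for the target hardware (Octeon).
Possible values = {sim, hw}. Default is sim (software simulator).
The mode is given as a bare keyword inside the mattr string, for example
``--target-mrvl-mattr="hw -quantize=fp16 -wb_pin_ocm=1"`` as shown in section 3.2.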

4. Compile ONNX model using the TVMC flow
-----------------------------------------

In the TVMC mrvl flow, the model is partitioned into Marvell and LLVM regions.
Building each partitioned Marvell subgraph generates serialized nodes.json and
const.json files. The partitioned nodes.json is a representation of the model graph
that is suitable for the Marvell compiler (mrvl-tmlc), which compiles the model graph
to generate the model binary with MLIP instructions.

4.1 Compile and Run ONNX model for Simulator + LLVM / x86_64 target
--------------------------------------------------------------------

**Model Compilation for Simulator + LLVM / x86_64 target**

.. code:: python
Expand All @@ -165,6 +173,23 @@ Generated model binary is simulated using Marvell's MLIP Simulator(mrvl-mlsim).
python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz model.tar --number=0
4.2 Compile and Run ONNX model for Octeon target
----------------------------------------------------------

**Model Compilation for Octeon target**

Please refer to section 3.2 for the example command line.

**Run TVM models on the Octeon Target**

The cross-compiled binary can be run on the target hardware using the tvmc run command.
Alternatively, the RPC flow enables remote execution on the target device from your
local machine: https://tvm.apache.org/docs/how_to/tutorials/cross_compilation_and_rpc.html

.. code:: python
python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz model.tar
5. Compiling a model using Python APIs
--------------------------------------

Expand Down
17 changes: 17 additions & 0 deletions python/tvm/relay/op/contrib/mrvl.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ def add_attributes(mod, annotate_target_str, **kwargs):
mod : module with attributes
"""
working_dir = mrvl_contrib.get_working_dir()
sim_attr_found = False
hw_attr_found = False

if "mattr" in kwargs:
base_opts_str = kwargs.get("mattr")
Expand All @@ -286,6 +288,14 @@ def add_attributes(mod, annotate_target_str, **kwargs):
if "wb_pin_ocm" not in base_opts_str:
base_opts_str = f"{base_opts_str} -wb_pin_ocm=0"

if "sim" in base_opts_str:
sim_attr_found = True
base_opts_str = base_opts_str.replace("sim", "")

if "hw" in base_opts_str:
hw_attr_found = True
base_opts_str = base_opts_str.replace("hw", "")

else:
base_opts_str = "-arch=mlip -quantize=fp16 -wb_pin_ocm=0"

Expand All @@ -294,13 +304,20 @@ def add_attributes(mod, annotate_target_str, **kwargs):
elif "num_tiles" not in base_opts_str:
base_opts_str = f"{base_opts_str} -num_tiles=8"

mode_string = "sim"
if sim_attr_found:
mode_string = "sim"
elif hw_attr_found:
mode_string = "hw"

for var in mod.get_global_vars():
func_name = var.name_hint
func = mod[func_name]

if annotate_target_str in func_name:
func = func.with_attr("working_dir", working_dir)
func = func.with_attr("compiler_opts_string", base_opts_str)
func = func.with_attr("mode", mode_string)
mod.update_func(var, func)

return mod
Expand Down
26 changes: 23 additions & 3 deletions src/relay/backend/contrib/mrvl/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,7 @@ runtime::Module MrvlCompiler(const ObjectRef& ref) {

Function func = Downcast<Function>(ref);
std::string func_name = backend::GetExtSymbol(func);
const std::string mrvl_run_mode = func->GetAttr<String>("mode").value();
runtime::Module runtime_lib;

// Extract attributes from the frontend to be passed to the runtime
Expand All @@ -1485,13 +1486,32 @@ runtime::Module MrvlCompiler(const ObjectRef& ref) {
std::string modified_json = (*modifyConsts)(nodes_json_string, consts_json_string);
auto json_vec = split(modified_json, '|');

// Extract attributes from the nodes_json by key-value lookup using Python API
// These are passed to hardware runtime module for initialization
const tvm::runtime::PackedFunc* json_lookup;
json_lookup = runtime::Registry::Get("tvm.mrvl.find_value_in_KV_pair");
const std::string string_inp = (*json_lookup)(nodes_json_string, "num_subgraph_inputs");
const int num_inputs = std::stoi(string_inp);
const std::string string_out = (*json_lookup)(nodes_json_string, "num_subgraph_outputs");
const int num_outputs = std::stoi(string_out);
const std::string string_bsize = (*json_lookup)(nodes_json_string, "batch_size");
const int batch_size = std::stoi(string_bsize);

// Invoke Marvell Backend compiler to generate binary for sub graph
const auto* compile = runtime::Registry::Get("tvm.mrvl.CompileModel");
std::string bin = (*compile)(func_name, json_vec[0], json_vec[1], compiler_opt);

const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin);
if (mrvl_run_mode == "sim") {
const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin);
} else if (mrvl_run_mode == "hw") {
const auto* pf = runtime::Registry::Get("runtime.mrvl_hw_runtime_create");
ICHECK(pf != nullptr) << "Cannot find hardware runtime module to create";
runtime_lib = (*pf)(func_name, json_vec[0], bin, num_inputs, num_outputs, batch_size);
} else {
ICHECK(0) << "Unrecognized Marvell Run Mode! " << mrvl_run_mode;
}

return runtime_lib;
}
Expand Down
Loading

0 comments on commit 1fc0632

Please sign in to comment.