From a2de07c7720b9e4778440e9d79dffadd3066edfe Mon Sep 17 00:00:00 2001
From: Luke Hutton <luke.hutton@arm.com>
Date: Fri, 22 Mar 2024 16:42:12 +0000
Subject: [PATCH] [SME][AOT] Add Fixed Virtual Platform (FVP) functional
 testing infrastructure (#16749)

* [SME][AOT] Add Fixed Virtual Platform (FVP) functional testing infrastructure

This commit adds the infrastructure required for testing compiled
functions that use SME. A more in-depth discussion can be found here:
https://github.com/apache/tvm-rfcs/blob/main/rfcs/0107-scalable-matrix-extension-enablement.md#testing

Specifically, this commit adds: the installation of the AArch64
Architecture Envelope Model (AEM) Fixed Virtual Platform (FVP);
supporting files for compiling and running a graph on the FVP; sample
tests, which can be removed once TVM can generate SME; and some
enhancements to the AOT testing infrastructure so that TVM-compiled
functions can be run on the FVP.

Change-Id: I60d39fc17b826a9f5c71991d86d3791de83a54d4

* only run tests on 64bit machines

Change-Id: I182936ebb37e6ec9d9d260f71b3008743608c0dc

* update ci_cpu docker image

Change-Id: I765bbb796dcec5388d6b885119465f28d1159f53
---
 ci/jenkins/docker-images.ini                  |   2 +-
 python/tvm/testing/aot.py                     |  71 +++++++++++--
 tests/python/integration/test_arm_aprofile.py | 100 ++++++++++++++++++
 tests/python/relay/aot/aprofile_aem.mk        |  98 +++++++++++++++++
 .../aot/aprofile_extra_support_routines.c     |  25 +++++
 5 files changed, 287 insertions(+), 9 deletions(-)
 create mode 100644 tests/python/relay/aot/aprofile_aem.mk
 create mode 100644 tests/python/relay/aot/aprofile_extra_support_routines.c

diff --git a/ci/jenkins/docker-images.ini b/ci/jenkins/docker-images.ini
index ac30cbf97355..211ea029704b 100644
--- a/ci/jenkins/docker-images.ini
+++ b/ci/jenkins/docker-images.ini
@@ -19,7 +19,7 @@
 [jenkins]
 ci_arm: tlcpack/ci-arm:20240126-070121-8ade9c30e
 ci_cortexm: tlcpack/ci-cortexm:20240126-070121-8ade9c30e
-ci_cpu: tlcpack/ci-cpu:20240126-070121-8ade9c30e
+ci_cpu: tlcpack/ci_cpu:20240322-060059-89cd74c07
 ci_gpu: tlcpack/ci-gpu:20240126-070121-8ade9c30e
 ci_hexagon: tlcpack/ci-hexagon:20240126-070121-8ade9c30e
 ci_i386: tlcpack/ci-i386:20240126-070121-8ade9c30e
diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index 8d74f545a3c2..3a117624dfdb 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -179,6 +179,15 @@ def _subprocess_check_log_output(cmd, cwd, logfile):
         raise RuntimeError(f"Subprocess failed: {cmd}\nstdout:\n{stdout}")
 
 
+def _get_entrypoint_suffix(target):
+    """Return the entrypoint name suffix used by modules built for ``target``."""
+    # LLVM modules don't use the same entrypoint suffix
+    # as C source generated modules.
+    if target.kind.name != "llvm":
+        return "run"
+    return "__tvm_main__"
+
+
 def _mangle_name(mod_name, name):
     mod_name = mangle_module_name(mod_name)
     return mod_name + "_" + name
@@ -385,7 +394,14 @@ def _emit_main_fake_packed_values(main_file):
     )
 
 
-def _emit_main_packed_call(main_file, input_map, output_list, mod_name):
+def _emit_entry_function_forward_declaration(main_file, mod_name, entrypoint_suffix):
+    """Write a C forward declaration of the module's entry function to ``main_file``."""
+    entry_name = _mangle_name(mod_name, entrypoint_suffix)
+    arg_types = "TVMValue[], int32_t[], int32_t, void*, int32_t, void*"
+    main_file.write(f"int {entry_name}({arg_types});\n")
+
+
+def _emit_main_packed_call(main_file, input_map, output_list, mod_name, entrypoint_suffix):
     tensors_name = _mangle_name(mod_name, "tensors")
     values_name = _mangle_name(mod_name, "values")
     typeids_name = _mangle_name(mod_name, "typeids")
@@ -420,7 +436,8 @@ def fake_tensor(source, source_index, packed_index):
         fake_tensor(_mangle_name(mod_name, "outputs"), i, i + num_inputs)
 
     main_file.write(
-        f'{_mangle_name(mod_name, "run")}({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n'
+        f"{_mangle_name(mod_name, entrypoint_suffix)}"
+        f"({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n"
     )
     main_file.write("\n")
 
@@ -544,6 +561,15 @@ def _create_main(
             model = compiled_model.model
             _emit_main_data(main_file, model.inputs, model.outputs, model.name)
 
+        if interface_api == "packed":
+            for compiled_model in compiled_models:
+                entrypoint_suffix = _get_entrypoint_suffix(
+                    compiled_model.executor_factory.target[0]
+                )
+                _emit_entry_function_forward_declaration(
+                    main_file, compiled_model.model.name, entrypoint_suffix
+                )
+
         _emit_main_prologue(
             main_file,
             custom_prologue,
@@ -592,7 +618,12 @@ def _create_main(
             for compiled_model in compiled_models:
                 model = compiled_model.model
                 _emit_main_data_setup(main_file, model.inputs, model.outputs, model.name)
-                _emit_main_packed_call(main_file, model.inputs, model.outputs, model.name)
+                entrypoint_suffix = _get_entrypoint_suffix(
+                    compiled_model.executor_factory.target[0]
+                )
+                _emit_main_packed_call(
+                    main_file, model.inputs, model.outputs, model.name, entrypoint_suffix
+                )
 
         for compiled_model in compiled_models:
             model = compiled_model.model
@@ -665,6 +696,7 @@ def compile_models(
     workspace_memory_pools=None,
     constant_memory_pools=None,
     schedule_name: str = None,
+    runtime: tvm.relay.backend.Runtime = Runtime("crt"),
 ) -> List[AOTCompiledTestModel]:
     """
     This method generates runtime.Modules for the tests
@@ -672,7 +704,10 @@ def compile_models(
     if not isinstance(models, list):
         models = [models]
 
-    runtime = Runtime("crt")
+    assert (
+        runtime.name == "crt"
+    ), f"Currently only 'crt' is supported by the test framework, but got {runtime.name}"
+
     executor = Executor(
         "aot",
         {
@@ -835,10 +870,12 @@ def run_and_check_body(base_path):
         makefile_dir = os.path.join(file_dir, "../../../tests/python/relay/aot")
         codegen_path = os.path.join(base_path, "codegen")
         makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk")
-        fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
-        # TODO(@grant-arm): Remove once ci_cpu docker image has been updated to FVP_Corstone_SSE
-        if not os.path.isdir(fvp_dir):
-            fvp_dir = "/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/"
+
+        if runner.makefile == "aprofile_aem":
+            fvp_dir = "/opt/arm/fvp/Base_RevC_AEMvA_pkg/models/Linux64_GCC-9.3/"
+        else:
+            fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
+
         custom_params = " ".join(
             [f" {param}='{value}'" for param, value in runner.parameters.items()]
         )
@@ -901,11 +938,28 @@ def compile_and_run(
     debug_last_error: bool = False,
     checker: Optional[Callable[[str], bool]] = None,
     print_output_on_mismatch: bool = False,
+    runtime: tvm.relay.backend.Runtime = Runtime("crt"),
 ) -> bool:
     """This is a wrapper API to compile and run models as test for AoT
 
     Parameters
     ----------
+    interface_api : str
+        The external calling convention interface API.
+
+        Examples: "c", "packed"
+
+    use_unpacked_api : bool
+        Whether or not to use type-erased API internally for the
+        operator calling convention.
+
+        Note: This feature can be useful for embedded targets
+        when space is at a premium.
+
+        Permitted values when interface API is:
+        > "c": True
+        > "packed": True/False
+
     test_dir : str
         This path will contain build, codegen, include directories.
 
@@ -935,6 +989,7 @@ def compile_and_run(
         use_runtime_executor=use_runtime_executor,
         target=target,
         schedule_name=schedule_name,
+        runtime=runtime,
     )
 
     return run_and_check(
diff --git a/tests/python/integration/test_arm_aprofile.py b/tests/python/integration/test_arm_aprofile.py
index 006ad5f359f4..af35a1429735 100644
--- a/tests/python/integration/test_arm_aprofile.py
+++ b/tests/python/integration/test_arm_aprofile.py
@@ -16,13 +16,18 @@
 # under the License.
 """Tests for Arm(R) A-Profile Architecture."""
 import os
+import subprocess
+
 import numpy as np
 import pytest
+
 import tvm
 import tvm.testing
 from tvm import relay
 from tvm.relay.transform import ToMixedPrecision, FoldConstant
 from tvm.relay.build_module import bind_params_by_name
+from tvm.testing.aot import AOTTestModel, AOTTestRunner, generate_ref_data, compile_and_run
+from tvm.contrib import utils
 
 
 def get_mattr(dtype):
@@ -73,3 +78,98 @@ def test_conv2d(dtype):
     with tvm.transform.PassContext(opt_level=3):
         lib = tvm.relay.build(mod, target=target, params=params)
         lib.export_library(lib_path, cc="aarch64-linux-gnu-gcc")
+
+
+# AOT test runner that builds with aprofile_aem.mk and runs on the AArch64
+# Architecture Envelope Model (AEM) Fixed Virtual Platform (FVP) reference system.
+# See: https://developer.arm.com/Tools%20and%20Software/Fixed%20Virtual%20Platforms
+AOT_APROFILE_AEM_RUNNER = AOTTestRunner(
+    makefile="aprofile_aem",
+    pass_config={
+        "tir.usmp.enable": False,
+        "tir.disable_assert": True,  # AOT test infra creates 'fake' inputs that fail asserts
+    },
+)
+
+
+@tvm.testing.requires_x86
+@tvm.testing.skip_if_32bit
+def test_aem_simple_addition():
+    """Tests a simple addition running on the AArch64 AEM."""
+    # Build `data + ones` as a single-function Relay module and infer its types.
+    data_var = relay.var("data", shape=(1, 2, 4, 4))
+    add_func = relay.Function([data_var], relay.add(data_var, relay.const(np.ones((1, 2, 4, 4)))))
+    ir_mod = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(add_func))
+
+    # Size and type the random input from the inferred signature of "main".
+    main_params = ir_mod["main"].params
+    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_params}
+    type_dict = {p.name_hint: p.checked_type.dtype for p in main_params}
+    input_data = np.random.uniform(size=shape_dict["data"]).astype(type_dict["data"])
+    inputs = {"data": input_data}
+    params = {}
+    ref_outputs = generate_ref_data(ir_mod, inputs, params)
+
+    test_model = AOTTestModel(module=ir_mod, inputs=inputs, outputs=ref_outputs, params=params)
+    compile_and_run(
+        test_model,
+        target=tvm.target.Target("llvm -mtriple=aarch64-none-elf"),
+        runtime=tvm.relay.backend.Runtime("crt", {"system-lib": True}),
+        interface_api="packed",
+        use_unpacked_api=False,
+        runner=AOT_APROFILE_AEM_RUNNER,
+    )
+
+
+@tvm.testing.requires_x86
+@tvm.testing.skip_if_32bit
+def test_aem_asm_sme():
+    """
+    Tests SME assembly runs on the AArch64 AEM. This test is used as a simple
+    sanity check until the TVM schedules are able to produce SME.
+    """
+    # "EXITTHESIM" matches bp.pl011_uart0.shutdown_tag in aprofile_aem.mk.
+    c_code = """
+    #include <stdio.h>
+
+    int main(void) {
+        __asm volatile(
+            "smstart\\n"
+            "smstop\\n"
+        );
+        printf("EXITTHESIM\\n");
+        return 0;
+    }
+    """
+
+    tmpdir = utils.tempdir()
+    build_path = os.path.join(tmpdir.path, "build")
+    os.makedirs(build_path, exist_ok=True)
+
+    with open(build_path + "/test.c", "w") as f:
+        f.write(c_code)
+
+    file_dir = os.path.dirname(os.path.abspath(__file__))
+    makefile_dir = os.path.join(file_dir, "../../../tests/python/relay/aot")
+    makefile = os.path.join(makefile_dir, f"{AOT_APROFILE_AEM_RUNNER.makefile}.mk")
+
+    make_command = (
+        f"make -f {makefile} build_dir={build_path}"
+        + f" TVM_ROOT={file_dir}/../../.."
+        + f" AOT_TEST_ROOT={makefile_dir}"
+        + " FVP_DIR=/opt/arm/fvp/Base_RevC_AEMvA_pkg/models/Linux64_GCC-9.3/"
+    )
+
+    # subprocess.run() reads stdout until the child exits, so a verbose build or
+    # simulator cannot block on a full pipe as it could with Popen(...).wait().
+    compile_command = f"{make_command} aot_test_runner"
+    result = subprocess.run(compile_command, cwd=build_path, shell=True, stdout=subprocess.PIPE)
+    assert not result.returncode, f"Failed to compile:\n{result.stdout.decode(errors='replace')}"
+
+    run_command = f"{make_command} run"
+    result = subprocess.run(run_command, cwd=build_path, shell=True, stdout=subprocess.PIPE)
+    assert not result.returncode, f"Failed to run:\n{result.stdout.decode(errors='replace')}"
+
+
+if __name__ == "__main__":
+    tvm.testing.main()
diff --git a/tests/python/relay/aot/aprofile_aem.mk b/tests/python/relay/aot/aprofile_aem.mk
new file mode 100644
index 000000000000..54be216eb6dd
--- /dev/null
+++ b/tests/python/relay/aot/aprofile_aem.mk
@@ -0,0 +1,98 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Makefile to build and run AOT tests against the AArch64
+# reference system
+
+CC = clang-16
+LD = aarch64-none-elf-gcc
+
+TARGET_ARCH = --target=aarch64-none-elf -march=armv9-a+sme
+SYS_ROOT = /opt/arm/gcc-aarch64-none-elf/aarch64-none-elf/
+
+OBJ_FILES := $(build_dir)/test.o $(build_dir)/aprofile_extra_support_routines.o
+# $(addprefix) drops the flag when SRC_DIR is unset; a bare "-I" would swallow the next option.
+INCLUDES = $(addprefix -I,$(SRC_DIR)) -I$(TVM_ROOT)/include \
+           -I$(build_dir)/../include
+
+ifneq ($(CODEGEN_ROOT),)
+    OBJ_FILES := $(OBJ_FILES) $(wildcard $(CODEGEN_ROOT)/host/lib/*.o)
+    INCLUDES := $(INCLUDES) -I$(CODEGEN_ROOT)/host/include
+endif
+
+ifneq ($(STANDALONE_CRT_DIR),)
+    OBJ_FILES := $(OBJ_FILES) $(build_dir)/stack_allocator.o \
+             $(build_dir)/crt_backend_api.o
+    INCLUDES := $(INCLUDES) -isystem$(STANDALONE_CRT_DIR)/include
+endif
+
+PKG_LDFLAGS = --specs=$(SYS_ROOT)lib/aem-ve.specs --sysroot $(SYS_ROOT)
+PKG_CFLAGS = $(INCLUDES) --sysroot $(SYS_ROOT) -c -O3 $(CFLAGS)
+PKG_ASFLAGS = $(INCLUDES) --sysroot $(SYS_ROOT) -c
+
+aot_test_runner: $(build_dir)/aot_test_runner
+
+$(build_dir)/aot_test_runner: $(OBJ_FILES)
+	$(LD) $(INCLUDES) $(PKG_LDFLAGS) -o $@ $^
+
+$(build_dir)/test.o: $(build_dir)/test.c
+	$(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $<
+
+# TODO(lhutton1) This is a workaround while __arm_tpidr2_save and
+# __arm_tpidr2_restore are not provided with the toolchain. More
+# information in aprofile_extra_support_routines.c.
+$(build_dir)/aprofile_extra_support_routines.o: ${AOT_TEST_ROOT}/aprofile_extra_support_routines.c
+	$(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $<
+
+$(build_dir)/stack_allocator.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/memory/stack_allocator.c
+	$(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $<
+
+$(build_dir)/crt_backend_api.o: $(STANDALONE_CRT_DIR)/src/runtime/crt/common/crt_backend_api.c
+	$(CC) $(TARGET_ARCH) $(PKG_CFLAGS) -o $@ $<
+
+# Run the test binary on the AEM FVP with SVE2/SME/SME2 enabled. FVP_DIR may be
+# passed with or without a trailing slash.
+run: $(build_dir)/aot_test_runner
+	$(FVP_DIR)/FVP_Base_RevC-2xAEMvA \
+    -a $(build_dir)/aot_test_runner \
+    --plugin $(FVP_DIR)/../../plugins/Linux64_GCC-9.3/ScalableVectorExtension.so \
+    -C SVE.ScalableVectorExtension.has_sme2=1 \
+    -C SVE.ScalableVectorExtension.has_sme=1 \
+    -C SVE.ScalableVectorExtension.has_sve2=1 \
+    -C SVE.ScalableVectorExtension.enable_at_reset=1 \
+    -C bp.secure_memory=false \
+    -C bp.terminal_0.start_telnet=0 -C bp.terminal_1.start_telnet=0 \
+    -C bp.terminal_2.start_telnet=0 -C bp.terminal_3.start_telnet=0 \
+    -C bp.vis.disable_visualisation=1 \
+    -C bp.pl011_uart0.out_file="-" \
+    -C bp.pl011_uart0.shutdown_tag=\"EXITTHESIM\" \
+    -C semihosting-enable=1
+
+# Note: It's possible to trace instructions running on the FVP by adding the option
+# --plugin /opt/arm/fvp/Base_RevC_AEMvA_pkg/plugins/Linux64_GCC-9.3/TarmacTrace.so
+
+clean:
+	rm -rf $(build_dir)/crt
+
+cleanall:
+	rm -rf $(build_dir)
+
+# Disable the built-in suffix rules.
+.SUFFIXES:
+.DEFAULT_GOAL := aot_test_runner
+# These targets are commands, not files, so always run their recipes.
+.PHONY: run clean cleanall
diff --git a/tests/python/relay/aot/aprofile_extra_support_routines.c b/tests/python/relay/aot/aprofile_extra_support_routines.c
new file mode 100644
index 000000000000..9d8fde158041
--- /dev/null
+++ b/tests/python/relay/aot/aprofile_extra_support_routines.c
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// The support routines __arm_tpidr2_save and __arm_tpidr2_restore are not
+// yet available in the latest release of the gcc-aarch64-none-elf toolchain
+// (13.2.rel1). For now, these empty stubs provide the symbols to fix the build.
+// When the toolchain provides them in a later release, these stubs can be removed.
+void __arm_tpidr2_save(void) {}
+void __arm_tpidr2_restore(void) {}