From 35e1a76f2573cf3fdb46a251e0126c0b10e8be08 Mon Sep 17 00:00:00 2001
From: donglinb <65906596+donglinb@users.noreply.github.com>
Date: Wed, 19 Jul 2023 11:06:19 +0800
Subject: [PATCH] Release loaded DLL libraries in destructor of class Executor
 (#510)

* added version 15 support for Shape operator

* fix Slice operator

* release loaded DLLs in destructor

* added HLSL test pipeline on Windows

---------

Co-authored-by: donglinb
---
 src/python/nnfusion/executor.py            |  35 +-
 test/python/nnfusion_wsl_codegen_server.py |  87 ++++
 test/python/onnx_test_win.py               | 444 +++++++++++++++++++++
 3 files changed, 562 insertions(+), 4 deletions(-)
 create mode 100644 test/python/nnfusion_wsl_codegen_server.py
 create mode 100644 test/python/onnx_test_win.py

diff --git a/src/python/nnfusion/executor.py b/src/python/nnfusion/executor.py
index a8ebb569c..70ee8e9fc 100644
--- a/src/python/nnfusion/executor.py
+++ b/src/python/nnfusion/executor.py
@@ -108,13 +108,15 @@ def __init__(self, nnf_rt_dir, device=None):
 
         # prepare init/free/kernel_entry
         self.init_flag = False
-        if os.path.exists(os.path.join(nnf_rt_dir, "antares.dll")):
-            HLSLTensor.init_antares_lib(os.path.join(nnf_rt_dir, "antares.dll"))
         # dxil.dll and dxcompiler.dll must be manually imported
+        self.lib_dxil, self.lib_dxcompiler = None, None
         if os.path.exists(os.path.join(nnf_rt_dir, "dxil.dll")):
-            ctypes.cdll.LoadLibrary(os.path.join(nnf_rt_dir, "dxil.dll"))
+            self.lib_dxil = ctypes.cdll.LoadLibrary(os.path.join(nnf_rt_dir, "dxil.dll"))
         if os.path.exists(os.path.join(nnf_rt_dir, "dxcompiler.dll")):
-            ctypes.cdll.LoadLibrary(os.path.join(nnf_rt_dir, "dxcompiler.dll"))
+            self.lib_dxcompiler = ctypes.cdll.LoadLibrary(os.path.join(nnf_rt_dir, "dxcompiler.dll"))
+        # antares.dll must be loaded after dxil.dll and dxcompiler.dll
+        if os.path.exists(os.path.join(nnf_rt_dir, "antares.dll")):
+            HLSLTensor.init_antares_lib(os.path.join(nnf_rt_dir, "antares.dll"))
         self.libnnf = ctypes.cdll.LoadLibrary(self.libnnf_path)
         if hasattr(self.libnnf, "kernel_entry_host"):
             self.kernel_entry = self.libnnf.kernel_entry_host
@@ -278,11 +280,36 @@ def __del__(self):
         if self.init_flag and nnf_rt_free:
             nnf_rt_free()
             self.init_flag = False
+        self._release_dynamic_libraries()
 
     def __call__(self, *args, **kwargs):
         # self.feed_tensors(*args, **kwargs)
         return self.feed_data(*args, **kwargs)
 
+    def _release_dynamic_libraries(self):
+        # Four DLLs are loaded: antares.dll, dxcompiler.dll, dxil.dll and nnfusion_rt.dll,
+        # but antares.dll is wrapped in the class HLSLTensor and loaded only once per process.
+        # The other three DLLs are loaded for each test case and need to be explicitly released.
+        if platform.system().lower() == "windows":
+            ctypes.windll.kernel32.FreeLibrary.argtypes = [ctypes.c_void_p]
+            # release nnfusion_rt.dll
+            handle = self.libnnf._handle
+            del self.libnnf
+            ctypes.windll.kernel32.FreeLibrary(handle)
+            # release dxil.dll
+            if self.lib_dxil:
+                handle = self.lib_dxil._handle
+                del self.lib_dxil
+                ctypes.windll.kernel32.FreeLibrary(handle)
+            # release dxcompiler.dll
+            if self.lib_dxcompiler:
+                handle = self.lib_dxcompiler._handle
+                del self.lib_dxcompiler
+                ctypes.windll.kernel32.FreeLibrary(handle)
+        elif platform.system().lower() == "linux":
+            pass  # TODO: release libraries on Linux
+        return
+
     def _dict_to_pointer_list(self, inputs, outputs, strict=True):
         signature = [None] * (len(self.input_descs) + len(self.output_descs))
         params = [None] * (len(self.input_descs) + len(self.output_descs))
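The Windows branch of _release_dynamic_libraries above relies on two details of
ctypes: a loaded CDLL object exposes its OS module handle as the private
attribute _handle, and kernel32.FreeLibrary must be declared to take a
pointer-sized argument so the handle is not truncated to a 32-bit int on 64-bit
Python. A minimal standalone sketch of that load/release pattern (Windows-only;
the DLL path is a placeholder, not part of this patch):

    import ctypes

    def load_and_release(dll_path):
        # LoadLibrary returns a CDLL whose _handle is the HMODULE.
        lib = ctypes.cdll.LoadLibrary(dll_path)
        handle = lib._handle
        # Drop the Python wrapper first so no stale function pointers survive.
        del lib
        # Declare the argument as pointer-sized before passing the handle.
        kernel32 = ctypes.windll.kernel32
        kernel32.FreeLibrary.argtypes = [ctypes.c_void_p]
        kernel32.FreeLibrary(handle)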
diff --git a/test/python/nnfusion_wsl_codegen_server.py b/test/python/nnfusion_wsl_codegen_server.py
new file mode 100644
index 000000000..78d917201
--- /dev/null
+++ b/test/python/nnfusion_wsl_codegen_server.py
@@ -0,0 +1,87 @@
+import os
+import sys
+import json
+import socket
+import argparse
+import subprocess
+from contextlib import contextmanager
+from wslpath import wslpath
+# pip install wslpath-python
+
+
+@contextmanager
+def cd(newdir):
+    prevdir = os.getcwd()
+    os.chdir(os.path.expanduser(newdir))
+    try:
+        yield
+    finally:
+        os.chdir(prevdir)
+
+
+def run(exec_path, port = 65432, host = '127.0.0.1'):
+    cmd_options = [
+        '-f onnx',
+        '-p "batch_size:1"',
+        '-fmulti_shape=false',
+        '-fort_folding=false',
+        '-fdefault_device=HLSL',
+        '-fhlsl_codegen_type=cpp',
+        '-fantares_mode=true',
+        '-fblockfusion_level=0',
+        '-fkernel_fusion_level=0',
+        '-fkernel_tuning_steps=0',
+        '-ffold_where=0',
+        '-fsymbolic=0',
+        '-fsplit_softmax=0',
+        '-fhost_entry=0',
+        '-fir_based_fusion=1',
+        '-fextern_result_memory=1',
+        '-fuse_cpuprofiler=1',
+        '-ftuning_platform="win64"',
+        '-fantares_codegen_server=127.0.0.1:8880',
+    ]
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.bind((host, port))
+        s.listen()
+        while True:
+            conn, addr = s.accept()
+            with conn:
+                print(f'Connected by {addr}')
+                while True:
+                    data = conn.recv(10240)
+                    if not data:
+                        break
+                    params = data.decode()
+                    ret = {
+                        'ret' : True,
+                        'error' : '',
+                    }
+                    try:
+                        params = json.loads(params)
+                        model_path = wslpath(params['model'])
+                        output_dir = wslpath(params['output_dir'])
+                        with cd(output_dir):
+                            cmd = ' '.join([exec_path, model_path] + cmd_options)
+                            out = subprocess.run(cmd, stderr = subprocess.STDOUT, shell = True, encoding = 'utf8')
+                        if out.returncode != 0:
+                            ret['ret'] = False
+                            ret['error'] = out.stderr
+                        print('model_path:', model_path)
+                        print('output_dir:', output_dir)
+                        print('return code:', out.returncode)
+                        print('stdout:', out.stdout)
+                        print('stderr:', out.stderr)
+                    except Exception as e:
+                        print(e)
+                        ret['ret'] = False
+                        ret['error'] = str(e)
+                    conn.sendall(bytes(json.dumps(ret), 'utf-8'))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('NNFusion WSL Codegen Server')
+    parser.add_argument('exec_path', type = str, help = 'path to nnfusion executable')
+    parser.add_argument('--port', type = int, default = 65432, help = 'communication port between WSL and host')
+    args = parser.parse_args()
+    run(os.path.abspath(args.exec_path), args.port)
\ No newline at end of file
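The server above speaks a small JSON-over-TCP protocol: the client sends
{"model": ..., "output_dir": ...} as Windows paths (converted to WSL paths with
wslpath on the server side) and receives {"ret": bool, "error": str} back once
codegen finishes. A minimal client sketch of that exchange, assuming the server
is listening on its default port (the function name request_codegen is
illustrative; the test pipeline below embeds the same logic in _build_model):

    import json
    import socket

    def request_codegen(model_path, output_dir, host='127.0.0.1', port=65432):
        # Same request/response format as handled in run() above.
        params = {'model': model_path, 'output_dir': output_dir}
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, port))
            s.sendall(bytes(json.dumps(params), 'utf-8'))
            ret = json.loads(s.recv(10240).decode())
        if not ret['ret']:
            raise RuntimeError(ret['error'])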
diff --git a/test/python/onnx_test_win.py b/test/python/onnx_test_win.py
new file mode 100644
index 000000000..64c5b4670
--- /dev/null
+++ b/test/python/onnx_test_win.py
@@ -0,0 +1,444 @@
+import os, sys, glob
+from typing import IO, Any, Dict, List, Sequence, Union
+from importlib import import_module
+import numpy as np
+import onnx
+import torch
+import git
+import shutil
+import json
+import socket
+import platform
+import subprocess
+from onnx import AttributeProto, defs, load, TensorProto, ModelProto, NodeProto, TypeProto, numpy_helper
+from onnx.backend.test.case import collect_snippets
+from onnx.backend.test.loader import load_model_tests
+from onnx.backend.test.runner import Runner
+from onnx.backend.base import Backend
+
+global_flag_input_as_constant = False
+global_flag_float_as_half = False
+global_flag_float_as_double = False
+
+
+class TestContext:
+    root_dir : str = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../")
+    onnx_remote : str = "https://github.com/onnx/onnx.git"
+    onnx_repo : str = os.path.join(root_dir, "build/onnx")
+    onnx_tests : str = os.path.join(onnx_repo, "onnx/backend/test/data")
+    onnx_test_kind : str = "node"
+    nnfusion_bin : str = os.path.join(root_dir, "build/src/tools/nnfusion/")
+    nnfusion_python : str = os.path.join(root_dir, "src/python/")
+    nnfusion_workdir : str = "nnfusion_work"
+    # for testing on Windows
+    nnfusion_port = 65432
+    msbuild_bin = 'D:/Programs/"Microsoft Visual Studio"/2022/Enterprise/MSBuild/Current/Bin/MSBuild.exe'
+    # remain unchanged
+    nnfusion_argstr = "-f onnx -fmulti_shape=false -fort_folding=false -fdefault_device=CUDA -fhlsl_codegen_type=cpp -fantares_mode=true -fblockfusion_level=0 -fkernel_fusion_level=0 -fantares_codegen_server=127.0.0.1:8880 -fkernel_tuning_steps=0 -ffold_where=1 -fsymbolic=1 -fort_folding=0 -fsplit_softmax=1 -fhost_entry=0 -fir_based_fusion=1 -fextern_result_memory=1"
+    antares_url = "127.0.0.1:8880"
+    nnfusion_device = "HLSL"
+    nnfusion_codegen_dir = "nnfusion_rt/cuda_codegen"
+
+    def __init__(self, keep_existing = False) -> None:
+        self.nnfusion_argstr = self.nnfusion_argstr.replace("127.0.0.1:8880", self.antares_url).replace("CUDA", self.nnfusion_device)
+        os.environ["PATH"] = os.path.abspath(self.nnfusion_bin) + ":" + os.environ["PATH"]
+        sys.path.insert(1, os.path.abspath(self.nnfusion_python))
+        self.nnfusion = __import__('nnfusion')
+        if not os.path.exists(self.onnx_repo):
+            repo = git.Repo.clone_from(self.onnx_remote, self.onnx_repo)
+        if not keep_existing and os.path.exists(self.nnfusion_workdir):
+            shutil.rmtree(self.nnfusion_workdir)
+        os.makedirs(self.nnfusion_workdir, exist_ok = True)
+
+    def test_pipeline(self, test_cases):
+        case_count = 0
+        for case in load_model_tests(data_dir=self.onnx_tests, kind=self.onnx_test_kind):
+            if case.name in test_cases:
+                opname = case.name.split('_')[1]
+                case_count += 1
+                print('Test [%d/%d]:' % (case_count, len(test_cases)), case.name, opname)
+                if os.path.exists(os.path.join(self.nnfusion_workdir, case.name)):
+                    print('Skipped.')
+                    continue
+                if global_flag_float_as_half:
+                    case = self._test_float_to_type(case, TensorProto.FLOAT16)
+                    case = case._replace(rtol=0.00977, atol=0.00977)
+                elif global_flag_float_as_double:
+                    case = self._test_float_to_type(case, TensorProto.DOUBLE)
+                if global_flag_input_as_constant:
+                    self.run_input_as_constant(case, opname)
+                else:
+                    self.run(case, opname)
+
+    def _test_float_to_type(self, model_test, float_to_type):
+        #float_to_type = TensorProto.FLOAT16
+        new_model_dir = os.path.join(self.nnfusion_workdir, model_test.name)
+        if not os.path.exists(new_model_dir):
+            os.mkdir(new_model_dir)
+        # modify graph
+        model_pb_path = os.path.join(model_test.model_dir, "model.onnx")
+        new_model_pb_path = os.path.join(new_model_dir, "model.onnx")
+        model = onnx.load(model_pb_path)
+
+        def change_model_float(model_pb_path, new_model_pb_path, float_to_type):
+            model = onnx.load(model_pb_path)
+            for in_tensor in model.graph.input:
+                if in_tensor.type.HasField("tensor_type"):
+                    if in_tensor.type.tensor_type.elem_type == TensorProto.FLOAT:
+                        in_tensor.type.tensor_type.elem_type = float_to_type
+            for out_tensor in model.graph.output:
+                if out_tensor.type.HasField("tensor_type"):
+                    if out_tensor.type.tensor_type.elem_type == TensorProto.FLOAT:
+                        out_tensor.type.tensor_type.elem_type = float_to_type
+            onnx.save(model, new_model_pb_path)
+
+        def save_fp(input_file, output_file, float_to_type, tensor):
+            with open(input_file, "rb") as f:
+                protobuf_content = f.read()
+            if tensor.type.HasField("tensor_type") and \
+                tensor.type.tensor_type.elem_type == TensorProto.FLOAT:
+                ts = onnx.TensorProto()
+                ts.ParseFromString(protobuf_content)
+                ndts = numpy_helper.to_array(ts)
+                if float_to_type == TensorProto.FLOAT16:
+                    ndts = ndts.astype(np.float16)
+                elif float_to_type == TensorProto.DOUBLE:
+                    ndts = ndts.astype(np.float64)
+                fp_data = numpy_helper.from_array(ndts, tensor.name)
+                protobuf_content = fp_data.SerializeToString()
+            fo = open(output_file, "wb")
+            fo.write(protobuf_content)
+            fo.close()
+
+        for test_data_dir in glob.glob(os.path.join(model_test.model_dir, "test_data_set*")):
+            new_test_data_dir = os.path.join(new_model_dir, test_data_dir.split("/")[-1])
+            if not os.path.exists(new_test_data_dir):
+                os.mkdir(new_test_data_dir)
+            inputs_num = len(glob.glob(os.path.join(test_data_dir, "input_*.pb")))
+            for i in range(inputs_num):
+                input_file = os.path.join(test_data_dir, f"input_{i}.pb")
+                output_file = os.path.join(new_test_data_dir, f"input_{i}.pb")
+                save_fp(input_file, output_file, float_to_type, model.graph.input[i])
+            ref_outputs_num = len(
+                glob.glob(os.path.join(test_data_dir, "output_*.pb"))
+            )
+            for i in range(ref_outputs_num):
+                output_file = os.path.join(test_data_dir, f"output_{i}.pb")
+                output_output_file = os.path.join(new_test_data_dir, f"output_{i}.pb")
+                save_fp(output_file, output_output_file, float_to_type, model.graph.output[i])
+        change_model_float(model_pb_path, new_model_pb_path, float_to_type)
+        return model_test._replace(model_dir=new_model_dir)
+
+    def _build_model(self, model_test):
+        import nnfusion
+        from nnfusion.executor import Executor
+        from nnfusion.session import generate_sample, codegen, modify_nnfusion_rt, build
+        from nnfusion.utils import cd, execute
+
+        model_pb_path = ""
+        nnfusion_workdir = ""
+        if isinstance(model_test, str):
+            if not model_test.endswith(".onnx"):
+                nnfusion_workdir = os.path.join(self.nnfusion_workdir, model_test.split("/")[-1])
+                model_pb_path = os.path.join(model_test, "model.onnx")
+            else:
+                # nnfusion_workdir = os.path.join(self.nnfusion_workdir, model_test.split("/")[-1][:-5])
+                nnfusion_workdir = os.path.join(self.nnfusion_workdir, os.path.basename(model_test)[:-5])
+                model_pb_path = model_test
+        else:
+            model_dir = model_test.model_dir
+            model_pb_path = os.path.join(model_dir, "model.onnx")
+            nnfusion_workdir = os.path.join(self.nnfusion_workdir, model_test.name)
+        if not os.path.exists(nnfusion_workdir):
+            os.makedirs(nnfusion_workdir, exist_ok = True)
+        if platform.system().lower() == 'linux':
+            codegen(model_pb_path, self.nnfusion_argstr, nnfusion_workdir)
+            rt_dir = os.path.join(nnfusion_workdir, "nnfusion_rt/cuda_codegen")
+            to_cst_dir = os.path.join(rt_dir, "Constant")
+            cur_cst_dir = os.path.join(self.root_dir, "Constant")
+            #if os.path.exists(cur_cst_dir):
+            if os.path.islink(cur_cst_dir):
+                os.unlink(cur_cst_dir)
+            if os.path.exists(to_cst_dir):
+                os.symlink(to_cst_dir, cur_cst_dir, True)
+            modify_nnfusion_rt(rt_dir)
+            build(rt_dir)
+            return Executor(rt_dir)
+        elif platform.system().lower() == 'windows':
+            # On Windows, use a socket to communicate between the host and WSL;
+            # nnfusion_wsl_codegen_server.py must be started on WSL
+            # before running this testing pipeline.
+            params = {
+                'model' : os.path.abspath(model_pb_path),
+                'output_dir' : os.path.abspath(nnfusion_workdir),
+            }
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                s.connect(('127.0.0.1', self.nnfusion_port))
+                s.sendall(bytes(json.dumps(params), 'utf-8'))
+                ret = s.recv(10240)
+            ret = json.loads(ret.decode())
+            if not ret['ret']:
+                raise RuntimeError(ret['error'])
+            rt_dir = os.path.join(nnfusion_workdir, "nnfusion_rt/dxcompute_codegen/Direct3DWinNN")
+            to_cst_dir = os.path.join(rt_dir, "nnf_desktop_example/Constant")
+            cur_cst_dir = os.path.join(self.root_dir, "Constant")
+            with cd(rt_dir):
+                execute(f'{self.msbuild_bin} ./nnf_desktop_example.sln /p:PlatformToolset=v143 -p:Configuration=Release')
+            return Executor(os.path.join(rt_dir, 'x64/Release'))
+        raise RuntimeError('Unsupported platform.')
+
+    def _debug_tensor(self, rt) -> None:
+        for i in rt.get_inputs():
+            print(i.name)
+        for i in rt.get_outputs():
+            print(i.name)
+
+    def _assert_similar_outputs(
+        self,
+        ref_outputs: Sequence[Any],
+        outputs: Sequence[Any],
+        rtol: float,
+        atol: float,
+    ):
+        try:
+            np.testing.assert_equal(len(outputs), len(ref_outputs))
+            for i in range(len(outputs)):
+                if isinstance(outputs[i], (list, tuple)):
+                    for j in range(len(outputs[i])):
+                        f = self._assert_similar_outputs(
+                            ref_outputs[i][j], outputs[i][j], rtol, atol
+                        )
+                        if not f:
+                            return False
+                else:
+                    np.testing.assert_equal(outputs[i].dtype, ref_outputs[i].dtype)
+                    if ref_outputs[i].dtype == object:
+                        np.testing.assert_array_equal(outputs[i], ref_outputs[i])
+                    else:
+                        np.testing.assert_allclose(
+                            outputs[i], ref_outputs[i], rtol=rtol, atol=atol
+                        )
+            return True
+        except Exception:
+            return False
+
+    def _load_proto(
+        self,
+        proto_filename: str,
+        target_list: List[Union[np.ndarray, List[Any]]],
+        model_type_proto: TypeProto,
+    ) -> None:
+        with open(proto_filename, "rb") as f:
+            protobuf_content = f.read()
+            if model_type_proto.HasField("sequence_type"):
+                sequence = onnx.SequenceProto()
+                sequence.ParseFromString(protobuf_content)
+                target_list.append(numpy_helper.to_list(sequence))
+            elif model_type_proto.HasField("tensor_type"):
+                tensor = onnx.TensorProto()
+                tensor.ParseFromString(protobuf_content)
+                target_list.append(numpy_helper.to_array(tensor))
+            elif model_type_proto.HasField("optional_type"):
+                optional = onnx.OptionalProto()
+                optional.ParseFromString(protobuf_content)
+                target_list.append(numpy_helper.to_optional(optional))
+            else:
+                print(
+                    "Loading proto of that specific type (Map/Sparse Tensor) is currently not supported"
+                )
+
+    def _load_original_proto(
+        self,
+        proto_filename: str,
+        target_list: List[Union[np.ndarray, List[Any]]],
+        model_type_proto: TypeProto,
+    ) -> None:
+        with open(proto_filename, "rb") as f:
+            protobuf_content = f.read()
+            if model_type_proto.HasField("sequence_type"):
+                sequence = onnx.SequenceProto()
+                sequence.ParseFromString(protobuf_content)
+                target_list.append(sequence)
+            elif model_type_proto.HasField("tensor_type"):
+                tensor = onnx.TensorProto()
+                tensor.ParseFromString(protobuf_content)
+                target_list.append(tensor)
+            elif model_type_proto.HasField("optional_type"):
+                optional = onnx.OptionalProto()
+                optional.ParseFromString(protobuf_content)
+                target_list.append(optional)
+            else:
+                print(
+                    "Loading proto of that specific type (Map/Sparse Tensor) is currently not supported"
+                )
+
+    def _alter_proto(self, model, inputs, target_pb_path):
+        for i in range(0, len(model.graph.input)):
+            inputs[i].name = model.graph.input[i].name
+        model.graph.ClearField('input')
+        for t in inputs:
+            model.graph.initializer.append(t)
+        onnx.save(model, target_pb_path)
+
+    def run_input_as_constant(self, model_test, op_name) -> None:
+        import nnfusion
+        from nnfusion.data_format import cast_pytorch_tensor, cast_hlsl_tensor, HLSLTensor
+        torch_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        model_dir = model_test.model_dir
+        model_pb_path = os.path.join(model_dir, "model.onnx")
+        model = onnx.load(model_pb_path)
+
+        for test_data_dir in glob.glob(os.path.join(model_dir, "test_data_set*")):
+            inputs = []
+            inputs_num = len(glob.glob(os.path.join(test_data_dir, "input_*.pb")))
+            for i in range(inputs_num):
+                input_file = os.path.join(test_data_dir, f"input_{i}.pb")
+                self._load_original_proto(input_file, inputs, model.graph.input[i].type)
+            ref_outputs = []
+            ref_outputs_num = len(
+                glob.glob(os.path.join(test_data_dir, "output_*.pb"))
+            )
+            for i in range(ref_outputs_num):
+                output_file = os.path.join(test_data_dir, f"output_{i}.pb")
+                self._load_proto(
+                    output_file, ref_outputs, model.graph.output[i].type
+                )
+
+            try:
+                # new_pb_path = os.path.join(self.nnfusion_workdir, model_test.name + test_data_dir.split("/")[-1] + ".onnx")
+                new_pb_path = os.path.join(self.nnfusion_workdir, model_test.name + '.onnx')
+                self._alter_proto(model, inputs, new_pb_path)
+                rt = self._build_model(new_pb_path)
+            except Exception as e:
+                print('Error:', e)
+                print("@,", op_name, ",", model_test.name, ", BUILD ERROR", ", FAILED")
+                return
+            try:
+                nnf_inputs = dict()
+                nnf_outputs = dict()
+                nnf_torch_outputs = list()
+                for output_i in range(len(ref_outputs)):
+                    name = model.graph.output[output_i].name
+                    nnf_torch_outputs.append(torch.tensor(ref_outputs[output_i]))
+                    nnf_torch_outputs[-1].zero_()
+                    if self.nnfusion_device == 'CUDA':
+                        nnf_outputs[name] = cast_pytorch_tensor(nnf_torch_outputs[-1].to(torch_device))
+                    elif self.nnfusion_device == 'HLSL':
+                        nnf_outputs[name] = cast_hlsl_tensor(HLSLTensor.build_from_torch(nnf_torch_outputs[-1]))
+                rt.feed_data(nnf_inputs, nnf_outputs)
+                outputs = [nnf_outputs[output_i.name].to_pytorch_tensor().cpu().numpy() for output_i in model.graph.output]  # list(prepared_model.run(inputs))
+            except Exception as e:
+                print('Error:', e)
+                print("@,", op_name, ",", model_test.name, ", EXECUTION ERROR", ", FAILED")
+                continue
+            r = self._assert_similar_outputs(
+                ref_outputs, outputs, rtol=model_test.rtol, atol=model_test.atol
+            )
+            print("@,", op_name, ",", model_test.name, "," + test_data_dir, ", PASS" if r else ", FAILED")
+            if not r:
+                print('expected output:', ref_outputs)
+                print('got output:', outputs)
+
+    def run(self, model_test, op_name) -> None:
+        import nnfusion
+        from nnfusion.data_format import cast_pytorch_tensor, cast_hlsl_tensor, HLSLTensor
+        torch_device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+        model_dir = model_test.model_dir
+        model_pb_path = os.path.join(model_dir, "model.onnx")
+        model = onnx.load(model_pb_path)
+        try:
+            rt = self._build_model(model_test)
+        except Exception as e:
+            print('Error:', e)
+            print("@,", op_name, ",", model_test.name, ", BUILD ERROR", ", FAILED")
+            return
+        # self._debug_tensor(rt)
+        for test_data_npz in glob.glob(os.path.join(model_dir, "test_data_*.npz")):
+            test_data = np.load(test_data_npz, encoding="bytes")
+            inputs = list(test_data["inputs"])
+            outputs = []  # list(prepared_model.run(inputs))
+            ref_outputs = test_data["outputs"]
+            self._assert_similar_outputs(
+                ref_outputs, outputs, rtol=model_test.rtol, atol=model_test.atol
+            )
+        for test_data_dir in glob.glob(os.path.join(model_dir, "test_data_set*")):
+            inputs = []
+            inputs_num = len(glob.glob(os.path.join(test_data_dir, "input_*.pb")))
+            for i in range(inputs_num):
+                input_file = os.path.join(test_data_dir, f"input_{i}.pb")
+                self._load_proto(input_file, inputs, model.graph.input[i].type)
+            ref_outputs = []
+            ref_outputs_num = len(
+                glob.glob(os.path.join(test_data_dir, "output_*.pb"))
+            )
+            for i in range(ref_outputs_num):
+                output_file = os.path.join(test_data_dir, f"output_{i}.pb")
+                self._load_proto(
+                    output_file, ref_outputs, model.graph.output[i].type
+                )
+            try:
+                nnf_inputs = dict()
+                nnf_torch_inputs = list()
+                for input_i in range(len(inputs)):
+                    name = model.graph.input[input_i].name
+                    nnf_torch_inputs.append(torch.tensor(inputs[input_i]))
+                    if self.nnfusion_device == 'CUDA':
+                        nnf_inputs[name] = cast_pytorch_tensor(nnf_torch_inputs[-1].to(torch_device))
+                    elif self.nnfusion_device == 'HLSL':
+                        nnf_inputs[name] = cast_hlsl_tensor(HLSLTensor.build_from_torch(nnf_torch_inputs[-1]))
+                nnf_outputs = dict()
+                nnf_torch_outputs = list()
+                for output_i in range(len(ref_outputs)):
+                    name = model.graph.output[output_i].name
+                    nnf_torch_outputs.append(torch.tensor(ref_outputs[output_i]))
+                    nnf_torch_outputs[-1].zero_()
+                    if self.nnfusion_device == 'CUDA':
+                        nnf_outputs[name] = cast_pytorch_tensor(nnf_torch_outputs[-1].to(torch_device))
+                    elif self.nnfusion_device == 'HLSL':
+                        nnf_outputs[name] = cast_hlsl_tensor(HLSLTensor.build_from_torch(nnf_torch_outputs[-1]))
+                rt.feed_data(nnf_inputs, nnf_outputs)
+                outputs = [nnf_outputs[output_i.name].to_pytorch_tensor().cpu().numpy() for output_i in model.graph.output]  # list(prepared_model.run(inputs))
+            except Exception as e:
+                print('Error:', e)
+                print("@,", op_name, ",", model_test.name, ", EXECUTION ERROR", ", FAILED")
+                continue
+            r = self._assert_similar_outputs(
+                ref_outputs, outputs, rtol=model_test.rtol, atol=model_test.atol
+            )
+            print("@,", op_name, ",", model_test.name, "," + test_data_dir, ", PASS" if r else ", FAILED")
+            if not r:
+                print('expected output:', ref_outputs)
+                print('got output:', outputs)
+
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(prog = 'Test with ONNX Test cases')
+    parser.add_argument('-n', '--name', default="test_cos")
+    parser.add_argument('-f', '--file', default="user_pass_list.csv")
+    parser.add_argument('-m', '--mode', default="name")
+    parser.add_argument("-i", "--input_as_constant", action="store_true", default=False)
+    parser.add_argument("-a", "--float_as_half", action="store_true", default=False)
+    parser.add_argument("-d", "--float_as_double", action="store_true", default=False)
+    parser.add_argument("-r", "--resume", action = "store_true", default = False)
+    args = parser.parse_args()
+    print('Process ID:', os.getpid())
+    global_flag_input_as_constant = args.input_as_constant
+    global_flag_float_as_half = args.float_as_half
+    global_flag_float_as_double = args.float_as_double
+    if args.mode == "name":
+        test_cases = args.name.lower().split(",")
+        test_cases = [v.strip() for v in test_cases]
+    elif args.mode == "file":
+        f = open(args.file).readlines()
+        test_cases = [v.strip().lower().split(',')[0].strip() for v in f if v != '\n']
+    test_context = TestContext(args.resume)
+    test_context.test_pipeline(set(test_cases))
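Putting the two new scripts together: start the codegen server inside WSL
(wslpath-python must be installed there), then run the test pipeline from a
Windows Python environment. Illustrative invocations, following the argparse
definitions above; the nnfusion executable path is a placeholder:

    # inside WSL
    python test/python/nnfusion_wsl_codegen_server.py /path/to/nnfusion --port 65432

    # on the Windows host: run a single case by name, or a list from a CSV file
    python test/python/onnx_test_win.py -m name -n test_cos
    python test/python/onnx_test_win.py -m file -f user_pass_list.csv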