Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix workspace return type #410

Open — wants to merge 9 commits into base branch `main` from a contributor branch (the source branch name was not preserved in this page extraction).
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions src/nnfusion/engine/memory_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,9 +290,9 @@ LanguageUnit_p nnfusion::MemoryAllocator::emit_memory_alloc()
auto& lu = *_lu;
if (m_max_allocated > 0)
{
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";
if (!FLAGS_ffunction_codegen)
{
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";
lu << "CUDA_SAFE_CALL(cudaMalloc((void**)&" << this->get_name() << "_memory_pool,"
<< m_max_allocated << "));\n";
lu << "CUDA_SAFE_CALL(cudaMemset((void*)" << this->get_name() << "_memory_pool, 0, "
Expand Down Expand Up @@ -332,17 +332,22 @@ LanguageUnit_p nnfusion::MemoryAllocator::emit_memory_free()
return _lu;

auto& lu = *_lu;
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";

if (!FLAGS_ffunction_codegen)
{
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";
lu << "CUDA_SAFE_CALL(cudaFree(" << this->get_name() + "_memory_pool));\n";
}

return _lu;
}

LanguageUnit_p nnfusion::MemoryAllocator::emit_memory_set(int value)
{
LanguageUnit_p _lu(new LanguageUnit(this->get_name() + "_memset"));
auto& lu = *_lu;
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";
if (!FLAGS_ffunction_codegen)
lu << "CUDA_SAFE_CALL(cudaSetDevice(" << m_device_id << "));\n";
lu << "CUDA_SAFE_CALL(cudaMemset((void*)" << this->get_name() + "_memory_pool, " << value
<< ", " << m_max_allocated << "));\n";
return _lu;
Expand Down
2 changes: 1 addition & 1 deletion src/nnfusion/engine/pass/codegen/base_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ LanguageUnit_p BaseCodegenPass::codegen_workspace_size(std::shared_ptr<Translati
total_alloc += allocator.second->max_allocated();
}

*lu_workspace << "int get_workspace_size()\n{\n";
*lu_workspace << "uint64_t get_workspace_size()\n{\n";
*lu_workspace << " return " << total_alloc << ";\n";
*lu_workspace << "}\n";
return lu_workspace;
Expand Down
2 changes: 1 addition & 1 deletion src/nnfusion/engine/pass/codegen/cpu_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ void CpuCodegenPass::create_header_file(std::shared_ptr<InterpreterContext> ctx,
// if (device_type() == CUDA_GPU || device_type() == ROCM_GPU)
// lu_header << header::cuda->get_code();
lu_header << "extern \"C\" int get_device_type();\n";
lu_header << "extern \"C\" int get_workspace_size();\n";
lu_header << "extern \"C\" uint64_t get_workspace_size();\n";
lu_header << "extern \"C\" int kernel_entry(";
std::string params = get_kernel_entry_paras(tu);
lu_header << params;
Expand Down
4 changes: 2 additions & 2 deletions src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1116,7 +1116,7 @@ void CudaCodegenPass::create_header_file(std::shared_ptr<InterpreterContext> ctx

lu_header << header::cuda_fp16->get_code();
lu_header << "extern \"C\" int get_device_type();\n";
lu_header << "extern \"C\" int get_workspace_size();\n";
lu_header << "extern \"C\" uint64_t get_workspace_size();\n";
lu_header << "extern \"C\" int kernel_entry";
if (FLAGS_fhost_entry)
lu_header << "_host";
Expand Down Expand Up @@ -1371,7 +1371,7 @@ cmake_minimum_required(VERSION 3.5)

SET(SRC "nnfusion_rt.cu" CACHE STRING "codegen source file")
SET(TARGET_NAME "nnfusion_naive_rt" CACHE STRING "codegen target name")
SET(CUDA_ARCH "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75" CACHE STRING "target architecture")
SET(CUDA_ARCH "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_86,code=sm_86" CACHE STRING "target architecture")

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
Expand Down
2 changes: 1 addition & 1 deletion src/nnfusion/engine/pass/codegen/hlsl_cpp_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ void HLSLCPPCodegenPass::create_header_file(std::shared_ptr<InterpreterContext>
auto& lu_header = *lup_header;

lu_header << "extern \"C\" RUNTIME_API int get_device_type();\n";
lu_header << "extern \"C\" RUNTIME_API int get_workspace_size();\n";
lu_header << "extern \"C\" RUNTIME_API uint64_t get_workspace_size();\n";
lu_header << "extern \"C\" RUNTIME_API int kernel_entry";
if (FLAGS_fhost_entry)
lu_header << "_host";
Expand Down
1 change: 1 addition & 0 deletions src/python/nnfusion/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,7 @@ def _maybe_reserve_mem(self, device):
if get_workspace_size is None:
return None

get_workspace_size.restype = ctypes.c_uint64
n_byte = get_workspace_size()
if not n_byte:
return None
Expand Down