Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bug fix] Register Fusion Pass: fuse policy assigns wrong output edges #514

Open
wants to merge 7 commits into
base: xbox
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/nnfusion/core/kernels/cuda_gpu/cuda_langunit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,15 @@ __device__ __forceinline__ half load(const half* __restrict__ in, int i=0, boo
}
return v;
}
// Guarded load of a single int16_t element.
//   in : base pointer of the source buffer
//   i  : element offset added to `in` (defaults to 0)
//   b  : predicate; when false, no memory is read and 0 is returned
// Returns in[i] fetched through __ldg when `b` is true, otherwise 0.
__device__ __forceinline__ int16_t load(const int16_t* __restrict__ in, int i=0, bool b=true)
{
    // Predicate is off: skip the memory access entirely and yield zero.
    if (!b)
    {
        return 0;
    }
    return __ldg(in + i);
}
__device__ __forceinline__ int32_t load(const int32_t* __restrict__ in, int i=0, bool b=true)
{
int32_t v = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ void CudaCodegenPass::create_cmake_file(std::shared_ptr<InterpreterContext> ctx,
cmake_minimum_required(VERSION 3.5)
SET(SRC "nnfusion_rt.cu" CACHE STRING "codegen source file")
SET(TARGET_NAME "nnfusion_naive_rt" CACHE STRING "codegen target name")
SET(CUDA_ARCH "-gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" CACHE STRING "target architecture")
SET(CUDA_ARCH "-gencode=arch=compute_60,code=compute_60 -gencode=arch=compute_61,code=compute_61 -gencode=arch=compute_70,code=compute_70 -gencode=arch=compute_75,code=compute_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86" CACHE STRING "target architecture")
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()
Expand Down
2 changes: 1 addition & 1 deletion src/nnfusion/engine/pass/graph/register_fusion_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,7 @@ class ApplyFusionResult
auto out_node = out_edge->get_dst();
if (node_set.count(out_node))
continue;
m_graph->add_edge(fused_node, out_id, out_node, out_edge->get_dst_input());
m_graph->add_edge(fused_node, i, out_node, out_edge->get_dst_input());
}
}
// cleanup
Expand Down