Skip to content
This repository has been archived by the owner on Oct 25, 2023. It is now read-only.

Commit

Permalink
Profile all cutlass kernels and use correct shapes (#70)
Browse files Browse the repository at this point in the history
This PR fixes the shapes used for profiling CUTLASS kernels and updates the
config to profile all CUTLASS kernels (this will take more tuning time during
kernel selection).

cc @jwfromm
  • Loading branch information
vinx13 authored Mar 28, 2023
1 parent c1ba119 commit caa7ee3
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 2 deletions.
3 changes: 2 additions & 1 deletion python/tvm/contrib/cutlass/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from tvm import relax, relay, runtime
from tvm._ffi.registry import register_func
from tvm.contrib.nvcc import get_cuda_version
from tvm.topi.utils import get_const_tuple

from .gen_conv2d import CutlassConv2DProfiler
from .gen_gemm import CutlassGemmProfiler
Expand Down Expand Up @@ -545,7 +546,7 @@ def _extract_relax_function_signature(f):

for i, arg in enumerate(f.params):
sinfo = arg.struct_info
signature["arg%d_shape" % i] = list(sinfo.shape)
signature["arg%d_shape" % i] = get_const_tuple(sinfo.shape)
signature["arg%d_dtype" % i] = sinfo.dtype

ret_sinfo = f.ret_struct_info
Expand Down
2 changes: 1 addition & 1 deletion python/tvm/octo/compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def offload_cutlass(mod: tvm.IRModule, target: tvm.target.Target) -> tvm.IRModul

# Construct CUTLASS codegen pass.
cutlass_codegen_pass = relax.transform.RunCodegen(
{"cutlass": {"sm": sm, "find_first_valid": True}}
{"cutlass": {"sm": sm, "find_first_valid": False}}
)

# Generate code for matched cutlass kernels.
Expand Down

0 comments on commit caa7ee3

Please sign in to comment.