diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b82826089d3fe3..8f4eddb5142740 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -3126,11 +3126,12 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {
 
 // NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
 // we cannot specify floating-point literals in isel patterns. Therefore, we
-// use an integer selp to select either 1 or 0 and then cvt to floating-point.
+// use an integer selp to select either 1 (or -1 in case of signed) or 0
+// and then cvt to floating-point.
 
 // sint -> f16
 def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
-          (CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+          (CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
 def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
           (CVT_f16_s16 Int16Regs:$a, CvtRN)>;
 def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
@@ -3170,7 +3171,7 @@ def : Pat<(bf16 (uint_to_fp Int64Regs:$a)),
 
 // sint -> f32
 def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
-          (CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+          (CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
 def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
           (CVT_f32_s16 Int16Regs:$a, CvtRN)>;
 def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
@@ -3190,7 +3191,7 @@ def : Pat<(f32 (uint_to_fp Int64Regs:$a)),
 
 // sint -> f64
 def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
-          (CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+          (CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
 def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
           (CVT_f64_s16 Int16Regs:$a, CvtRN)>;
 def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
diff --git a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
index 6920be5cc4e9e3..a0f07afafa4593 100644
--- a/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
@@ -2,37 +2,55 @@
 ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
 
 ; CHECK-LABEL: foo
-; CHECK: setp
-; CHECK: selp
-; CHECK: cvt.rn.f32.u32
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
+; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
 define float @foo(i1 %a) {
   %ret = uitofp i1 %a to float
   ret float %ret
 }
 
 ; CHECK-LABEL: foo2
-; CHECK: setp
-; CHECK: selp
-; CHECK: cvt.rn.f32.s32
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
+; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
 define float @foo2(i1 %a) {
   %ret = sitofp i1 %a to float
   ret float %ret
 }
 
 ; CHECK-LABEL: foo3
-; CHECK: setp
-; CHECK: selp
-; CHECK: cvt.rn.f64.u32
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
+; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
 define double @foo3(i1 %a) {
   %ret = uitofp i1 %a to double
   ret double %ret
 }
 
 ; CHECK-LABEL: foo4
-; CHECK: setp
-; CHECK: selp
-; CHECK: cvt.rn.f64.s32
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
+; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
 define double @foo4(i1 %a) {
   %ret = sitofp i1 %a to double
   ret double %ret
 }
+
+; CHECK-LABEL: foo5
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
+; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
+define half @foo5(i1 %a) {
+  %ret = uitofp i1 %a to half
+  ret half %ret
+}
+
+; CHECK-LABEL: foo6
+; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
+; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
+; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
+define half @foo6(i1 %a) {
+  %ret = sitofp i1 %a to half
+  ret half %ret
+}