Skip to content

Commit

Permalink
[NVPTX] Fix ISel patterns for i1 sint_to_fp (#110866)
Browse files Browse the repository at this point in the history
NVPTX has ZeroOrNegativeOneBooleanContent; therefore, we need to use -1
(not 1) as the constant for i1 sint_to_fp operations in instruction selection, since a true i1 read as a signed value is -1.
  • Loading branch information
LewisCrawford authored Oct 8, 2024
1 parent 5589096 commit cc5ddae
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
9 changes: 5 additions & 4 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3126,11 +3126,12 @@ foreach ta = [v2f16, v2bf16, v2i16, v4i8, i32] in {

// NOTE: pred->fp conversions are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
// use an integer selp to select either 1 or 0 and then cvt to floating-point.
// use an integer selp to select either 1 (or -1 in case of signed) or 0
// and then cvt to floating-point.

// sint -> f16
def : Pat<(f16 (sint_to_fp Int1Regs:$a)),
(CVT_f16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
(CVT_f16_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
// Signed 16-bit source -> f16: selected directly to CVT_f16_s16 on the 16-bit
// register with the CvtRN conversion modifier; unlike the i1 case, no selp
// is needed to materialize an integer first.
def : Pat<(f16 (sint_to_fp Int16Regs:$a)),
(CVT_f16_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f16 (sint_to_fp Int32Regs:$a)),
Expand Down Expand Up @@ -3170,7 +3171,7 @@ def : Pat<(bf16 (uint_to_fp Int64Regs:$a)),

// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
(CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
(CVT_f32_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
// Signed 16-bit source -> f32: selected directly to CVT_f32_s16 on the 16-bit
// register with the CvtRN conversion modifier; unlike the i1 case, no selp
// is needed to materialize an integer first.
def : Pat<(f32 (sint_to_fp Int16Regs:$a)),
(CVT_f32_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f32 (sint_to_fp Int32Regs:$a)),
Expand All @@ -3190,7 +3191,7 @@ def : Pat<(f32 (uint_to_fp Int64Regs:$a)),

// sint -> f64
def : Pat<(f64 (sint_to_fp Int1Regs:$a)),
(CVT_f64_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
(CVT_f64_s32 (SELP_s32ii -1, 0, Int1Regs:$a), CvtRN)>;
// Signed 16-bit source -> f64: selected directly to CVT_f64_s16 on the 16-bit
// register with the CvtRN conversion modifier; unlike the i1 case, no selp
// is needed to materialize an integer first.
def : Pat<(f64 (sint_to_fp Int16Regs:$a)),
(CVT_f64_s16 Int16Regs:$a, CvtRN)>;
def : Pat<(f64 (sint_to_fp Int32Regs:$a)),
Expand Down
42 changes: 30 additions & 12 deletions llvm/test/CodeGen/NVPTX/i1-int-to-fp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,55 @@
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

; CHECK-LABEL: foo
; CHECK: setp
; CHECK: selp
; CHECK: cvt.rn.f32.u32
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f32.u32 %f{{.*}}, %[[R]]
; Unsigned i1 -> float. The expected lowering selects integer 1 or 0 from the
; predicate and converts it with an unsigned cvt, so a true input gives 1.0.
define float @foo(i1 %a) {
%ret = uitofp i1 %a to float
ret float %ret
}

; CHECK-LABEL: foo2
; CHECK: setp
; CHECK: selp
; CHECK: cvt.rn.f32.s32
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f32.s32 %f{{.*}}, %[[R]]
; Signed i1 -> float. A true i1 interpreted as signed is -1, so the expected
; lowering selects -1 or 0 from the predicate and converts with a signed cvt.
define float @foo2(i1 %a) {
%ret = sitofp i1 %a to float
ret float %ret
}

; CHECK-LABEL: foo3
; CHECK: setp
; CHECK: selp
; CHECK: cvt.rn.f64.u32
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f64.u32 %fd{{.*}}, %[[R]]
; Unsigned i1 -> double. The expected lowering selects integer 1 or 0 from the
; predicate and converts it with an unsigned cvt, so a true input gives 1.0.
define double @foo3(i1 %a) {
%ret = uitofp i1 %a to double
ret double %ret
}

; CHECK-LABEL: foo4
; CHECK: setp
; CHECK: selp
; CHECK: cvt.rn.f64.s32
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f64.s32 %fd{{.*}}, %[[R]]
; Signed i1 -> double. A true i1 interpreted as signed is -1, so the expected
; lowering selects -1 or 0 from the predicate and converts with a signed cvt.
define double @foo4(i1 %a) {
%ret = sitofp i1 %a to double
ret double %ret
}

; CHECK-LABEL: foo5
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.u32 %[[R:r[0-9]+]], 1, 0, %[[P]];
; CHECK: cvt.rn.f16.u32 %{{.*}}, %[[R]]
; Unsigned i1 -> half. The expected lowering selects integer 1 or 0 from the
; predicate and converts it with an unsigned cvt, so a true input gives 1.0.
define half @foo5(i1 %a) {
%ret = uitofp i1 %a to half
ret half %ret
}

; CHECK-LABEL: foo6
; CHECK: setp.eq.b16 %[[P:p[0-9]+]], %{{.*}}, 1;
; CHECK: selp.s32 %[[R:r[0-9]+]], -1, 0, %[[P]];
; CHECK: cvt.rn.f16.s32 %{{.*}}, %[[R]]
; Signed i1 -> half. A true i1 interpreted as signed is -1, so the expected
; lowering selects -1 or 0 from the predicate and converts with a signed cvt.
define half @foo6(i1 %a) {
%ret = sitofp i1 %a to half
ret half %ret
}

0 comments on commit cc5ddae

Please sign in to comment.