Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -2326,6 +2326,12 @@ class SelectionDAG {
/// +nan are considered positive, -0.0, -inf and -nan are not.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const;

/// Check if a use of a float value is insensitive to signed zeros.
LLVM_ABI bool canIgnoreSignBitOfZero(const SDUse &Use) const;

/// Check if all uses of a value are insensitive to signed zeros.
/// Conservatively returns false if the value has more than two uses.
LLVM_ABI bool canIgnoreSignBitOfZero(SDValue Op) const;

/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
Expand Down
17 changes: 11 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17869,7 +17869,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
if (N1C->isNegative() || Flags.hasNoSignedZeros())
if (N1C->isNegative() || Flags.hasNoSignedZeros() ||
DAG.canIgnoreSignBitOfZero(SDValue(N, 0)))
return N0;

if (SDValue NewSel = foldBinOpIntoSelect(N))
Expand Down Expand Up @@ -18081,7 +18082,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
return N0;
}
}
Expand All @@ -18094,7 +18096,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {

// (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() || Flags.hasNoSignedZeros()) {
if (N0CFP->isNegative() || Flags.hasNoSignedZeros() ||
DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) {
// We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
// flushed to zero, unless all users treat denorms as zero (DAZ).
// FIXME: This transform will change the sign of a NaN and the behavior
Expand Down Expand Up @@ -18744,7 +18747,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}

// Fold X/Sqrt(X) -> Sqrt(X)
if (Flags.hasNoSignedZeros() && Flags.hasAllowReassociation())
if ((Flags.hasNoSignedZeros() || DAG.canIgnoreSignBitOfZero(SDValue(N, 0))) &&
Flags.hasAllowReassociation())
if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
return N1;

Expand Down Expand Up @@ -18795,8 +18799,9 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FDIV, VT) &&
TLI.isOperationLegalOrCustom(ISD::FTRUNC, VT) &&
DAG.isKnownToBeAPowerOfTwoFP(N1)) {
bool NeedsCopySign =
!Flags.hasNoSignedZeros() && !DAG.cannotBeOrderedNegativeFP(N0);
bool NeedsCopySign = !Flags.hasNoSignedZeros() &&
!DAG.canIgnoreSignBitOfZero(SDValue(N, 0)) &&
!DAG.cannotBeOrderedNegativeFP(N0);
SDValue Div = DAG.getNode(ISD::FDIV, DL, VT, N0, N1);
SDValue Rnd = DAG.getNode(ISD::FTRUNC, DL, VT, Div);
SDValue MLA;
Expand Down
40 changes: 40 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6118,6 +6118,46 @@ bool SelectionDAG::cannotBeOrderedNegativeFP(SDValue Op) const {
llvm_unreachable("covered opcode switch");
}

bool SelectionDAG::canIgnoreSignBitOfZero(const SDUse &Use) const {
assert(Use.getValueType().isFloatingPoint());
const SDNode *User = Use.getUser();
unsigned OperandNo = Use.getOperandNo();
// Check if this use is insensitive to the sign of zero
switch (User->getOpcode()) {
case ISD::SETCC:
// Comparisons: IEEE-754 specifies +0.0 == -0.0.
case ISD::FABS:
// fabs always produces +0.0.
return true;
case ISD::FCOPYSIGN:
// copysign overwrites the sign bit of the first operand.
return OperandNo == 0;
case ISD::FADD:
case ISD::FSUB: {
// Arithmetic with non-zero constants fixes the uncertainty around the
// sign bit.
SDValue Other = User->getOperand(1 - OperandNo);
return isKnownNeverZeroFloat(Other);
}
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
// fp-to-int conversions normalize signed zeros.
return true;
default:
return false;
}
}

/// Decide whether every use of \p Op is insensitive to the sign of a zero.
/// Returns false without inspecting anything when there are more than two
/// uses.
bool SelectionDAG::canIgnoreSignBitOfZero(SDValue Op) const {
  // FIXME: Limit the amount of checked uses to not introduce a compile-time
  // regression. Ideally, this should be implemented as a demanded-bits
  // optimization that stems from the users.
  if (Op->use_size() > 2)
    return false;

  // NOTE(review): this walks the uses reachable through Op->uses() without
  // filtering on Op's result number -- presumably callers only pass
  // single-result nodes; confirm before using it on multi-result nodes.
  for (const SDUse &Use : Op->uses())
    if (!canIgnoreSignBitOfZero(Use))
      return false;
  return true;
}

bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
Expand Down
72 changes: 72 additions & 0 deletions llvm/test/CodeGen/AArch64/ignore-signed-zero.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 | FileCheck %s

; Test that nsz constraint can be bypassed when all uses are sign-insensitive.

; x + (+0.0) may only be folded to x when the sign of a zero result does not
; matter (x = -0.0 would otherwise yield +0.0). No nsz flag is present; the
; sole user is an fcmp, which does not distinguish +0.0 from -0.0, so the fadd
; is expected to disappear.
; NOTE: the addend must be +0.0 here. x + (-0.0) is an unconditional identity
; and would be folded even without the use-based analysis, so it would not
; exercise the behavior under test. The function name is kept unchanged.
define i1 @test_fadd_neg_zero_fcmp(float %x) {
; CHECK-LABEL: test_fadd_neg_zero_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #1.00000000
; CHECK-NEXT: fcmp s0, s1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%add = fadd float %x, 0.0
%cmp = fcmp oeq float %add, 1.0
ret i1 %cmp
}

; x - (-0.0) may only be folded to x when the sign of a zero result does not
; matter (x = -0.0 would otherwise yield +0.0). No nsz flag is present; the
; sole user is fabs, which discards the sign, so only the fabs is expected to
; remain.
; NOTE: the subtrahend must be -0.0 here. x - (+0.0) is an unconditional
; identity and would be folded even without the use-based analysis, so it
; would not exercise the behavior under test.
define float @test_fsub_zero_fabs(float %x) {
; CHECK-LABEL: test_fsub_zero_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
%sub = fsub float %x, -0.0
%abs = call float @llvm.fabs.f32(float %sub)
ret float %abs
}

; (+0.0) - x may only be rewritten as -x when the sign of a zero result does
; not matter (x = +0.0 would otherwise yield +0.0 rather than -0.0). No nsz
; flag is present; the sole user is the magnitude operand of copysign, whose
; sign bit is overwritten, so only the copysign lowering is expected to remain.
; NOTE: the minuend must be +0.0 here. (-0.0) - x is accepted by the combine
; purely because the constant is negative, so it would not exercise the
; use-based analysis. The function name is kept unchanged.
define float @test_fsub_neg_zero_copysign(float %x, float %y) {
; CHECK-LABEL: test_fsub_neg_zero_copysign:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v2.4s, #128, lsl #24
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: ret
%sub = fsub float 0.0, %x
%copysign = call float @llvm.copysign.f32(float %sub, float %y)
ret float %copysign
}

; x / sqrt(x) where the fdiv carries only the reassoc flag (no nsz). The
; quotient's sole user is an fcmp, so the expected code keeps just the fsqrt
; and compares its result against 0.0 -- no divide instruction is emitted.
define i1 @test_div_sqrt_fcmp(float %x) {
; CHECK-LABEL: test_div_sqrt_fcmp:
; CHECK: // %bb.0:
; CHECK-NEXT: fsqrt s0, s0
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: cset w0, gt
; CHECK-NEXT: ret
%sqrt = call float @llvm.sqrt.f32(float %x)
%div = fdiv reassoc float %x, %sqrt
%cmp = fcmp ogt float %div, 0.0
ret i1 %cmp
}

; frem by the constant 2.0 (no nsz flag) whose sole user is fabs. The expected
; expansion is a multiply by 0.5, a round-toward-zero, and a fused
; multiply-add with -2.0, followed directly by the fabs -- no sign-restoring
; step appears between the fmadd and the fabs.
define float @test_frem_fabs(float %x) {
; CHECK-LABEL: test_frem_fabs:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s1, #0.50000000
; CHECK-NEXT: fmov s2, #-2.00000000
; CHECK-NEXT: fmul s1, s0, s1
; CHECK-NEXT: frintz s1, s1
; CHECK-NEXT: fmadd s0, s1, s2, s0
; CHECK-NEXT: fabs s0, s0
; CHECK-NEXT: ret
%rem = frem float %x, 2.0
%abs = call float @llvm.fabs.f32(float %rem)
ret float %abs
}

declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)
declare float @llvm.sqrt.f32(float)
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ define amdgpu_kernel void @test_fold_canonicalize_fabs_value_f32(ptr addrspace(1
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
%load = load float, ptr addrspace(1) %gep, align 4
%v0 = fadd float %load, 0.0
%v0 = fadd float %load, 1.0
%v = tail call float @llvm.fabs.f32(float %v0)
%canonicalized = tail call float @llvm.canonicalize.f32(float %v)
store float %canonicalized, ptr addrspace(1) %gep, align 4
Expand Down
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/swdev380865.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,13 @@ define amdgpu_kernel void @_Z6kernelILi4000ELi1EEvPd(ptr addrspace(1) %x.coerce)
; CHECK-NEXT: v_mov_b32_e32 v1, s7
; CHECK-NEXT: .LBB0_1: ; %for.cond4.preheader
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], 0
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x40140000
; CHECK-NEXT: s_add_i32 s1, s1, s0
; CHECK-NEXT: s_cmpk_lt_i32 s1, 0xa00
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7]
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x40180000
; CHECK-NEXT: s_add_i32 s1, s1, s0
; CHECK-NEXT: s_cmpk_lt_i32 s1, 0xa00
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[6:7]
; CHECK-NEXT: s_mov_b32 s6, 0
; CHECK-NEXT: s_mov_b32 s7, 0x401c0000
Expand Down
Loading