Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -2322,6 +2322,10 @@ class SelectionDAG {
/// +nan are considered positive, -0.0, -inf and -nan are not.
LLVM_ABI bool cannotBeOrderedNegativeFP(SDValue Op) const;

/// Check if all uses of a floating-point value are insensitive to signed
/// zeros, i.e. whether every user behaves the same when \p Op is +0.0 or
/// -0.0. The check is conservative: any user that is not recognized as
/// insensitive makes this return false.
LLVM_ABI bool allUsesSignedZeroInsensitive(SDValue Op) const;

/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
/// other positive zero.
Expand Down
95 changes: 73 additions & 22 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
Expand Down Expand Up @@ -18873,6 +18874,8 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
// We can fold the fpto[us]i -> [us]itofp pattern into a single ftrunc.
// Additionally, if there are clamps ([us]min or [us]max) around
// the fpto[us]i, we can fold those into fminnum/fmaxnum around the ftrunc.
// If NoSignedZerosFPMath is enabled, this is a direct replacement.
// Otherwise, for strict math, we must handle edge cases:
// 1. For unsigned conversions, use FABS to handle negative cases. Take -0.0
Expand All @@ -18884,28 +18887,69 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
return SDValue();

// fptosi/fptoui round towards zero, so converting from FP to integer and
// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
SDValue N0 = N->getOperand(0);
if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
N0.getOperand(0).getValueType() == VT) {
if (DAG.getTarget().Options.NoSignedZerosFPMath)
return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
}
bool IsUnsigned = N->getOpcode() == ISD::UINT_TO_FP;
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
assert(IsSigned || IsUnsigned);

if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
N0.getOperand(0).getValueType() == VT) {
if (DAG.getTarget().Options.NoSignedZerosFPMath)
return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
DAG.allUsesSignedZeroInsensitive(SDValue(N, 0));
// For signed conversions: The optimization changes signed zero behavior.
if (IsSigned && !IsSignedZeroSafe)
return SDValue();
// For unsigned conversions, we need FABS to canonicalize -0.0 to +0.0
// (unless outputting a signed zero is OK).
if (IsUnsigned && !IsSignedZeroSafe && !TLI.isFAbsFree(VT))
return SDValue();

// Strict math: use FABS to handle negative inputs correctly.
if (TLI.isFAbsFree(VT)) {
SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::FTRUNC, DL, VT, Abs);
}
// Collect potential clamp operations (innermost to outermost) and peel.
struct ClampOp {
unsigned Opcode;
SDValue Constant;
};
SmallVector<ClampOp, 2> Clamps;
unsigned MinOp = IsUnsigned ? ISD::UMIN : ISD::SMIN;
unsigned MaxOp = IsUnsigned ? ISD::UMAX : ISD::SMAX;
SDValue IntVal = N->getOperand(0);
constexpr unsigned MaxClampLevels = 2;
for (unsigned Level = 0; Level < MaxClampLevels; ++Level) {
if (!IntVal.hasOneUse() ||
(IntVal.getOpcode() != MinOp && IntVal.getOpcode() != MaxOp))
break;
unsigned FPClampOp =
(IntVal.getOpcode() == MinOp) ? ISD::FMINNUM : ISD::FMAXNUM;
if (!TLI.isOperationLegal(FPClampOp, VT))
return SDValue();
auto *IntConstNode = dyn_cast<ConstantSDNode>(IntVal.getOperand(1));
if (!IntConstNode)
return SDValue();
APFloat FPConst(VT.getFltSemantics());
APInt IntConst = IntConstNode->getAPIntValue();
FPConst.convertFromAPInt(IntConst, IsSigned, APFloat::rmNearestTiesToEven);
// Verify roundtrip exactness.
APSInt RoundTrip(IntConst.getBitWidth(), IsUnsigned);
bool IsExact;
if (FPConst.convertToInteger(RoundTrip, APFloat::rmTowardZero, &IsExact) !=
APFloat::opOK ||
!IsExact || static_cast<const APInt &>(RoundTrip) != IntConst)
return SDValue();
Clamps.push_back({FPClampOp, DAG.getConstantFP(FPConst, DL, VT)});
IntVal = IntVal.getOperand(0);
}

return SDValue();
// Check that the sequence ends with a FPTo[us]i of the right type.
unsigned FPToIntOp = IsUnsigned ? ISD::FP_TO_UINT : ISD::FP_TO_SINT;
if (IntVal.getOpcode() != FPToIntOp ||
IntVal.getOperand(0).getValueType() != VT)
return SDValue();

SDValue Result = IntVal.getOperand(0);
if (IsUnsigned && !IsSignedZeroSafe && TLI.isFAbsFree(VT))
Result = DAG.getNode(ISD::FABS, DL, VT, Result);
Result = DAG.getNode(ISD::FTRUNC, DL, VT, Result);
// Apply clamps, if any, in reverse order (innermost first).
for (auto I = Clamps.rbegin(), E = Clamps.rend(); I != E; ++I)
Result = DAG.getNode(I->Opcode, DL, VT, Result, I->Constant);
return Result;
}

SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
Expand Down Expand Up @@ -19332,10 +19376,17 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
N0.hasOneUse()) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
N0.getOperand(0));
if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) {
SDValue X = N0.getOperand(0);
SDValue Y = N0.getOperand(1);

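// Safe if NoSignedZeros, or if we can prove X != Y (avoiding the -0.0 vs
// +0.0 issue). For now, we use a conservative check: if either operand is
// known never zero, then X - Y can't produce a signed zero from X == Y.
// NOTE(review): a non-zero operand does not by itself rule out X == Y (e.g.
// X == Y == 1.0 gives X - Y == +0.0, so -(X - Y) is -0.0 but Y - X is +0.0);
// confirm this condition is sufficient.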
if (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(X) ||
DAG.isKnownNeverZeroFloat(Y)) {
return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X);
}
}

if (SimplifyDemandedBits(SDValue(N, 0)))
Expand Down
29 changes: 29 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6075,6 +6075,35 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
}

/// Return true if every use of the floating-point value \p Op is insensitive
/// to the sign of a zero, i.e. no user can tell +0.0 from -0.0 in \p Op.
///
/// The check is conservative: only a fixed set of user opcodes is recognized
/// and any other user makes the result false. A value with no uses trivially
/// satisfies the predicate.
bool SelectionDAG::allUsesSignedZeroInsensitive(SDValue Op) const {
  assert(Op.getValueType().isFloatingPoint());
  return all_of(Op->uses(), [&](SDUse &Use) {
    // Op->uses() walks the uses of every result of the defining node. A use
    // of a different result does not consume Op's value, so it cannot observe
    // the sign of its zero and must not make the query fail.
    if (Use.getResNo() != Op.getResNo())
      return true;

    SDNode *User = Use.getUser();
    unsigned OperandNo = Use.getOperandNo();

    // Check if this use is insensitive to the sign of zero.
    switch (User->getOpcode()) {
    case ISD::SETCC:
      // Comparisons: IEEE-754 specifies +0.0 == -0.0.
    case ISD::FABS:
      // fabs always produces +0.0.
      return true;
    case ISD::FCOPYSIGN:
      // copysign overwrites the sign bit of the first operand, so only a use
      // as the magnitude (operand 0) is insensitive; as the sign source
      // (operand 1) the zero sign is observable.
      return OperandNo == 0;
    case ISD::FADD:
    case ISD::FSUB: {
      // Arithmetic with a known non-zero other operand fixes the uncertainty
      // around the sign bit: the result does not depend on the sign of a zero
      // in Op.
      SDValue Other = User->getOperand(1 - OperandNo);
      return isKnownNeverZeroFloat(Other);
    }
    default:
      // Unknown user: assume it may observe the sign of zero.
      return false;
    }
  });
}

bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
Expand Down
Loading