Skip to content

Commit 5da0445

Browse files
authored
[LV] Consolidate shouldOptimizeForSize and remove unused BFI/PSI. NFC (#168697)
#158690 plans to pass BFI as a lazy lambda so that BlockFrequencyInfo is not computed when it is not needed. In preparation for that, this PR removes the BFI and PSI parameters from the constructors that do not use them. It also consolidates the two calls to llvm::shouldOptimizeForSize so that the result is computed once and passed wherever it is needed. Finally, it renames OptForSize in LoopVectorizationLegality to AllowRuntimeSCEVChecks to clarify that its purpose is to prevent runtime SCEV checks; see https://reviews.llvm.org/D68082.
1 parent 9eee396 commit 5da0445

File tree

3 files changed

+49
-68
lines changed

3 files changed

+49
-68
lines changed

llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -251,18 +251,15 @@ struct HistogramInfo {
251251
/// induction variable and the different reduction variables.
252252
class LoopVectorizationLegality {
253253
public:
254-
LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE,
255-
DominatorTree *DT, TargetTransformInfo *TTI,
256-
TargetLibraryInfo *TLI, Function *F,
257-
LoopAccessInfoManager &LAIs, LoopInfo *LI,
258-
OptimizationRemarkEmitter *ORE,
259-
LoopVectorizationRequirements *R,
260-
LoopVectorizeHints *H, DemandedBits *DB,
261-
AssumptionCache *AC, BlockFrequencyInfo *BFI,
262-
ProfileSummaryInfo *PSI, AAResults *AA)
254+
LoopVectorizationLegality(
255+
Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT,
256+
TargetTransformInfo *TTI, TargetLibraryInfo *TLI, Function *F,
257+
LoopAccessInfoManager &LAIs, LoopInfo *LI, OptimizationRemarkEmitter *ORE,
258+
LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB,
259+
AssumptionCache *AC, bool AllowRuntimeSCEVChecks, AAResults *AA)
263260
: TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), LAIs(LAIs),
264-
ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), BFI(BFI), PSI(PSI),
265-
AA(AA) {}
261+
ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC),
262+
AllowRuntimeSCEVChecks(AllowRuntimeSCEVChecks), AA(AA) {}
266263

267264
/// ReductionList contains the reduction descriptors for all
268265
/// of the reductions that were found in the loop.
@@ -720,9 +717,8 @@ class LoopVectorizationLegality {
720717
/// Hold potentially faulting loads.
721718
SmallPtrSet<const Instruction *, 4> PotentiallyFaultingLoads;
722719

723-
/// BFI and PSI are used to check for profile guided size optimizations.
724-
BlockFrequencyInfo *BFI;
725-
ProfileSummaryInfo *PSI;
720+
/// Whether or not creating SCEV predicates is allowed.
721+
bool AllowRuntimeSCEVChecks;
726722

727723
// Alias Analysis results used to check for possible aliasing with loads
728724
// used in uncountable exit conditions.

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -460,10 +460,8 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
460460
const auto &Strides =
461461
LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
462462

463-
bool CanAddPredicate = !llvm::shouldOptimizeForSize(
464-
TheLoop->getHeader(), PSI, BFI, PGSOQueryType::IRPass);
465463
int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, *DT, Strides,
466-
CanAddPredicate, false)
464+
AllowRuntimeSCEVChecks, false)
467465
.value_or(0);
468466
if (Stride == 1 || Stride == -1)
469467
return Stride;

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 38 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -501,12 +501,11 @@ class InnerLoopVectorizer {
501501
LoopInfo *LI, DominatorTree *DT,
502502
const TargetTransformInfo *TTI, AssumptionCache *AC,
503503
ElementCount VecWidth, unsigned UnrollFactor,
504-
LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
505-
ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
506-
VPlan &Plan)
504+
LoopVectorizationCostModel *CM,
505+
GeneratedRTChecks &RTChecks, VPlan &Plan)
507506
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC),
508507
VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
509-
Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
508+
Cost(CM), RTChecks(RTChecks), Plan(Plan),
510509
VectorPHVPBB(cast<VPBasicBlock>(
511510
Plan.getVectorLoopRegion()->getSinglePredecessor())) {}
512511

@@ -583,10 +582,6 @@ class InnerLoopVectorizer {
583582
/// The profitability analysis.
584583
LoopVectorizationCostModel *Cost;
585584

586-
/// BFI and PSI are used to check for profile guided size optimizations.
587-
BlockFrequencyInfo *BFI;
588-
ProfileSummaryInfo *PSI;
589-
590585
/// Structure to hold information about generated runtime checks, responsible
591586
/// for cleaning the checks, if vectorization turns out unprofitable.
592587
GeneratedRTChecks &RTChecks;
@@ -635,11 +630,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
635630
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
636631
DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
637632
EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
638-
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
639633
GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth,
640634
ElementCount MinProfitableTripCount, unsigned UnrollFactor)
641635
: InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth,
642-
UnrollFactor, CM, BFI, PSI, Checks, Plan),
636+
UnrollFactor, CM, Checks, Plan),
643637
EPI(EPI), MinProfitableTripCount(MinProfitableTripCount) {}
644638

645639
/// Holds and updates state information required to vectorize the main loop
@@ -665,10 +659,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
665659
AssumptionCache *AC,
666660
EpilogueLoopVectorizationInfo &EPI,
667661
LoopVectorizationCostModel *CM,
668-
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
669662
GeneratedRTChecks &Check, VPlan &Plan)
670663
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
671-
BFI, PSI, Check, Plan, EPI.MainLoopVF,
664+
Check, Plan, EPI.MainLoopVF,
672665
EPI.MainLoopVF, EPI.MainLoopUF) {}
673666
/// Implements the interface for creating a vectorized skeleton using the
674667
/// *main loop* strategy (i.e., the first pass of VPlan execution).
@@ -698,14 +691,15 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
698691
// their epilogues.
699692
class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
700693
public:
701-
EpilogueVectorizerEpilogueLoop(
702-
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
703-
DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
704-
EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
705-
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
706-
GeneratedRTChecks &Checks, VPlan &Plan)
694+
EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
695+
LoopInfo *LI, DominatorTree *DT,
696+
const TargetTransformInfo *TTI,
697+
AssumptionCache *AC,
698+
EpilogueLoopVectorizationInfo &EPI,
699+
LoopVectorizationCostModel *CM,
700+
GeneratedRTChecks &Checks, VPlan &Plan)
707701
: InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
708-
BFI, PSI, Checks, Plan, EPI.EpilogueVF,
702+
Checks, Plan, EPI.EpilogueVF,
709703
EPI.EpilogueVF, EPI.EpilogueUF) {}
710704
/// Implements the interface for creating a vectorized skeleton using the
711705
/// *epilogue loop* strategy (i.e., the second pass of VPlan execution).
@@ -881,18 +875,13 @@ class LoopVectorizationCostModel {
881875
AssumptionCache *AC,
882876
OptimizationRemarkEmitter *ORE, const Function *F,
883877
const LoopVectorizeHints *Hints,
884-
InterleavedAccessInfo &IAI,
885-
ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
878+
InterleavedAccessInfo &IAI, bool OptForSize)
886879
: ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
887880
TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
888-
Hints(Hints), InterleaveInfo(IAI) {
881+
Hints(Hints), InterleaveInfo(IAI), OptForSize(OptForSize) {
889882
if (TTI.supportsScalableVectors() || ForceTargetSupportsScalableVectors)
890883
initializeVScaleForTuning();
891884
CostKind = F->hasMinSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
892-
// Query this against the original loop and save it here because the profile
893-
// of the original loop header may change as the transformation happens.
894-
OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
895-
PGSOQueryType::IRPass);
896885
}
897886

898887
/// \return An upper bound for the vectorization factors (both fixed and
@@ -9076,20 +9065,13 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
90769065
// predication, and 4) a TTI hook that analyses whether the loop is suitable
90779066
// for predication.
90789067
static ScalarEpilogueLowering getScalarEpilogueLowering(
9079-
Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI,
9080-
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
9068+
Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize,
9069+
TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
90819070
LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) {
90829071
// 1) OptSize takes precedence over all other options, i.e. if this is set,
90839072
// don't look at hints or options, and don't request a scalar epilogue.
9084-
// (For PGSO, as shouldOptimizeForSize isn't currently accessible from
9085-
// LoopAccessInfo (due to code dependency and not being able to reliably get
9086-
// PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
9087-
// of strides in LoopAccessInfo::analyzeLoop() and vectorize without
9088-
// versioning when the vectorization is forced, unlike hasOptSize. So revert
9089-
// back to the old way and vectorize with versioning when forced. See D81345.)
9090-
if (F->hasOptSize() || (llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
9091-
PGSOQueryType::IRPass) &&
9092-
Hints.getForce() != LoopVectorizeHints::FK_Enabled))
9073+
if (F->hasOptSize() ||
9074+
(OptForSize && Hints.getForce() != LoopVectorizeHints::FK_Enabled))
90939075
return CM_ScalarEpilogueNotAllowedOptSize;
90949076

90959077
// 2) If set, obey the directives
@@ -9128,8 +9110,7 @@ static bool processLoopInVPlanNativePath(
91289110
Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
91299111
LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
91309112
TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
9131-
OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
9132-
ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints,
9113+
OptimizationRemarkEmitter *ORE, bool OptForSize, LoopVectorizeHints &Hints,
91339114
LoopVectorizationRequirements &Requirements) {
91349115

91359116
if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
@@ -9141,10 +9122,10 @@ static bool processLoopInVPlanNativePath(
91419122
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
91429123

91439124
ScalarEpilogueLowering SEL =
9144-
getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI);
9125+
getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, *LVL, &IAI);
91459126

91469127
LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
9147-
&Hints, IAI, PSI, BFI);
9128+
&Hints, IAI, OptForSize);
91489129
// Use the planner for outer loop vectorization.
91499130
// TODO: CM is not used at this point inside the planner. Turn CM into an
91509131
// optional argument if we don't need it in the future.
@@ -9170,7 +9151,7 @@ static bool processLoopInVPlanNativePath(
91709151
{
91719152
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
91729153
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, /*UF=*/1, &CM,
9173-
BFI, PSI, Checks, BestPlan);
9154+
Checks, BestPlan);
91749155
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
91759156
<< L->getHeader()->getParent()->getName() << "\"\n");
91769157
LVP.addMinimumIterationCheck(BestPlan, VF.Width, /*UF=*/1,
@@ -9831,10 +9812,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98319812

98329813
PredicatedScalarEvolution PSE(*SE, *L);
98339814

9815+
// Query this against the original loop and save it here because the profile
9816+
// of the original loop header may change as the transformation happens.
9817+
bool OptForSize = llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI,
9818+
PGSOQueryType::IRPass);
9819+
98349820
// Check if it is legal to vectorize the loop.
98359821
LoopVectorizationRequirements Requirements;
98369822
LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
9837-
&Requirements, &Hints, DB, AC, BFI, PSI, AA);
9823+
&Requirements, &Hints, DB, AC,
9824+
/*AllowRuntimeSCEVChecks=*/!OptForSize, AA);
98389825
if (!LVL.canVectorize(EnableVPlanNativePath)) {
98399826
LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
98409827
Hints.emitRemarkWithHints();
@@ -9862,7 +9849,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98629849
// pipeline.
98639850
if (!L->isInnermost())
98649851
return processLoopInVPlanNativePath(L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
9865-
ORE, BFI, PSI, Hints, Requirements);
9852+
ORE, OptForSize, Hints, Requirements);
98669853

98679854
assert(L->isInnermost() && "Inner loop expected.");
98689855

@@ -9892,7 +9879,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98929879
// Check the function attributes and profiles to find out if this function
98939880
// should be optimized for size.
98949881
ScalarEpilogueLowering SEL =
9895-
getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI);
9882+
getScalarEpilogueLowering(F, L, Hints, OptForSize, TTI, TLI, LVL, &IAI);
98969883

98979884
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
98989885
// count by optimizing for size, to minimize overheads.
@@ -9965,7 +9952,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99659952

99669953
// Use the cost model.
99679954
LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
9968-
F, &Hints, IAI, PSI, BFI);
9955+
F, &Hints, IAI, OptForSize);
99699956
// Use the planner for vectorization.
99709957
LoopVectorizationPlanner LVP(L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
99719958
ORE);
@@ -10167,16 +10154,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016710154
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
1016810155
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
1016910156
BestEpiPlan);
10170-
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI,
10171-
PSI, Checks, *BestMainPlan);
10157+
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
10158+
Checks, *BestMainPlan);
1017210159
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
1017310160
*BestMainPlan, MainILV, DT, false);
1017410161
++LoopsVectorized;
1017510162

1017610163
// Second pass vectorizes the epilogue and adjusts the control flow
1017710164
// edges from the first pass.
1017810165
EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
10179-
BFI, PSI, Checks, BestEpiPlan);
10166+
Checks, BestEpiPlan);
1018010167
SmallVector<Instruction *> InstsToMove = preparePlanForEpilogueVectorLoop(
1018110168
BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE());
1018210169
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, DT,
@@ -10185,8 +10172,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1018510172
Checks, InstsToMove);
1018610173
++LoopsEpilogueVectorized;
1018710174
} else {
10188-
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, BFI, PSI,
10189-
Checks, BestPlan);
10175+
InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, IC, &CM, Checks,
10176+
BestPlan);
1019010177
// TODO: Move to general VPlan pipeline once epilogue loops are also
1019110178
// supported.
1019210179
VPlanTransforms::runPass(

0 commit comments

Comments
 (0)