@@ -501,12 +501,11 @@ class InnerLoopVectorizer {
501501 LoopInfo *LI, DominatorTree *DT,
502502 const TargetTransformInfo *TTI, AssumptionCache *AC,
503503 ElementCount VecWidth, unsigned UnrollFactor,
504- LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
505- ProfileSummaryInfo *PSI, GeneratedRTChecks &RTChecks,
506- VPlan &Plan)
504+ LoopVectorizationCostModel *CM,
505+ GeneratedRTChecks &RTChecks, VPlan &Plan)
507506 : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC),
508507 VF (VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()),
509- Cost(CM), BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
508+ Cost(CM), RTChecks(RTChecks), Plan(Plan),
510509 VectorPHVPBB(cast<VPBasicBlock>(
511510 Plan.getVectorLoopRegion()->getSinglePredecessor())) {}
512511
@@ -583,10 +582,6 @@ class InnerLoopVectorizer {
583582 // / The profitability analysis.
584583 LoopVectorizationCostModel *Cost;
585584
586- // / BFI and PSI are used to check for profile guided size optimizations.
587- BlockFrequencyInfo *BFI;
588- ProfileSummaryInfo *PSI;
589-
590585 // / Structure to hold information about generated runtime checks, responsible
591586 // / for cleaning the checks, if vectorization turns out unprofitable.
592587 GeneratedRTChecks &RTChecks;
@@ -635,11 +630,10 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
635630 Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
636631 DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
637632 EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
638- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
639633 GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth,
640634 ElementCount MinProfitableTripCount, unsigned UnrollFactor)
641635 : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth,
642- UnrollFactor, CM, BFI, PSI, Checks, Plan),
636+ UnrollFactor, CM, Checks, Plan),
643637 EPI (EPI), MinProfitableTripCount(MinProfitableTripCount) {}
644638
645639 // / Holds and updates state information required to vectorize the main loop
@@ -665,10 +659,9 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
665659 AssumptionCache *AC,
666660 EpilogueLoopVectorizationInfo &EPI,
667661 LoopVectorizationCostModel *CM,
668- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
669662 GeneratedRTChecks &Check, VPlan &Plan)
670663 : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
671- BFI, PSI, Check, Plan, EPI.MainLoopVF,
664+ Check, Plan, EPI.MainLoopVF,
672665 EPI.MainLoopVF, EPI.MainLoopUF) {}
673666 // / Implements the interface for creating a vectorized skeleton using the
674667 // / *main loop* strategy (i.e., the first pass of VPlan execution).
@@ -698,14 +691,15 @@ class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
698691// their epilogues.
699692class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
700693public:
701- EpilogueVectorizerEpilogueLoop (
702- Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
703- DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
704- EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
705- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
706- GeneratedRTChecks &Checks, VPlan &Plan)
694+ EpilogueVectorizerEpilogueLoop (Loop *OrigLoop, PredicatedScalarEvolution &PSE,
695+ LoopInfo *LI, DominatorTree *DT,
696+ const TargetTransformInfo *TTI,
697+ AssumptionCache *AC,
698+ EpilogueLoopVectorizationInfo &EPI,
699+ LoopVectorizationCostModel *CM,
700+ GeneratedRTChecks &Checks, VPlan &Plan)
707701 : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
708- BFI, PSI, Checks, Plan, EPI.EpilogueVF,
702+ Checks, Plan, EPI.EpilogueVF,
709703 EPI.EpilogueVF, EPI.EpilogueUF) {}
710704 // / Implements the interface for creating a vectorized skeleton using the
711705 // / *epilogue loop* strategy (i.e., the second pass of VPlan execution).
@@ -881,18 +875,13 @@ class LoopVectorizationCostModel {
881875 AssumptionCache *AC,
882876 OptimizationRemarkEmitter *ORE, const Function *F,
883877 const LoopVectorizeHints *Hints,
884- InterleavedAccessInfo &IAI,
885- ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
878+ InterleavedAccessInfo &IAI, bool OptForSize)
886879 : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
887880 TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
888- Hints(Hints), InterleaveInfo(IAI) {
881+ Hints(Hints), InterleaveInfo(IAI), OptForSize(OptForSize) {
889882 if (TTI.supportsScalableVectors () || ForceTargetSupportsScalableVectors)
890883 initializeVScaleForTuning ();
891884 CostKind = F->hasMinSize () ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
892- // Query this against the original loop and save it here because the profile
893- // of the original loop header may change as the transformation happens.
894- OptForSize = llvm::shouldOptimizeForSize (L->getHeader (), PSI, BFI,
895- PGSOQueryType::IRPass);
896885 }
897886
898887 // / \return An upper bound for the vectorization factors (both fixed and
@@ -9076,20 +9065,13 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
90769065// predication, and 4) a TTI hook that analyses whether the loop is suitable
90779066// for predication.
90789067static ScalarEpilogueLowering getScalarEpilogueLowering (
9079- Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI ,
9080- BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
9068+ Function *F, Loop *L, LoopVectorizeHints &Hints, bool OptForSize ,
9069+ TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
90819070 LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) {
90829071 // 1) OptSize takes precedence over all other options, i.e. if this is set,
90839072 // don't look at hints or options, and don't request a scalar epilogue.
9084- // (For PGSO, as shouldOptimizeForSize isn't currently accessible from
9085- // LoopAccessInfo (due to code dependency and not being able to reliably get
9086- // PSI/BFI from a loop analysis under NPM), we cannot suppress the collection
9087- // of strides in LoopAccessInfo::analyzeLoop() and vectorize without
9088- // versioning when the vectorization is forced, unlike hasOptSize. So revert
9089- // back to the old way and vectorize with versioning when forced. See D81345.)
9090- if (F->hasOptSize () || (llvm::shouldOptimizeForSize (L->getHeader (), PSI, BFI,
9091- PGSOQueryType::IRPass) &&
9092- Hints.getForce () != LoopVectorizeHints::FK_Enabled))
9073+ if (F->hasOptSize () ||
9074+ (OptForSize && Hints.getForce () != LoopVectorizeHints::FK_Enabled))
90939075 return CM_ScalarEpilogueNotAllowedOptSize;
90949076
90959077 // 2) If set, obey the directives
@@ -9128,8 +9110,7 @@ static bool processLoopInVPlanNativePath(
91289110 Loop *L, PredicatedScalarEvolution &PSE, LoopInfo *LI, DominatorTree *DT,
91299111 LoopVectorizationLegality *LVL, TargetTransformInfo *TTI,
91309112 TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC,
9131- OptimizationRemarkEmitter *ORE, BlockFrequencyInfo *BFI,
9132- ProfileSummaryInfo *PSI, LoopVectorizeHints &Hints,
9113+ OptimizationRemarkEmitter *ORE, bool OptForSize, LoopVectorizeHints &Hints,
91339114 LoopVectorizationRequirements &Requirements) {
91349115
91359116 if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount ())) {
@@ -9141,10 +9122,10 @@ static bool processLoopInVPlanNativePath(
91419122 InterleavedAccessInfo IAI (PSE, L, DT, LI, LVL->getLAI ());
91429123
91439124 ScalarEpilogueLowering SEL =
9144- getScalarEpilogueLowering (F, L, Hints, PSI, BFI , TTI, TLI, *LVL, &IAI);
9125+ getScalarEpilogueLowering (F, L, Hints, OptForSize , TTI, TLI, *LVL, &IAI);
91459126
91469127 LoopVectorizationCostModel CM (SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
9147- &Hints, IAI, PSI, BFI );
9128+ &Hints, IAI, OptForSize );
91489129 // Use the planner for outer loop vectorization.
91499130 // TODO: CM is not used at this point inside the planner. Turn CM into an
91509131 // optional argument if we don't need it in the future.
@@ -9170,7 +9151,7 @@ static bool processLoopInVPlanNativePath(
91709151 {
91719152 GeneratedRTChecks Checks (PSE, DT, LI, TTI, F->getDataLayout (), CM.CostKind );
91729153 InnerLoopVectorizer LB (L, PSE, LI, DT, TTI, AC, VF.Width , /* UF=*/ 1 , &CM,
9173- BFI, PSI, Checks, BestPlan);
9154+ Checks, BestPlan);
91749155 LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
91759156 << L->getHeader ()->getParent ()->getName () << " \"\n " );
91769157 LVP.addMinimumIterationCheck (BestPlan, VF.Width , /* UF=*/ 1 ,
@@ -9831,10 +9812,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98319812
98329813 PredicatedScalarEvolution PSE (*SE, *L);
98339814
9815+ // Query this against the original loop and save it here because the profile
9816+ // of the original loop header may change as the transformation happens.
9817+ bool OptForSize = llvm::shouldOptimizeForSize (L->getHeader (), PSI, BFI,
9818+ PGSOQueryType::IRPass);
9819+
98349820 // Check if it is legal to vectorize the loop.
98359821 LoopVectorizationRequirements Requirements;
98369822 LoopVectorizationLegality LVL (L, PSE, DT, TTI, TLI, F, *LAIs, LI, ORE,
9837- &Requirements, &Hints, DB, AC, BFI, PSI, AA);
9823+ &Requirements, &Hints, DB, AC,
9824+ /* AllowRuntimeSCEVChecks=*/ !OptForSize, AA);
98389825 if (!LVL.canVectorize (EnableVPlanNativePath)) {
98399826 LLVM_DEBUG (dbgs () << " LV: Not vectorizing: Cannot prove legality.\n " );
98409827 Hints.emitRemarkWithHints ();
@@ -9862,7 +9849,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98629849 // pipeline.
98639850 if (!L->isInnermost ())
98649851 return processLoopInVPlanNativePath (L, PSE, LI, DT, &LVL, TTI, TLI, DB, AC,
9865- ORE, BFI, PSI , Hints, Requirements);
9852+ ORE, OptForSize , Hints, Requirements);
98669853
98679854 assert (L->isInnermost () && " Inner loop expected." );
98689855
@@ -9892,7 +9879,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98929879 // Check the function attributes and profiles to find out if this function
98939880 // should be optimized for size.
98949881 ScalarEpilogueLowering SEL =
9895- getScalarEpilogueLowering (F, L, Hints, PSI, BFI , TTI, TLI, LVL, &IAI);
9882+ getScalarEpilogueLowering (F, L, Hints, OptForSize , TTI, TLI, LVL, &IAI);
98969883
98979884 // Check the loop for a trip count threshold: vectorize loops with a tiny trip
98989885 // count by optimizing for size, to minimize overheads.
@@ -9965,7 +9952,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
99659952
99669953 // Use the cost model.
99679954 LoopVectorizationCostModel CM (SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
9968- F, &Hints, IAI, PSI, BFI );
9955+ F, &Hints, IAI, OptForSize );
99699956 // Use the planner for vectorization.
99709957 LoopVectorizationPlanner LVP (L, LI, DT, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
99719958 ORE);
@@ -10167,16 +10154,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1016710154 preparePlanForMainVectorLoop (*BestMainPlan, BestEpiPlan);
1016810155 EpilogueLoopVectorizationInfo EPI (VF.Width , IC, EpilogueVF.Width , 1 ,
1016910156 BestEpiPlan);
10170- EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI,
10171- PSI, Checks, *BestMainPlan);
10157+ EpilogueVectorizerMainLoop MainILV (L, PSE, LI, DT, TTI, AC, EPI, &CM,
10158+ Checks, *BestMainPlan);
1017210159 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1017310160 *BestMainPlan, MainILV, DT, false );
1017410161 ++LoopsVectorized;
1017510162
1017610163 // Second pass vectorizes the epilogue and adjusts the control flow
1017710164 // edges from the first pass.
1017810165 EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TTI, AC, EPI, &CM,
10179- BFI, PSI, Checks, BestEpiPlan);
10166+ Checks, BestEpiPlan);
1018010167 SmallVector<Instruction *> InstsToMove = preparePlanForEpilogueVectorLoop (
1018110168 BestEpiPlan, L, ExpandedSCEVs, EPI, CM, *PSE.getSE ());
1018210169 LVP.executePlan (EPI.EpilogueVF , EPI.EpilogueUF , BestEpiPlan, EpilogILV, DT,
@@ -10185,8 +10172,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1018510172 Checks, InstsToMove);
1018610173 ++LoopsEpilogueVectorized;
1018710174 } else {
10188- InnerLoopVectorizer LB (L, PSE, LI, DT, TTI, AC, VF.Width , IC, &CM, BFI, PSI ,
10189- Checks, BestPlan);
10175+ InnerLoopVectorizer LB (L, PSE, LI, DT, TTI, AC, VF.Width , IC, &CM, Checks ,
10176+ BestPlan);
1019010177 // TODO: Move to general VPlan pipeline once epilogue loops are also
1019110178 // supported.
1019210179 VPlanTransforms::runPass (
0 commit comments