47#define DEBUG_TYPE "select-optimize"
50 "Number of select groups considered for conversion to branch");
52 "Number of select groups converted due to expensive cold operand");
54 "Number of select groups converted due to high-predictability");
56 "Number of select groups not converted due to unpredictability");
58 "Number of select groups not converted due to cold basic block");
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted,
"Number of selects converted");
68 "cold-operand-threshold",
69 cl::desc(
"Maximum frequency of path for an operand to be considered cold."),
73 "cold-operand-max-cost-multiplier",
74 cl::desc(
"Maximum cost multiplier of TCC_expensive for the dependence "
75 "slice of a cold operand to be considered inexpensive."),
80 cl::desc(
"Gradient gain threshold (%)."),
85 cl::desc(
"Minimum gain per loop (in cycles) threshold."),
89 "select-opti-loop-relative-gain-threshold",
91 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
96 cl::desc(
"Default mispredict rate (initialized to 25%)."));
101 cl::desc(
"Disable loop-level heuristics."));
105class SelectOptimizeImpl {
117 SelectOptimizeImpl() =
default;
122 using Scaled64 = ScaledNumber<uint64_t>;
128 Scaled64 NonPredCost;
139 bool Inverted =
false;
146 SelectLike(Instruction *I,
bool Inverted =
false,
unsigned CondIdx = 0)
147 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
154 unsigned getConditionOpIndex() {
return CondIdx; };
160 Value *getTrueValue(
bool HonorInverts =
true)
const {
161 if (Inverted && HonorInverts)
162 return getFalseValue(
false);
164 return Sel->getTrueValue();
176 Value *getFalseValue(
bool HonorInverts =
true)
const {
177 if (Inverted && HonorInverts)
178 return getTrueValue(
false);
180 return Sel->getFalseValue();
185 return BO->getOperand(1 - CondIdx);
193 Scaled64 getOpCostOnBranch(
194 bool IsTrue,
const DenseMap<const Instruction *, CostInfo> &InstCostMap,
195 const TargetTransformInfo *
TTI) {
196 auto *
V = IsTrue ? getTrueValue() : getFalseValue();
199 auto It = InstCostMap.
find(
IV);
200 return It != InstCostMap.
end() ? It->second.NonPredCost
211 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
212 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
215 auto It = InstCostMap.find(OpI);
216 if (It != InstCostMap.end())
217 TotalCost += It->second.NonPredCost;
231 using SelectGroups = SmallVector<SelectGroup, 2>;
235 bool optimizeSelects(Function &
F);
241 void optimizeSelectsBase(Function &
F, SelectGroups &ProfSIGroups);
242 void optimizeSelectsInnerLoops(Function &
F, SelectGroups &ProfSIGroups);
246 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
249 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
253 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
254 SelectGroups &ProfSIGroups);
255 void findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups,
256 SelectGroups &ProfSIGroups);
260 bool isConvertToBranchProfitableBase(
const SelectGroup &ASI);
265 bool hasExpensiveColdOperand(
const SelectGroup &ASI);
270 void getExclBackwardsSlice(Instruction *
I, std::stack<Instruction *> &Slice,
271 Instruction *SI,
bool ForSinking =
false);
274 bool isSelectHighlyPredictable(
const SelectLike SI);
278 bool checkLoopHeuristics(
const Loop *L,
const CostInfo LoopDepth[2]);
282 bool computeLoopCosts(
const Loop *L,
const SelectGroups &SIGroups,
283 DenseMap<const Instruction *, CostInfo> &InstCostMap,
287 SmallDenseMap<const Instruction *, SelectLike, 2>
288 getSImap(
const SelectGroups &SIGroups);
292 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
293 getSGmap(
const SelectGroups &SIGroups);
296 std::optional<uint64_t> computeInstCost(
const Instruction *
I);
299 Scaled64 getMispredictionCost(
const SelectLike SI,
const Scaled64 CondCost);
302 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
303 const SelectLike SI);
306 bool isSelectKindSupported(
const SelectLike SI);
310 SelectOptimizeImpl Impl;
315 SelectOptimize() : FunctionPass(ID) {}
321 return Impl.runOnFunction(
F, *
this);
324 void getAnalysisUsage(AnalysisUsage &AU)
const override {
330 AU.
addRequired<OptimizationRemarkEmitterWrapperPass>();
338 SelectOptimizeImpl Impl(TM);
339 return Impl.run(
F,
FAM);
342char SelectOptimize::ID = 0;
371 if (!
TTI->enableSelectOptimize())
375 .getCachedResult<ProfileSummaryAnalysis>(*
F.getParent());
378 "analysis to be available");
387 TSchedModel.
init(TSI);
408 if (!
TTI->enableSelectOptimize())
415 TSchedModel.
init(TSI);
421 return optimizeSelects(
F);
424bool SelectOptimizeImpl::optimizeSelects(
Function &
F) {
426 SelectGroups ProfSIGroups;
428 optimizeSelectsBase(
F, ProfSIGroups);
430 optimizeSelectsInnerLoops(
F, ProfSIGroups);
434 convertProfitableSIGroups(ProfSIGroups);
437 return !ProfSIGroups.empty();
440void SelectOptimizeImpl::optimizeSelectsBase(
Function &
F,
441 SelectGroups &ProfSIGroups) {
443 SelectGroups SIGroups;
447 if (L &&
L->isInnermost())
449 collectSelectGroups(BB, SIGroups);
453 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
456void SelectOptimizeImpl::optimizeSelectsInnerLoops(
Function &
F,
457 SelectGroups &ProfSIGroups) {
460 for (
unsigned long i = 0; i <
Loops.size(); ++i)
464 if (!
L->isInnermost())
467 SelectGroups SIGroups;
469 collectSelectGroups(*BB, SIGroups);
471 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
487 SelectOptimizeImpl::SelectLike &
SI,
bool isTrue,
490 Value *V = isTrue ?
SI.getTrueValue() :
SI.getFalseValue();
493 if (
auto It = OptSelects.find(
IV); It != OptSelects.end())
494 return isTrue ? It->second.first : It->second.second;
499 assert((BO->getOpcode() == Instruction::Add ||
500 BO->getOpcode() == Instruction::Or ||
501 BO->getOpcode() == Instruction::Sub) &&
502 "Only currently handling Add, Or and Sub binary operators.");
504 auto *CBO = BO->clone();
505 auto CondIdx =
SI.getConditionOpIndex();
508 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
511 "Unexpected opcode");
515 unsigned OtherIdx = 1 - CondIdx;
517 if (
auto It = OptSelects.find(
IV); It != OptSelects.end())
518 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
520 CBO->insertBefore(
B->getTerminator()->getIterator());
524void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
525 for (SelectGroup &ASI : ProfSIGroups) {
565 typedef std::stack<Instruction *>::size_type StackSizeType;
566 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
568 bool SelectWithProfileIsInverted =
false;
569 for (SelectLike &
SI : ASI.Selects) {
575 std::stack<Instruction *> TrueSlice;
576 getExclBackwardsSlice(TI, TrueSlice,
SI.getI(),
true);
577 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
582 std::stack<Instruction *> FalseSlice;
583 getExclBackwardsSlice(FI, FalseSlice,
SI.getI(),
true);
584 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
591 if (
hasProfMD(*
SI.getI()) && ASI.Condition == SelectCondition) {
592 SelectWithProfile =
SI.getI();
595 SelectWithProfile =
SI.getI();
596 SelectWithProfileIsInverted =
true;
610 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
611 for (
auto &S : TrueSlices) {
613 TrueSlicesInterleaved.
push_back(S.top());
618 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
619 for (
auto &S : FalseSlices) {
621 FalseSlicesInterleaved.
push_back(S.top());
628 SelectLike &
SI = ASI.Selects.front();
629 SelectLike &LastSI = ASI.Selects.back();
637 SplitPt.setHeadBit(
true);
646 auto DIt =
SI.getI()->getIterator();
647 auto NIt = ASI.Selects.begin();
648 while (&*DIt != LastSI.getI()) {
649 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
656 for (
auto *DI : SinkInstrs)
657 DI->moveBeforePreserving(InsertionPoint);
674 std::next(LastSI.getI()->getIterator()));
679 BasicBlock *TrueBlock =
nullptr, *FalseBlock =
nullptr;
680 UncondBrInst *TrueBranch =
nullptr, *FalseBranch =
nullptr;
682 auto HasSelectLike = [](SelectGroup &SG,
bool IsTrue) {
683 for (
auto &SL : SG.Selects) {
684 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) ==
nullptr)
689 if (!TrueSlicesInterleaved.
empty() || HasSelectLike(ASI,
true)) {
693 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
694 for (
Instruction *TrueInst : TrueSlicesInterleaved)
695 TrueInst->moveBefore(TrueBranch->getIterator());
697 if (!FalseSlicesInterleaved.
empty() || HasSelectLike(ASI,
false)) {
702 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
703 for (
Instruction *FalseInst : FalseSlicesInterleaved)
704 FalseInst->moveBefore(FalseBranch->getIterator());
708 if (TrueBlock == FalseBlock) {
709 assert(TrueBlock ==
nullptr &&
710 "Unexpected basic block transform while optimizing select");
715 FalseBranch->setDebugLoc(
SI.getI()->getDebugLoc());
724 if (TrueBlock ==
nullptr) {
727 TrueBlock = StartBlock;
728 }
else if (FalseBlock ==
nullptr) {
731 FalseBlock = StartBlock;
738 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() +
".frozen");
745 InsertionPoint = EndBlock->
begin();
746 for (SelectLike &
SI : ASI.Selects) {
754 for (
auto &SG : ProfSIGroups) {
755 if (SG.Condition ==
SI.getI())
759 SI.getI()->replaceAllUsesWith(PN);
766 ++NumSelectsConverted;
770 CondBr->
copyMetadata(*SelectWithProfile, {llvm::LLVMContext::MD_prof});
771 if (SelectWithProfileIsInverted)
778 for (SelectLike &
SI : ASI.Selects)
779 SI.getI()->eraseFromParent();
783void SelectOptimizeImpl::collectSelectGroups(
BasicBlock &BB,
784 SelectGroups &SIGroups) {
794 struct SelectLikeInfo {
798 unsigned ConditionIdx;
809 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](
Instruction *
I) {
812 return SelectInfo.
end();
817 Cond->getType()->isIntegerTy(1)) {
819 return SelectInfo.
insert({
I, {
Cond,
true, Inverted, 0}}).first;
823 return SelectInfo.
insert({
I, {
Cond,
true,
true, 0}}).first;
829 return SelectInfo.
insert({
I, {
Cond,
false, Inverted, 0}}).first;
834 I->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1) {
835 for (
auto *CmpI : SeenCmp) {
836 auto Pred = CmpI->getPredicate();
837 if (Val != CmpI->getOperand(0))
849 return SelectInfo.
insert({
I, {CmpI,
true, Inverted, 0}}).first;
852 return SelectInfo.
end();
860 auto MatchZExtOrSExtPattern =
862 auto MatchShiftPattern =
867 if ((
match(
I, MatchZExtOrSExtPattern) &&
X->getType()->isIntegerTy(1)) ||
868 (
match(
I, MatchShiftPattern) &&
869 X->getType()->getIntegerBitWidth() == Shift->
getZExtValue() + 1)) {
870 if (
I->getOpcode() != Instruction::Add &&
871 I->getOpcode() != Instruction::Sub &&
872 I->getOpcode() != Instruction::Or)
873 return SelectInfo.
end();
875 if (
I->getOpcode() == Instruction::Or &&
I->getType()->isIntegerTy(1))
876 return SelectInfo.
end();
882 unsigned Idx =
I->getOpcode() == Instruction::Sub ? 1 : 0;
883 for (; Idx < 2; Idx++) {
884 auto *
Op =
I->getOperand(Idx);
885 auto It = SelectInfo.
find(
Op);
886 if (It != SelectInfo.
end() && It->second.IsAuxiliary) {
887 Cond = It->second.Cond;
888 bool Inverted = It->second.IsInverted;
889 return SelectInfo.
insert({
I, {
Cond,
false, Inverted, Idx}}).first;
893 return SelectInfo.
end();
896 bool AlreadyProcessed =
false;
899 while (BBIt != BB.
end()) {
901 if (
I->isDebugOrPseudoInst())
904 if (!AlreadyProcessed)
905 It = ProcessSelectInfo(
I);
907 AlreadyProcessed =
false;
909 if (It == SelectInfo.
end() || It->second.IsAuxiliary)
912 if (!
TTI->shouldTreatInstructionLikeSelect(
I))
917 if (!
Cond->getType()->isIntegerTy(1))
920 SelectGroup SIGroup = {
Cond, {}};
921 SIGroup.Selects.emplace_back(
I, It->second.IsInverted,
922 It->second.ConditionIdx);
926 if (!isSelectKindSupported(SIGroup.Selects.front()))
929 while (BBIt != BB.
end()) {
938 It = ProcessSelectInfo(NI);
939 if (It == SelectInfo.
end()) {
940 AlreadyProcessed =
true;
945 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
946 if (
Cond != CurrCond) {
947 AlreadyProcessed =
true;
952 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
956 dbgs() <<
"New Select group (" << SIGroup.Selects.size() <<
") with\n";
957 for (
auto &
SI : SIGroup.Selects)
958 dbgs() <<
" " << *
SI.getI() <<
"\n";
961 SIGroups.push_back(SIGroup);
965void SelectOptimizeImpl::findProfitableSIGroupsBase(
966 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
967 for (SelectGroup &ASI : SIGroups) {
968 ++NumSelectOptAnalyzed;
969 if (isConvertToBranchProfitableBase(ASI))
970 ProfSIGroups.push_back(ASI);
980void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
981 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
982 NumSelectOptAnalyzed += SIGroups.size();
994 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
995 {Scaled64::getZero(), Scaled64::getZero()}};
996 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
997 !checkLoopHeuristics(L, LoopCost)) {
1001 for (SelectGroup &ASI : SIGroups) {
1004 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
1005 for (SelectLike &
SI : ASI.Selects) {
1006 const auto &ICM = InstCostMap[
SI.getI()];
1007 SelectCost = std::max(SelectCost, ICM.PredCost);
1008 BranchCost = std::max(BranchCost, ICM.NonPredCost);
1010 if (BranchCost < SelectCost) {
1012 ASI.Selects.front().getI());
1013 OR <<
"Profitable to convert to branch (loop analysis). BranchCost="
1014 << BranchCost.toString() <<
", SelectCost=" << SelectCost.toString()
1017 ++NumSelectConvertedLoop;
1018 ProfSIGroups.push_back(ASI);
1021 ASI.Selects.front().getI());
1022 ORmiss <<
"Select is more profitable (loop analysis). BranchCost="
1023 << BranchCost.toString()
1024 <<
", SelectCost=" << SelectCost.toString() <<
". ";
1030bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1031 const SelectGroup &ASI) {
1032 const SelectLike &
SI = ASI.Selects.front();
1041 ORmiss <<
"Not converted to branch because of cold basic block. ";
1047 if (
SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1049 ORmiss <<
"Not converted to branch because of unpredictable branch. ";
1057 ++NumSelectConvertedHighPred;
1058 OR <<
"Converted to branch because of highly predictable branch. ";
1065 if (hasExpensiveColdOperand(ASI)) {
1066 ++NumSelectConvertedExpColdOperand;
1067 OR <<
"Converted to branch because of expensive cold operand.";
1075 auto *BB =
SI.getI()->getParent();
1077 if (L && !
L->isInnermost() &&
L->getLoopLatch() == BB &&
1078 ASI.Selects.size() >= 3) {
1079 OR <<
"Converted to branch because select group in the latch block is big.";
1084 ORmiss <<
"Not profitable to convert to branch (base heuristic).";
1091 return (Numerator + (Denominator / 2)) / Denominator;
1101bool SelectOptimizeImpl::hasExpensiveColdOperand(
const SelectGroup &ASI) {
1102 bool ColdOperand =
false;
1103 uint64_t TrueWeight, FalseWeight, TotalWeight;
1105 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1106 TotalWeight = TrueWeight + FalseWeight;
1111 ASI.Selects.front().getI());
1112 ORmiss <<
"Profile data available but missing branch-weights metadata for "
1113 "select instruction. ";
1120 for (SelectLike
SI : ASI.Selects) {
1123 if (TrueWeight < FalseWeight) {
1125 HotWeight = FalseWeight;
1128 HotWeight = TrueWeight;
1131 std::stack<Instruction *> ColdSlice;
1132 getExclBackwardsSlice(ColdI, ColdSlice,
SI.getI());
1134 while (!ColdSlice.empty()) {
1135 SliceCost +=
TTI->getInstructionCost(ColdSlice.top(),
1161 while (&*It !=
SI) {
1162 if (It->mayWriteToMemory())
1175void SelectOptimizeImpl::getExclBackwardsSlice(
Instruction *
I,
1176 std::stack<Instruction *> &Slice,
1180 std::queue<Instruction *> Worklist;
1182 while (!Worklist.empty()) {
1190 if (!
II->hasOneUse())
1196 if (ForSinking && (
II->isTerminator() ||
II->mayHaveSideEffects() ||
1222bool SelectOptimizeImpl::isSelectHighlyPredictable(
const SelectLike
SI) {
1223 uint64_t TrueWeight, FalseWeight;
1225 uint64_t
Max = std::max(TrueWeight, FalseWeight);
1226 uint64_t Sum = TrueWeight + FalseWeight;
1229 if (Probability >
TTI->getPredictableBranchThreshold())
1236bool SelectOptimizeImpl::checkLoopHeuristics(
const Loop *L,
1237 const CostInfo LoopCost[2]) {
1245 &*
L->getHeader()->getFirstNonPHIIt());
1247 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1248 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1249 ORmissL <<
"No select conversion in the loop due to no reduction of loop's "
1255 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1256 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1263 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1264 ORmissL <<
"No select conversion in the loop due to small reduction of "
1265 "loop's critical path. Gain="
1266 << Gain[1].toString()
1267 <<
", RelativeGain=" << RelativeGain.toString() <<
"%. ";
1277 if (Gain[1] > Gain[0]) {
1278 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1279 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1281 ORmissL <<
"No select conversion in the loop due to small gradient gain. "
1283 << GradientGain.toString() <<
"%. ";
1289 else if (Gain[1] < Gain[0]) {
1291 <<
"No select conversion in the loop due to negative gradient gain. ";
1305bool SelectOptimizeImpl::computeLoopCosts(
1306 const Loop *L,
const SelectGroups &SIGroups,
1308 LLVM_DEBUG(
dbgs() <<
"Calculating Latency / IPredCost / INonPredCost of loop "
1309 <<
L->getHeader()->getName() <<
"\n");
1310 const auto SImap = getSImap(SIGroups);
1311 const auto SGmap = getSGmap(SIGroups);
1314 const unsigned Iterations = 2;
1315 for (
unsigned Iter = 0; Iter < Iterations; ++Iter) {
1317 CostInfo &MaxCost = LoopCost[Iter];
1320 if (
I.isDebugOrPseudoInst())
1323 Scaled64 IPredCost = Scaled64::getZero(),
1324 INonPredCost = Scaled64::getZero();
1329 for (
const Use &U :
I.operands()) {
1333 if (
auto It = InstCostMap.
find(UI); It != InstCostMap.
end()) {
1334 IPredCost = std::max(IPredCost, It->second.PredCost);
1335 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1338 auto ILatency = computeInstCost(&
I);
1341 ORmissL <<
"Invalid instruction cost preventing analysis and "
1342 "optimization of the inner-most loop containing this "
1347 IPredCost += Scaled64::get(*ILatency);
1348 INonPredCost += Scaled64::get(*ILatency);
1356 if (
auto It = SImap.find(&
I); It != SImap.end()) {
1357 auto SI = It->second;
1358 const auto *SG = SGmap.at(&
I);
1359 Scaled64 TrueOpCost =
SI.getOpCostOnBranch(
true, InstCostMap,
TTI);
1360 Scaled64 FalseOpCost =
SI.getOpCostOnBranch(
false, InstCostMap,
TTI);
1361 Scaled64 PredictedPathCost =
1362 getPredictedPathCost(TrueOpCost, FalseOpCost,
SI);
1364 Scaled64 CondCost = Scaled64::getZero();
1366 if (
auto It = InstCostMap.
find(CI); It != InstCostMap.
end())
1367 CondCost = It->second.NonPredCost;
1368 Scaled64 MispredictCost = getMispredictionCost(
SI, CondCost);
1370 INonPredCost = PredictedPathCost + MispredictCost;
1373 << INonPredCost <<
" for " <<
I <<
"\n");
1375 InstCostMap[&
I] = {IPredCost, INonPredCost};
1376 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1377 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1381 <<
" MaxCost = " << MaxCost.PredCost <<
" "
1382 << MaxCost.NonPredCost <<
"\n");
1388SelectOptimizeImpl::getSImap(
const SelectGroups &SIGroups) {
1390 for (
const SelectGroup &ASI : SIGroups)
1391 for (
const SelectLike &
SI : ASI.Selects)
1397SelectOptimizeImpl::getSGmap(
const SelectGroups &SIGroups) {
1399 for (
const SelectGroup &ASI : SIGroups)
1400 for (
const SelectLike &
SI : ASI.Selects)
1405std::optional<uint64_t>
1406SelectOptimizeImpl::computeInstCost(
const Instruction *
I) {
1410 return std::optional<uint64_t>(ICost.
getValue());
1411 return std::nullopt;
1415SelectOptimizeImpl::getMispredictionCost(
const SelectLike
SI,
1416 const Scaled64 CondCost) {
1424 if (isSelectHighlyPredictable(
SI))
1430 Scaled64 MispredictCost =
1431 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1432 Scaled64::get(MispredictRate);
1433 MispredictCost /= Scaled64::get(100);
1435 return MispredictCost;
1441SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1442 const SelectLike
SI) {
1443 Scaled64 PredPathCost;
1444 uint64_t TrueWeight, FalseWeight;
1446 uint64_t SumWeight = TrueWeight + FalseWeight;
1447 if (SumWeight != 0) {
1448 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1449 FalseCost * Scaled64::get(FalseWeight);
1450 PredPathCost /= Scaled64::get(SumWeight);
1451 return PredPathCost;
1456 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1457 FalseCost * Scaled64::get(3) + TrueCost);
1458 PredPathCost /= Scaled64::get(4);
1459 return PredPathCost;
1462bool SelectOptimizeImpl::isSelectKindSupported(
const SelectLike
SI) {
1464 if (
SI.getType()->isVectorTy())
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static bool runOnFunction(Function &F, bool PostInlining)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
print mir2vec MIR2Vec Vocabulary Printer Pass
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class, its corresponding pass name is se...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
static const uint32_t IV[8]
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ ICMP_SGT
signed greater than
@ ICMP_SGE
signed greater or equal
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
std::string getMsg() const
FunctionPass class - This class is used to implement most global optimizations.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void swapProfMetadata()
If the instruction has "branch_weights" MD_prof metadata and the MDNode has three operands (including...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Analysis pass that exposes the LoopInfo for a function.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pass interface - Implemented by all 'passes'.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
bool isIntegerTy() const
True if this is an instance of IntegerType.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
cl::opt< bool > ProfcheckDisableMetadataFixes
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI bool hasProfMD(const Instruction &I)
Checks if an Instruction has MD_prof Metadata.
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
unsigned MispredictPenalty