LLVM 23.0.0git
SelectOptimize.cpp
Go to the documentation of this file.
1//===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass converts selects to conditional jumps when profitable.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/SetVector.h"
16#include "llvm/ADT/Statistic.h"
23#include "llvm/CodeGen/Passes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Dominators.h"
30#include "llvm/IR/Function.h"
31#include "llvm/IR/IRBuilder.h"
32#include "llvm/IR/Instruction.h"
36#include "llvm/Pass.h"
40#include <algorithm>
41#include <queue>
42#include <stack>
43
44using namespace llvm;
45using namespace llvm::PatternMatch;
46
47#define DEBUG_TYPE "select-optimize"
48
49STATISTIC(NumSelectOptAnalyzed,
50 "Number of select groups considered for conversion to branch");
51STATISTIC(NumSelectConvertedExpColdOperand,
52 "Number of select groups converted due to expensive cold operand");
53STATISTIC(NumSelectConvertedHighPred,
54 "Number of select groups converted due to high-predictability");
55STATISTIC(NumSelectUnPred,
56 "Number of select groups not converted due to unpredictability");
57STATISTIC(NumSelectColdBB,
58 "Number of select groups not converted due to cold basic block");
59STATISTIC(NumSelectConvertedLoop,
60 "Number of select groups converted due to loop-level analysis");
61STATISTIC(NumSelectsConverted, "Number of selects converted");
62
63namespace llvm {
65}
66
68 "cold-operand-threshold",
69 cl::desc("Maximum frequency of path for an operand to be considered cold."),
70 cl::init(20), cl::Hidden);
71
73 "cold-operand-max-cost-multiplier",
74 cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
75 "slice of a cold operand to be considered inexpensive."),
77
79 GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
80 cl::desc("Gradient gain threshold (%)."),
81 cl::init(25), cl::Hidden);
82
84 GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
85 cl::desc("Minimum gain per loop (in cycles) threshold."),
87
89 "select-opti-loop-relative-gain-threshold",
91 "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
93
95 "mispredict-default-rate", cl::Hidden, cl::init(25),
96 cl::desc("Default mispredict rate (initialized to 25%)."));
97
98static cl::opt<bool>
99 DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
100 cl::init(false),
101 cl::desc("Disable loop-level heuristics."));
102
103namespace {
104
105class SelectOptimizeImpl {
106 const TargetMachine *TM = nullptr;
107 const TargetSubtargetInfo *TSI = nullptr;
108 const TargetLowering *TLI = nullptr;
109 const TargetTransformInfo *TTI = nullptr;
110 const LoopInfo *LI = nullptr;
112 ProfileSummaryInfo *PSI = nullptr;
113 OptimizationRemarkEmitter *ORE = nullptr;
114 TargetSchedModel TSchedModel;
115
116public:
117 SelectOptimizeImpl() = default;
118 SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
119 PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
120 bool runOnFunction(Function &F, Pass &P);
121
122 using Scaled64 = ScaledNumber<uint64_t>;
123
124 struct CostInfo {
125 /// Predicated cost (with selects as conditional moves).
126 Scaled64 PredCost;
127 /// Non-predicated cost (with selects converted to branches).
128 Scaled64 NonPredCost;
129 };
130
131 /// SelectLike is an abstraction over SelectInst and other operations that can
132 /// act like selects. For example Or(Zext(icmp), X) can be treated like
133 /// select(icmp, X|1, X).
134 class SelectLike {
135 /// The select (/or) instruction.
136 Instruction *I;
137 /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
138 /// opposed to the original condition.
139 bool Inverted = false;
140
141 /// The index of the operand that depends on condition. Only for select-like
142 /// instruction such as Or/Add.
143 unsigned CondIdx;
144
145 public:
146 SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
147 : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
148
149 Instruction *getI() { return I; }
150 const Instruction *getI() const { return I; }
151
152 Type *getType() const { return I->getType(); }
153
154 unsigned getConditionOpIndex() { return CondIdx; };
155
156 /// Return the true value for the SelectLike instruction. Note this may not
157 /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
158 /// the true value would be `or(x,1)`. As this value does not exist, nullptr
159 /// is returned.
160 Value *getTrueValue(bool HonorInverts = true) const {
161 if (Inverted && HonorInverts)
162 return getFalseValue(/*HonorInverts=*/false);
163 if (auto *Sel = dyn_cast<SelectInst>(I))
164 return Sel->getTrueValue();
165 // Or(zext) case - The true value is Or(X), so return nullptr as the value
166 // does not yet exist.
167 if (isa<BinaryOperator>(I))
168 return nullptr;
169
170 llvm_unreachable("Unhandled case in getTrueValue");
171 }
172
173 /// Return the false value for the SelectLike instruction. For example the
174 /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
175 /// `select(c, x|1, x)`)
176 Value *getFalseValue(bool HonorInverts = true) const {
177 if (Inverted && HonorInverts)
178 return getTrueValue(/*HonorInverts=*/false);
179 if (auto *Sel = dyn_cast<SelectInst>(I))
180 return Sel->getFalseValue();
181 // We are on the branch where the condition is zero, which means BinOp
182 // does not perform any computation, and we can simply return the operand
183 // that is not related to the condition
184 if (auto *BO = dyn_cast<BinaryOperator>(I))
185 return BO->getOperand(1 - CondIdx);
186
187 llvm_unreachable("Unhandled case in getFalseValue");
188 }
189
190 /// Return the NonPredCost cost of the op on \p isTrue branch, given the
191 /// costs in \p InstCostMap. This may need to be generated for select-like
192 /// instructions.
193 Scaled64 getOpCostOnBranch(
194 bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
195 const TargetTransformInfo *TTI) {
196 auto *V = IsTrue ? getTrueValue() : getFalseValue();
197 if (V) {
198 if (auto *IV = dyn_cast<Instruction>(V)) {
199 auto It = InstCostMap.find(IV);
200 return It != InstCostMap.end() ? It->second.NonPredCost
202 }
203 return Scaled64::getZero();
204 }
205 // If getTrue(False)Value() return nullptr, it means we are dealing with
206 // select-like instructions on the branch where the actual computation is
207 // happening. In that case the cost is equal to the cost of computation +
208 // cost of non-dependant on condition operand
211 {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
212 {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
213 auto TotalCost = Scaled64::get(Cost.getValue());
214 if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
215 auto It = InstCostMap.find(OpI);
216 if (It != InstCostMap.end())
217 TotalCost += It->second.NonPredCost;
218 }
219 return TotalCost;
220 }
221 };
222
223private:
224 // Select groups consist of consecutive select-like instructions with the same
225 // condition. Between select-likes could be any number of auxiliary
226 // instructions related to the condition like not, zext, ashr/lshr
227 struct SelectGroup {
228 Value *Condition;
230 };
231 using SelectGroups = SmallVector<SelectGroup, 2>;
232
233 // Converts select instructions of a function to conditional jumps when deemed
234 // profitable. Returns true if at least one select was converted.
235 bool optimizeSelects(Function &F);
236
237 // Heuristics for determining which select instructions can be profitably
238 // conveted to branches. Separate heuristics for selects in inner-most loops
239 // and the rest of code regions (base heuristics for non-inner-most loop
240 // regions).
241 void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
242 void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
243
244 // Converts to branches the select groups that were deemed
245 // profitable-to-convert.
246 void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
247
248 // Splits selects of a given basic block into select groups.
249 void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
250
251 // Determines for which select groups it is profitable converting to branches
252 // (base and inner-most-loop heuristics).
253 void findProfitableSIGroupsBase(SelectGroups &SIGroups,
254 SelectGroups &ProfSIGroups);
255 void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
256 SelectGroups &ProfSIGroups);
257
258 // Determines if a select group should be converted to a branch (base
259 // heuristics).
260 bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
261
262 // Returns true if there are expensive instructions in the cold value
263 // operand's (if any) dependence slice of any of the selects of the given
264 // group.
265 bool hasExpensiveColdOperand(const SelectGroup &ASI);
266
267 // For a given source instruction, collect its backwards dependence slice
268 // consisting of instructions exclusively computed for producing the operands
269 // of the source instruction.
270 void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
271 Instruction *SI, bool ForSinking = false);
272
273 // Returns true if the condition of the select is highly predictable.
274 bool isSelectHighlyPredictable(const SelectLike SI);
275
276 // Loop-level checks to determine if a non-predicated version (with branches)
277 // of the given loop is more profitable than its predicated version.
278 bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
279
280 // Computes instruction and loop-critical-path costs for both the predicated
281 // and non-predicated version of the given loop.
282 bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
283 DenseMap<const Instruction *, CostInfo> &InstCostMap,
284 CostInfo *LoopCost);
285
286 // Returns a set of all the select instructions in the given select groups.
287 SmallDenseMap<const Instruction *, SelectLike, 2>
288 getSImap(const SelectGroups &SIGroups);
289
290 // Returns a map from select-like instructions to the corresponding select
291 // group.
292 SmallDenseMap<const Instruction *, const SelectGroup *, 2>
293 getSGmap(const SelectGroups &SIGroups);
294
295 // Returns the latency cost of a given instruction.
296 std::optional<uint64_t> computeInstCost(const Instruction *I);
297
298 // Returns the misprediction cost of a given select when converted to branch.
299 Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
300
301 // Returns the cost of a branch when the prediction is correct.
302 Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
303 const SelectLike SI);
304
305 // Returns true if the target architecture supports lowering a given select.
306 bool isSelectKindSupported(const SelectLike SI);
307};
308
309class SelectOptimize : public FunctionPass {
310 SelectOptimizeImpl Impl;
311
312public:
313 static char ID;
314
315 SelectOptimize() : FunctionPass(ID) {}
316
317 bool runOnFunction(Function &F) override {
318 if (skipFunction(F))
319 return false;
320
321 return Impl.runOnFunction(F, *this);
322 }
323
324 void getAnalysisUsage(AnalysisUsage &AU) const override {
325 AU.addRequired<ProfileSummaryInfoWrapperPass>();
326 AU.addRequired<TargetPassConfig>();
327 AU.addRequired<TargetTransformInfoWrapperPass>();
328 AU.addRequired<LoopInfoWrapperPass>();
329 AU.addRequired<BlockFrequencyInfoWrapperPass>();
330 AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
331 }
332};
333
334} // namespace
335
338 SelectOptimizeImpl Impl(TM);
339 return Impl.run(F, FAM);
340}
341
342char SelectOptimize::ID = 0;
343
344INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
345 false)
352INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
353 false)
354
355FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
356
357PreservedAnalyses SelectOptimizeImpl::run(Function &F,
359 TSI = TM->getSubtargetImpl(F);
360 TLI = TSI->getTargetLowering();
361
362 // If none of the select types are supported then skip this pass.
363 // This is an optimization pass. Legality issues will be handled by
364 // instruction selection.
368 return PreservedAnalyses::all();
369
370 TTI = &FAM.getResult<TargetIRAnalysis>(F);
371 if (!TTI->enableSelectOptimize())
372 return PreservedAnalyses::all();
373
375 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
376 if (!PSI)
377 reportFatalUsageError("this pass requires the profile-summary module "
378 "analysis to be available");
379 BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
380
381 // When optimizing for size, selects are preferable over branches.
382 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
383 return PreservedAnalyses::all();
384
385 LI = &FAM.getResult<LoopAnalysis>(F);
386 ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
387 TSchedModel.init(TSI);
388
389 bool Changed = optimizeSelects(F);
391}
392
393bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
394 TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
395 TSI = TM->getSubtargetImpl(F);
396 TLI = TSI->getTargetLowering();
397
398 // If none of the select types are supported then skip this pass.
399 // This is an optimization pass. Legality issues will be handled by
400 // instruction selection.
404 return false;
405
406 TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
407
408 if (!TTI->enableSelectOptimize())
409 return false;
410
411 LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
412 BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
413 PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
414 ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
415 TSchedModel.init(TSI);
416
417 // When optimizing for size, selects are preferable over branches.
418 if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
419 return false;
420
421 return optimizeSelects(F);
422}
423
424bool SelectOptimizeImpl::optimizeSelects(Function &F) {
425 // Determine for which select groups it is profitable converting to branches.
426 SelectGroups ProfSIGroups;
427 // Base heuristics apply only to non-loops and outer loops.
428 optimizeSelectsBase(F, ProfSIGroups);
429 // Separate heuristics for inner-most loops.
430 optimizeSelectsInnerLoops(F, ProfSIGroups);
431
432 // Convert to branches the select groups that were deemed
433 // profitable-to-convert.
434 convertProfitableSIGroups(ProfSIGroups);
435
436 // Code modified if at least one select group was converted.
437 return !ProfSIGroups.empty();
438}
439
440void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
441 SelectGroups &ProfSIGroups) {
442 // Collect all the select groups.
443 SelectGroups SIGroups;
444 for (BasicBlock &BB : F) {
445 // Base heuristics apply only to non-loops and outer loops.
446 Loop *L = LI->getLoopFor(&BB);
447 if (L && L->isInnermost())
448 continue;
449 collectSelectGroups(BB, SIGroups);
450 }
451
452 // Determine for which select groups it is profitable converting to branches.
453 findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
454}
455
456void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
457 SelectGroups &ProfSIGroups) {
459 // Need to check size on each iteration as we accumulate child loops.
460 for (unsigned long i = 0; i < Loops.size(); ++i)
461 llvm::append_range(Loops, Loops[i]->getSubLoops());
462
463 for (Loop *L : Loops) {
464 if (!L->isInnermost())
465 continue;
466
467 SelectGroups SIGroups;
468 for (BasicBlock *BB : L->getBlocks())
469 collectSelectGroups(*BB, SIGroups);
470
471 findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
472 }
473}
474
475/// Returns optimised value on \p IsTrue branch. For SelectInst that would be
476/// either True or False value. For (BinaryOperator) instructions, where the
477/// condition may be skipped, the operation will use a non-conditional operand.
478/// For example, for `or(V,zext(cond))` this function would return V.
479/// However, if the conditional operand on \p IsTrue branch matters, we create a
480/// clone of instruction at the end of that branch \p B and replace the
481/// condition operand with a constant.
482///
483/// Also /p OptSelects contains previously optimised select-like instructions.
484/// If the current value uses one of the optimised values, we can optimise it
485/// further by replacing it with the corresponding value on the given branch
487 SelectOptimizeImpl::SelectLike &SI, bool isTrue,
488 SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
489 BasicBlock *B) {
490 Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
491 if (V) {
492 if (auto *IV = dyn_cast<Instruction>(V))
493 if (auto It = OptSelects.find(IV); It != OptSelects.end())
494 return isTrue ? It->second.first : It->second.second;
495 return V;
496 }
497
498 auto *BO = cast<BinaryOperator>(SI.getI());
499 assert((BO->getOpcode() == Instruction::Add ||
500 BO->getOpcode() == Instruction::Or ||
501 BO->getOpcode() == Instruction::Sub) &&
502 "Only currently handling Add, Or and Sub binary operators.");
503
504 auto *CBO = BO->clone();
505 auto CondIdx = SI.getConditionOpIndex();
506 auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
507 if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
508 CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
509 } else {
510 assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
511 "Unexpected opcode");
512 CBO->setOperand(CondIdx, ConstantInt::getAllOnesValue(CBO->getType()));
513 }
514
515 unsigned OtherIdx = 1 - CondIdx;
516 if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
517 if (auto It = OptSelects.find(IV); It != OptSelects.end())
518 CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
519 }
520 CBO->insertBefore(B->getTerminator()->getIterator());
521 return CBO;
522}
523
524void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
525 for (SelectGroup &ASI : ProfSIGroups) {
526 // The code transformation here is a modified version of the sinking
527 // transformation in CodeGenPrepare::optimizeSelectInst with a more
528 // aggressive strategy of which instructions to sink.
529 //
530 // TODO: eliminate the redundancy of logic transforming selects to branches
531 // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
532 // selects for all cases (with and without profile information).
533
534 // Transform a sequence like this:
535 // start:
536 // %cmp = cmp uge i32 %a, %b
537 // %sel = select i1 %cmp, i32 %c, i32 %d
538 //
539 // Into:
540 // start:
541 // %cmp = cmp uge i32 %a, %b
542 // %cmp.frozen = freeze %cmp
543 // br i1 %cmp.frozen, label %select.true, label %select.false
544 // select.true:
545 // br label %select.end
546 // select.false:
547 // br label %select.end
548 // select.end:
549 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
550 //
551 // %cmp should be frozen, otherwise it may introduce undefined behavior.
552 // In addition, we may sink instructions that produce %c or %d into the
553 // destination(s) of the new branch.
554 // If the true or false blocks do not contain a sunken instruction, that
555 // block and its branch may be optimized away. In that case, one side of the
556 // first branch will point directly to select.end, and the corresponding PHI
557 // predecessor block will be the start block.
558
559 // Find all the instructions that can be soundly sunk to the true/false
560 // blocks. These are instructions that are computed solely for producing the
561 // operands of the select instructions in the group and can be sunk without
562 // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
563 // with side effects).
564 SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
565 typedef std::stack<Instruction *>::size_type StackSizeType;
566 StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
567 Instruction *SelectWithProfile = nullptr;
568 bool SelectWithProfileIsInverted = false;
569 for (SelectLike &SI : ASI.Selects) {
570 if (!isa<SelectInst>(SI.getI()))
571 continue;
572 // For each select, compute the sinkable dependence chains of the true and
573 // false operands.
574 if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
575 std::stack<Instruction *> TrueSlice;
576 getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
577 maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
578 TrueSlices.push_back(TrueSlice);
579 }
580 if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
581 if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
582 std::stack<Instruction *> FalseSlice;
583 getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
584 maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
585 FalseSlices.push_back(FalseSlice);
586 }
587 }
588 // Also see if the select has profile data that we can propagate later
589 // to the conditional branch.
590 Value *SelectCondition = cast<SelectInst>(SI.getI())->getCondition();
591 if (hasProfMD(*SI.getI()) && ASI.Condition == SelectCondition) {
592 SelectWithProfile = SI.getI();
593 } else if (hasProfMD(*SI.getI()) &&
594 match(SelectCondition, m_Not(m_Value(ASI.Condition)))) {
595 SelectWithProfile = SI.getI();
596 SelectWithProfileIsInverted = true;
597 }
598 }
599 // In the case of multiple select instructions in the same group, the order
600 // of non-dependent instructions (instructions of different dependence
601 // slices) in the true/false blocks appears to affect performance.
602 // Interleaving the slices seems to experimentally be the optimal approach.
603 // This interleaving scheduling allows for more ILP (with a natural downside
604 // of increasing a bit register pressure) compared to a simple ordering of
605 // one whole chain after another. One would expect that this ordering would
606 // not matter since the scheduling in the backend of the compiler would
607 // take care of it, but apparently the scheduler fails to deliver optimal
608 // ILP with a naive ordering here.
609 SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
610 for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
611 for (auto &S : TrueSlices) {
612 if (!S.empty()) {
613 TrueSlicesInterleaved.push_back(S.top());
614 S.pop();
615 }
616 }
617 }
618 for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
619 for (auto &S : FalseSlices) {
620 if (!S.empty()) {
621 FalseSlicesInterleaved.push_back(S.top());
622 S.pop();
623 }
624 }
625 }
626
627 // We split the block containing the select(s) into two blocks.
628 SelectLike &SI = ASI.Selects.front();
629 SelectLike &LastSI = ASI.Selects.back();
630 BasicBlock *StartBlock = SI.getI()->getParent();
631 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
632 // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
633 // RemoveDIs turned on, SplitPt would instead point to the next
634 // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
635 // tell splitBasicBlock that we want to include any DbgVariableRecords
636 // attached to SplitPt in the splice.
637 SplitPt.setHeadBit(true);
638 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
639 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
640 // Delete the unconditional branch that was just created by the split.
641 StartBlock->getTerminator()->eraseFromParent();
642
643 // Move any debug/pseudo and auxiliary instructions that were in-between the
644 // select group to the newly-created end block.
646 auto DIt = SI.getI()->getIterator();
647 auto NIt = ASI.Selects.begin();
648 while (&*DIt != LastSI.getI()) {
649 if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
650 ++NIt;
651 else
652 SinkInstrs.push_back(&*DIt);
653 DIt++;
654 }
655 auto InsertionPoint = EndBlock->getFirstInsertionPt();
656 for (auto *DI : SinkInstrs)
657 DI->moveBeforePreserving(InsertionPoint);
658
659 // Duplicate implementation for DbgRecords, the non-instruction debug-info
660 // format. Helper lambda for moving DbgRecords to the end block.
661 auto TransferDbgRecords = [&](Instruction &I) {
662 for (auto &DbgRecord :
663 llvm::make_early_inc_range(I.getDbgRecordRange())) {
666 EndBlock->getFirstInsertionPt());
667 }
668 };
669
670 // Iterate over all instructions in between SI and LastSI, not including
671 // SI itself. These are all the variable assignments that happen "in the
672 // middle" of the select group.
673 auto R = make_range(std::next(SI.getI()->getIterator()),
674 std::next(LastSI.getI()->getIterator()));
675 llvm::for_each(R, TransferDbgRecords);
676
677 // These are the new basic blocks for the conditional branch.
678 // At least one will become an actual new basic block.
679 BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
680 UncondBrInst *TrueBranch = nullptr, *FalseBranch = nullptr;
681 // Checks if select-like instruction would materialise on the given branch
682 auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
683 for (auto &SL : SG.Selects) {
684 if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
685 return true;
686 }
687 return false;
688 };
689 if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
690 TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
691 EndBlock->getParent(), EndBlock);
692 TrueBranch = UncondBrInst::Create(EndBlock, TrueBlock);
693 TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
694 for (Instruction *TrueInst : TrueSlicesInterleaved)
695 TrueInst->moveBefore(TrueBranch->getIterator());
696 }
697 if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
698 FalseBlock =
699 BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
700 EndBlock->getParent(), EndBlock);
701 FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
702 FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
703 for (Instruction *FalseInst : FalseSlicesInterleaved)
704 FalseInst->moveBefore(FalseBranch->getIterator());
705 }
706 // If there was nothing to sink, then arbitrarily choose the 'false' side
707 // for a new input value to the PHI.
708 if (TrueBlock == FalseBlock) {
709 assert(TrueBlock == nullptr &&
710 "Unexpected basic block transform while optimizing select");
711
712 FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
713 EndBlock->getParent(), EndBlock);
714 auto *FalseBranch = UncondBrInst::Create(EndBlock, FalseBlock);
715 FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
716 }
717
718 // Insert the real conditional branch based on the original condition.
719 // If we did not create a new block for one of the 'true' or 'false' paths
720 // of the condition, it means that side of the branch goes to the end block
721 // directly and the path originates from the start block from the point of
722 // view of the new PHI.
723 BasicBlock *TT, *FT;
724 if (TrueBlock == nullptr) {
725 TT = EndBlock;
726 FT = FalseBlock;
727 TrueBlock = StartBlock;
728 } else if (FalseBlock == nullptr) {
729 TT = TrueBlock;
730 FT = EndBlock;
731 FalseBlock = StartBlock;
732 } else {
733 TT = TrueBlock;
734 FT = FalseBlock;
735 }
736 IRBuilder<> IB(SI.getI());
737 auto *CondFr =
738 IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
739
741
742 // Use reverse iterator because later select may use the value of the
743 // earlier select, and we need to propagate value through earlier select
744 // to get the PHI operand.
745 InsertionPoint = EndBlock->begin();
746 for (SelectLike &SI : ASI.Selects) {
747 // The select itself is replaced with a PHI Node.
748 PHINode *PN = PHINode::Create(SI.getType(), 2, "");
749 PN->insertBefore(InsertionPoint);
750 PN->takeName(SI.getI());
751 // Current instruction might be a condition of some other group, so we
752 // need to replace it there to avoid dangling pointer
753 if (PN->getType()->isIntegerTy(1)) {
754 for (auto &SG : ProfSIGroups) {
755 if (SG.Condition == SI.getI())
756 SG.Condition = PN;
757 }
758 }
759 SI.getI()->replaceAllUsesWith(PN);
760 auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
761 auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
762 INS[PN] = {TV, FV};
763 PN->addIncoming(TV, TrueBlock);
764 PN->addIncoming(FV, FalseBlock);
765 PN->setDebugLoc(SI.getI()->getDebugLoc());
766 ++NumSelectsConverted;
767 }
768 Instruction *CondBr = IB.CreateCondBr(CondFr, TT, FT, SI.getI());
769 if (!ProfcheckDisableMetadataFixes && SelectWithProfile) {
770 CondBr->copyMetadata(*SelectWithProfile, {llvm::LLVMContext::MD_prof});
771 if (SelectWithProfileIsInverted)
772 CondBr->swapProfMetadata();
773 } else {
775 }
776
777 // Remove the old select instructions, now that they are not longer used.
778 for (SelectLike &SI : ASI.Selects)
779 SI.getI()->eraseFromParent();
780 }
781}
782
783void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
784 SelectGroups &SIGroups) {
785 // Represents something that can be considered as select instruction.
786 // Auxiliary instruction are instructions that depends on a condition and have
787 // zero or some constant value on True/False branch, such as:
788 // * ZExt(1bit)
789 // * SExt(1bit)
790 // * Not(1bit)
791 // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
792 // earlier in the BB. For conditions that check the sign of the Val compiler
793 // may generate shifts instead of ZExt/SExt.
794 struct SelectLikeInfo {
795 Value *Cond;
796 bool IsAuxiliary;
797 bool IsInverted;
798 unsigned ConditionIdx;
799 };
800
802 // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
803 // instructions.
805
806 // Check if the instruction is SelectLike or might be part of SelectLike
807 // expression, put information into SelectInfo and return the iterator to the
808 // inserted position.
809 auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
810 if (auto *Cmp = dyn_cast<CmpInst>(I)) {
811 SeenCmp.insert(Cmp);
812 return SelectInfo.end();
813 }
814
815 Value *Cond;
817 Cond->getType()->isIntegerTy(1)) {
818 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
819 return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
820 }
821
822 if (match(I, m_Not(m_Value(Cond)))) {
823 return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
824 }
825
826 // Select instruction are what we are usually looking for.
827 if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
828 bool Inverted = match(Cond, m_Not(m_Value(Cond)));
829 return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
830 }
831 Value *Val;
832 ConstantInt *Shift;
833 if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
834 I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
835 for (auto *CmpI : SeenCmp) {
836 auto Pred = CmpI->getPredicate();
837 if (Val != CmpI->getOperand(0))
838 continue;
839 if ((Pred == CmpInst::ICMP_SGT &&
840 match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
841 (Pred == CmpInst::ICMP_SGE &&
842 match(CmpI->getOperand(1), m_Zero())) ||
843 (Pred == CmpInst::ICMP_SLT &&
844 match(CmpI->getOperand(1), m_Zero())) ||
845 (Pred == CmpInst::ICMP_SLE &&
846 match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
847 bool Inverted =
848 Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
849 return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
850 }
851 }
852 return SelectInfo.end();
853 }
854
855 // An BinOp(Aux(X), Y) can also be treated like a select, with condition X
856 // and values Y|1 and Y.
857 // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize
858 // - 1` `BinOp` can be Add, Sub, Or
859 Value *X;
860 auto MatchZExtOrSExtPattern =
862 auto MatchShiftPattern =
864
865 // This check is unnecessary, but it prevents costly access to the
866 // SelectInfo map.
867 if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
868 (match(I, MatchShiftPattern) &&
869 X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
870 if (I->getOpcode() != Instruction::Add &&
871 I->getOpcode() != Instruction::Sub &&
872 I->getOpcode() != Instruction::Or)
873 return SelectInfo.end();
874
875 if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
876 return SelectInfo.end();
877
878 // Iterate through operands and find dependant on recognised sign
879 // extending auxiliary select-like instructions. The operand index does
880 // not matter for Add and Or. However, for Sub, we can only safely
881 // transform when the operand is second.
882 unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
883 for (; Idx < 2; Idx++) {
884 auto *Op = I->getOperand(Idx);
885 auto It = SelectInfo.find(Op);
886 if (It != SelectInfo.end() && It->second.IsAuxiliary) {
887 Cond = It->second.Cond;
888 bool Inverted = It->second.IsInverted;
889 return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
890 }
891 }
892 }
893 return SelectInfo.end();
894 };
895
896 bool AlreadyProcessed = false;
897 BasicBlock::iterator BBIt = BB.begin();
899 while (BBIt != BB.end()) {
900 Instruction *I = &*BBIt++;
901 if (I->isDebugOrPseudoInst())
902 continue;
903
904 if (!AlreadyProcessed)
905 It = ProcessSelectInfo(I);
906 else
907 AlreadyProcessed = false;
908
909 if (It == SelectInfo.end() || It->second.IsAuxiliary)
910 continue;
911
912 if (!TTI->shouldTreatInstructionLikeSelect(I))
913 continue;
914
915 Value *Cond = It->second.Cond;
916 // Vector conditions are not supported.
917 if (!Cond->getType()->isIntegerTy(1))
918 continue;
919
920 SelectGroup SIGroup = {Cond, {}};
921 SIGroup.Selects.emplace_back(I, It->second.IsInverted,
922 It->second.ConditionIdx);
923
924 // If the select type is not supported, no point optimizing it.
925 // Instruction selection will take care of it.
926 if (!isSelectKindSupported(SIGroup.Selects.front()))
927 continue;
928
929 while (BBIt != BB.end()) {
930 Instruction *NI = &*BBIt;
931 // Debug/pseudo instructions should be skipped and not prevent the
932 // formation of a select group.
933 if (NI->isDebugOrPseudoInst()) {
934 ++BBIt;
935 continue;
936 }
937
938 It = ProcessSelectInfo(NI);
939 if (It == SelectInfo.end()) {
940 AlreadyProcessed = true;
941 break;
942 }
943
944 // Auxiliary with same condition
945 auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
946 if (Cond != CurrCond) {
947 AlreadyProcessed = true;
948 break;
949 }
950
951 if (!IsAux)
952 SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
953 ++BBIt;
954 }
955 LLVM_DEBUG({
956 dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
957 for (auto &SI : SIGroup.Selects)
958 dbgs() << " " << *SI.getI() << "\n";
959 });
960
961 SIGroups.push_back(SIGroup);
962 }
963}
964
965void SelectOptimizeImpl::findProfitableSIGroupsBase(
966 SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
967 for (SelectGroup &ASI : SIGroups) {
968 ++NumSelectOptAnalyzed;
969 if (isConvertToBranchProfitableBase(ASI))
970 ProfSIGroups.push_back(ASI);
971 }
972}
973
976 LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
977 ORE->emit(Rem);
978}
979
980void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
981 const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
982 NumSelectOptAnalyzed += SIGroups.size();
983 // For each select group in an inner-most loop,
984 // a branch is more preferable than a select/conditional-move if:
985 // i) conversion to branches for all the select groups of the loop satisfies
986 // loop-level heuristics including reducing the loop's critical path by
987 // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
988 // ii) the total cost of the select group is cheaper with a branch compared
989 // to its predicated version. The cost is in terms of latency and the cost
990 // of a select group is the cost of its most expensive select instruction
991 // (assuming infinite resources and thus fully leveraging available ILP).
992
994 CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
995 {Scaled64::getZero(), Scaled64::getZero()}};
996 if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
997 !checkLoopHeuristics(L, LoopCost)) {
998 return;
999 }
1000
1001 for (SelectGroup &ASI : SIGroups) {
1002 // Assuming infinite resources, the cost of a group of instructions is the
1003 // cost of the most expensive instruction of the group.
1004 Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
1005 for (SelectLike &SI : ASI.Selects) {
1006 const auto &ICM = InstCostMap[SI.getI()];
1007 SelectCost = std::max(SelectCost, ICM.PredCost);
1008 BranchCost = std::max(BranchCost, ICM.NonPredCost);
1009 }
1010 if (BranchCost < SelectCost) {
1011 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
1012 ASI.Selects.front().getI());
1013 OR << "Profitable to convert to branch (loop analysis). BranchCost="
1014 << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
1015 << ". ";
1016 EmitAndPrintRemark(ORE, OR);
1017 ++NumSelectConvertedLoop;
1018 ProfSIGroups.push_back(ASI);
1019 } else {
1020 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1021 ASI.Selects.front().getI());
1022 ORmiss << "Select is more profitable (loop analysis). BranchCost="
1023 << BranchCost.toString()
1024 << ", SelectCost=" << SelectCost.toString() << ". ";
1025 EmitAndPrintRemark(ORE, ORmiss);
1026 }
1027 }
1028}
1029
1030bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1031 const SelectGroup &ASI) {
1032 const SelectLike &SI = ASI.Selects.front();
1033 LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1034 << "\n");
1035 OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1036 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
1037
1038 // Skip cold basic blocks. Better to optimize for size for cold blocks.
1039 if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
1040 ++NumSelectColdBB;
1041 ORmiss << "Not converted to branch because of cold basic block. ";
1042 EmitAndPrintRemark(ORE, ORmiss);
1043 return false;
1044 }
1045
1046 // If unpredictable, branch form is less profitable.
1047 if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
1048 ++NumSelectUnPred;
1049 ORmiss << "Not converted to branch because of unpredictable branch. ";
1050 EmitAndPrintRemark(ORE, ORmiss);
1051 return false;
1052 }
1053
1054 // If highly predictable, branch form is more profitable, unless a
1055 // predictable select is inexpensive in the target architecture.
1056 if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
1057 ++NumSelectConvertedHighPred;
1058 OR << "Converted to branch because of highly predictable branch. ";
1059 EmitAndPrintRemark(ORE, OR);
1060 return true;
1061 }
1062
1063 // Look for expensive instructions in the cold operand's (if any) dependence
1064 // slice of any of the selects in the group.
1065 if (hasExpensiveColdOperand(ASI)) {
1066 ++NumSelectConvertedExpColdOperand;
1067 OR << "Converted to branch because of expensive cold operand.";
1068 EmitAndPrintRemark(ORE, OR);
1069 return true;
1070 }
1071
1072 // If latch has a select group with several elements, it is usually profitable
1073 // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if
1074 // conversion is profitable for innermost loops.
1075 auto *BB = SI.getI()->getParent();
1076 auto *L = LI->getLoopFor(BB);
1077 if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
1078 ASI.Selects.size() >= 3) {
1079 OR << "Converted to branch because select group in the latch block is big.";
1080 EmitAndPrintRemark(ORE, OR);
1081 return true;
1082 }
1083
1084 ORmiss << "Not profitable to convert to branch (base heuristic).";
1085 EmitAndPrintRemark(ORE, ORmiss);
1086 return false;
1087}
1088
1090 uint64_t Denominator) {
1091 return (Numerator + (Denominator / 2)) / Denominator;
1092}
1093
1094static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1095 uint64_t &TrueVal, uint64_t &FalseVal) {
1096 if (isa<SelectInst>(SI.getI()))
1097 return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1098 return false;
1099}
1100
1101bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
1102 bool ColdOperand = false;
1103 uint64_t TrueWeight, FalseWeight, TotalWeight;
1104 if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
1105 uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
1106 TotalWeight = TrueWeight + FalseWeight;
1107 // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
1108 ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
1109 } else if (PSI->hasProfileSummary()) {
1110 OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1111 ASI.Selects.front().getI());
1112 ORmiss << "Profile data available but missing branch-weights metadata for "
1113 "select instruction. ";
1114 EmitAndPrintRemark(ORE, ORmiss);
1115 }
1116 if (!ColdOperand)
1117 return false;
1118 // Check if the cold path's dependence slice is expensive for any of the
1119 // selects of the group.
1120 for (SelectLike SI : ASI.Selects) {
1121 Instruction *ColdI = nullptr;
1122 uint64_t HotWeight;
1123 if (TrueWeight < FalseWeight) {
1124 ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
1125 HotWeight = FalseWeight;
1126 } else {
1127 ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
1128 HotWeight = TrueWeight;
1129 }
1130 if (ColdI) {
1131 std::stack<Instruction *> ColdSlice;
1132 getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
1133 InstructionCost SliceCost = 0;
1134 while (!ColdSlice.empty()) {
1135 SliceCost += TTI->getInstructionCost(ColdSlice.top(),
1137 ColdSlice.pop();
1138 }
1139 // The colder the cold value operand of the select is the more expensive
1140 // the cmov becomes for computing the cold value operand every time. Thus,
1141 // the colder the cold operand is the more its cost counts.
1142 // Get nearest integer cost adjusted for coldness.
1143 InstructionCost AdjSliceCost =
1144 divideNearest(SliceCost * HotWeight, TotalWeight);
1145 if (AdjSliceCost >=
1147 return true;
1148 }
1149 }
1150 return false;
1151}
1152
1153// Check if it is safe to move LoadI next to the SI.
1154// Conservatively assume it is safe only if there is no instruction
1155// modifying memory in-between the load and the select instruction.
1157 // Assume loads from different basic blocks are unsafe to move.
1158 if (LoadI->getParent() != SI->getParent())
1159 return false;
1160 auto It = LoadI->getIterator();
1161 while (&*It != SI) {
1162 if (It->mayWriteToMemory())
1163 return false;
1164 It++;
1165 }
1166 return true;
1167}
1168
1169// For a given source instruction, collect its backwards dependence slice
1170// consisting of instructions exclusively computed for the purpose of producing
1171// the operands of the source instruction. As an approximation
1172// (sufficiently-accurate in practice), we populate this set with the
1173// instructions of the backwards dependence slice that only have one-use and
1174// form an one-use chain that leads to the source instruction.
1175void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
1176 std::stack<Instruction *> &Slice,
1177 Instruction *SI,
1178 bool ForSinking) {
1180 std::queue<Instruction *> Worklist;
1181 Worklist.push(I);
1182 while (!Worklist.empty()) {
1183 Instruction *II = Worklist.front();
1184 Worklist.pop();
1185
1186 // Avoid cycles.
1187 if (!Visited.insert(II).second)
1188 continue;
1189
1190 if (!II->hasOneUse())
1191 continue;
1192
1193 // Cannot soundly sink instructions with side-effects.
1194 // Terminator or phi instructions cannot be sunk.
1195 // Avoid sinking other select instructions (should be handled separetely).
1196 if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
1198 continue;
1199
1200 // Avoid sinking loads in order not to skip state-modifying instructions,
1201 // that may alias with the loaded address.
1202 // Only allow sinking of loads within the same basic block that are
1203 // conservatively proven to be safe.
1204 if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1205 continue;
1206
1207 // Avoid considering instructions with less frequency than the source
1208 // instruction (i.e., avoid colder code regions of the dependence slice).
1209 if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
1210 continue;
1211
1212 // Eligible one-use instruction added to the dependence slice.
1213 Slice.push(II);
1214
1215 // Explore all the operands of the current instruction to expand the slice.
1216 for (Value *Op : II->operand_values())
1217 if (auto *OpI = dyn_cast<Instruction>(Op))
1218 Worklist.push(OpI);
1219 }
1220}
1221
1222bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
1223 uint64_t TrueWeight, FalseWeight;
1224 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1225 uint64_t Max = std::max(TrueWeight, FalseWeight);
1226 uint64_t Sum = TrueWeight + FalseWeight;
1227 if (Sum != 0) {
1228 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
1229 if (Probability > TTI->getPredictableBranchThreshold())
1230 return true;
1231 }
1232 }
1233 return false;
1234}
1235
1236bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1237 const CostInfo LoopCost[2]) {
1238 // Loop-level checks to determine if a non-predicated version (with branches)
1239 // of the loop is more profitable than its predicated version.
1240
1242 return true;
1243
1244 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
1245 &*L->getHeader()->getFirstNonPHIIt());
1246
1247 if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1248 LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1249 ORmissL << "No select conversion in the loop due to no reduction of loop's "
1250 "critical path. ";
1251 EmitAndPrintRemark(ORE, ORmissL);
1252 return false;
1253 }
1254
1255 Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1256 LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1257
1258 // Profitably converting to branches need to reduce the loop's critical path
1259 // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1260 // relative gain of 12.5%).
1261 if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1262 Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1263 Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1264 ORmissL << "No select conversion in the loop due to small reduction of "
1265 "loop's critical path. Gain="
1266 << Gain[1].toString()
1267 << ", RelativeGain=" << RelativeGain.toString() << "%. ";
1268 EmitAndPrintRemark(ORE, ORmissL);
1269 return false;
1270 }
1271
1272 // If the loop's critical path involves loop-carried dependences, the gradient
1273 // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1274 // This check ensures that the latency reduction for the loop's critical path
1275 // keeps decreasing with sufficient rate beyond the two analyzed loop
1276 // iterations.
1277 if (Gain[1] > Gain[0]) {
1278 Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1279 (LoopCost[1].PredCost - LoopCost[0].PredCost);
1280 if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1281 ORmissL << "No select conversion in the loop due to small gradient gain. "
1282 "GradientGain="
1283 << GradientGain.toString() << "%. ";
1284 EmitAndPrintRemark(ORE, ORmissL);
1285 return false;
1286 }
1287 }
1288 // If the gain decreases it is not profitable to convert.
1289 else if (Gain[1] < Gain[0]) {
1290 ORmissL
1291 << "No select conversion in the loop due to negative gradient gain. ";
1292 EmitAndPrintRemark(ORE, ORmissL);
1293 return false;
1294 }
1295
1296 // Non-predicated version of the loop is more profitable than its
1297 // predicated version.
1298 return true;
1299}
1300
1301// Computes instruction and loop-critical-path costs for both the predicated
1302// and non-predicated version of the given loop.
1303// Returns false if unable to compute these costs due to invalid cost of loop
1304// instruction(s).
1305bool SelectOptimizeImpl::computeLoopCosts(
1306 const Loop *L, const SelectGroups &SIGroups,
1307 DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
1308 LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
1309 << L->getHeader()->getName() << "\n");
1310 const auto SImap = getSImap(SIGroups);
1311 const auto SGmap = getSGmap(SIGroups);
1312 // Compute instruction and loop-critical-path costs across two iterations for
1313 // both predicated and non-predicated version.
1314 const unsigned Iterations = 2;
1315 for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1316 // Cost of the loop's critical path.
1317 CostInfo &MaxCost = LoopCost[Iter];
1318 for (BasicBlock *BB : L->getBlocks()) {
1319 for (const Instruction &I : *BB) {
1320 if (I.isDebugOrPseudoInst())
1321 continue;
1322 // Compute the predicated and non-predicated cost of the instruction.
1323 Scaled64 IPredCost = Scaled64::getZero(),
1324 INonPredCost = Scaled64::getZero();
1325
1326 // Assume infinite resources that allow to fully exploit the available
1327 // instruction-level parallelism.
1328 // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
1329 for (const Use &U : I.operands()) {
1330 auto UI = dyn_cast<Instruction>(U.get());
1331 if (!UI)
1332 continue;
1333 if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
1334 IPredCost = std::max(IPredCost, It->second.PredCost);
1335 INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1336 }
1337 }
1338 auto ILatency = computeInstCost(&I);
1339 if (!ILatency) {
1340 OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1341 ORmissL << "Invalid instruction cost preventing analysis and "
1342 "optimization of the inner-most loop containing this "
1343 "instruction. ";
1344 EmitAndPrintRemark(ORE, ORmissL);
1345 return false;
1346 }
1347 IPredCost += Scaled64::get(*ILatency);
1348 INonPredCost += Scaled64::get(*ILatency);
1349
1350 // For a select that can be converted to branch,
1351 // compute its cost as a branch (non-predicated cost).
1352 //
1353 // BranchCost = PredictedPathCost + MispredictCost
1354 // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1355 // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
1356 if (auto It = SImap.find(&I); It != SImap.end()) {
1357 auto SI = It->second;
1358 const auto *SG = SGmap.at(&I);
1359 Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1360 Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1361 Scaled64 PredictedPathCost =
1362 getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1363
1364 Scaled64 CondCost = Scaled64::getZero();
1365 if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1366 if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
1367 CondCost = It->second.NonPredCost;
1368 Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1369
1370 INonPredCost = PredictedPathCost + MispredictCost;
1371 }
1372 LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
1373 << INonPredCost << " for " << I << "\n");
1374
1375 InstCostMap[&I] = {IPredCost, INonPredCost};
1376 MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1377 MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1378 }
1379 }
1380 LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
1381 << " MaxCost = " << MaxCost.PredCost << " "
1382 << MaxCost.NonPredCost << "\n");
1383 }
1384 return true;
1385}
1386
1388SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1390 for (const SelectGroup &ASI : SIGroups)
1391 for (const SelectLike &SI : ASI.Selects)
1392 SImap.try_emplace(SI.getI(), SI);
1393 return SImap;
1394}
1395
1397SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1399 for (const SelectGroup &ASI : SIGroups)
1400 for (const SelectLike &SI : ASI.Selects)
1401 SImap.try_emplace(SI.getI(), &ASI);
1402 return SImap;
1403}
1404
1405std::optional<uint64_t>
1406SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1407 InstructionCost ICost =
1408 TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1409 if (ICost.isValid())
1410 return std::optional<uint64_t>(ICost.getValue());
1411 return std::nullopt;
1412}
1413
1415SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1416 const Scaled64 CondCost) {
1417 uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1418
1419 // Account for the default misprediction rate when using a branch
1420 // (conservatively set to 25% by default).
1421 uint64_t MispredictRate = MispredictDefaultRate;
1422 // If the select condition is obviously predictable, then the misprediction
1423 // rate is zero.
1424 if (isSelectHighlyPredictable(SI))
1425 MispredictRate = 0;
1426
1427 // CondCost is included to account for cases where the computation of the
1428 // condition is part of a long dependence chain (potentially loop-carried)
1429 // that would delay detection of a misprediction and increase its cost.
1430 Scaled64 MispredictCost =
1431 std::max(Scaled64::get(MispredictPenalty), CondCost) *
1432 Scaled64::get(MispredictRate);
1433 MispredictCost /= Scaled64::get(100);
1434
1435 return MispredictCost;
1436}
1437
1438// Returns the cost of a branch when the prediction is correct.
1439// TrueCost * TrueProbability + FalseCost * FalseProbability.
1441SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1442 const SelectLike SI) {
1443 Scaled64 PredPathCost;
1444 uint64_t TrueWeight, FalseWeight;
1445 if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1446 uint64_t SumWeight = TrueWeight + FalseWeight;
1447 if (SumWeight != 0) {
1448 PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1449 FalseCost * Scaled64::get(FalseWeight);
1450 PredPathCost /= Scaled64::get(SumWeight);
1451 return PredPathCost;
1452 }
1453 }
1454 // Without branch weight metadata, we assume 75% for the one path and 25% for
1455 // the other, and pick the result with the biggest cost.
1456 PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1457 FalseCost * Scaled64::get(3) + TrueCost);
1458 PredPathCost /= Scaled64::get(4);
1459 return PredPathCost;
1460}
1461
1462bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
1464 if (SI.getType()->isVectorTy())
1466 else
1468 return TLI->isSelectSupported(SelectKind);
1469}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Hexagon Hardware Loops
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
print mir2vec MIR2Vec Vocabulary Printer Pass
Definition MIR2Vec.cpp:598
uint64_t IntrinsicInst * II
#define P(N)
FunctionAnalysisManager FAM
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
const GCNTargetMachine & getTM(const GCNSubtarget *STI)
static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI)
static cl::opt< unsigned > ColdOperandMaxCostMultiplier("cold-operand-max-cost-multiplier", cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " "slice of a cold operand to be considered inexpensive."), cl::init(1), cl::Hidden)
static cl::opt< unsigned > ColdOperandThreshold("cold-operand-threshold", cl::desc("Maximum frequency of path for an operand to be considered cold."), cl::init(20), cl::Hidden)
static cl::opt< bool > DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, cl::init(false), cl::desc("Disable loop-level heuristics."))
static cl::opt< unsigned > GainCycleThreshold("select-opti-loop-cycle-gain-threshold", cl::desc("Minimum gain per loop (in cycles) threshold."), cl::init(4), cl::Hidden)
static cl::opt< unsigned > MispredictDefaultRate("mispredict-default-rate", cl::Hidden, cl::init(25), cl::desc("Default mispredict rate (initialized to 25%)."))
static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, DiagnosticInfoOptimizationBase &Rem)
static cl::opt< unsigned > GainGradientThreshold("select-opti-loop-gradient-gain-threshold", cl::desc("Gradient gain threshold (%)."), cl::init(25), cl::Hidden)
static cl::opt< unsigned > GainRelativeThreshold("select-opti-loop-relative-gain-threshold", cl::desc("Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), cl::init(8), cl::Hidden)
This file contains the declaration of the SelectOptimizePass class, its corresponding pass name is se...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static const uint32_t IV[8]
Definition blake3_impl.h:83
AnalysisUsage & addRequired()
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
Analysis pass which computes BlockFrequencyInfo.
Legacy analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:135
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
Common features for diagnostics dealing with optimization remarks that are used by both IR and MIR pa...
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2868
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void swapProfMetadata()
If the instruction has "branch_weights" MD_prof metadata and the MDNode has three operands (including...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:612
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
OptimizationRemarkEmitter legacy analysis pass.
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool hasProfileSummary() const
Returns true if profile summary is available.
bool isColdBlock(const BBType *BB, BFIT *BFI) const
Returns true if BasicBlock BB is considered cold.
Simple representation of a scaled number.
static ScaledNumber get(uint64_t N)
static ScaledNumber getZero()
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetTransformInfo.
virtual bool isSelectSupported(SelectSupportKind) const
SelectSupportKind
Enum that describes what type of support for selects the target has.
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Provide an instruction scheduling machine model to CodeGen passes.
const MCSchedModel * getMCSchedModel() const
LLVM_ABI void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel=true, bool EnableSItins=true)
Initialize the machine model for instruction scheduling.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetLowering * getTargetLowering() const
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_Value()
Match an arbitrary value and ignore it.
AnyBinaryOp_match< LHS, RHS, true > m_c_BinOp(const LHS &L, const RHS &R)
Matches a BinaryOperator with LHS and RHS in either order.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1731
InstructionCost Cost
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI FunctionPass * createSelectOptimizePass()
This pass converts conditional moves to conditional jumps when profitable.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr T divideNearest(U Numerator, V Denominator)
Returns (Numerator / Denominator) rounded by round-half-up.
Definition MathExtras.h:458
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
TargetTransformInfo TTI
LLVM_ABI bool hasProfMD(const Instruction &I)
Checks if an Instruction has MD_prof Metadata.
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
unsigned MispredictPenalty
Definition MCSchedule.h:317