// Extracted from the LLVM 23.0.0git doxygen page for VPlanConstruction.cpp.
//===-- VPlanConstruction.cpp - Transforms for initial VPlan construction -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements transforms for initial VPlan construction.
///
//===----------------------------------------------------------------------===//

15#include "VPlan.h"
16#include "VPlanAnalysis.h"
17#include "VPlanCFG.h"
18#include "VPlanDominatorTree.h"
19#include "VPlanHelpers.h"
20#include "VPlanPatternMatch.h"
21#include "VPlanTransforms.h"
22#include "VPlanUtils.h"
29#include "llvm/IR/InstrTypes.h"
30#include "llvm/IR/MDBuilder.h"
33
34#define DEBUG_TYPE "vplan"
35
36using namespace llvm;
37using namespace VPlanPatternMatch;
38
39namespace {
40// Class that is used to build the plain CFG for the incoming IR.
41class PlainCFGBuilder {
42 // The outermost loop of the input loop nest considered for vectorization.
43 Loop *TheLoop;
44
45 // Loop Info analysis.
46 LoopInfo *LI;
47
48 // Loop versioning for alias metadata.
49 LoopVersioning *LVer;
50
51 // Vectorization plan that we are working on.
52 std::unique_ptr<VPlan> Plan;
53
54 // Builder of the VPlan instruction-level representation.
55 VPBuilder VPIRBuilder;
56
57 // NOTE: The following maps are intentionally destroyed after the plain CFG
58 // construction because subsequent VPlan-to-VPlan transformation may
59 // invalidate them.
60 // Map incoming BasicBlocks to their newly-created VPBasicBlocks.
62 // Map incoming Value definitions to their newly-created VPValues.
63 DenseMap<Value *, VPValue *> IRDef2VPValue;
64
65 // Hold phi node's that need to be fixed once the plain CFG has been built.
67
68 // Utility functions.
69 void setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB);
70 void fixHeaderPhis();
71 VPBasicBlock *getOrCreateVPBB(BasicBlock *BB);
72#ifndef NDEBUG
73 bool isExternalDef(Value *Val);
74#endif
75 VPValue *getOrCreateVPOperand(Value *IRVal);
76 void createVPInstructionsForVPBB(VPBasicBlock *VPBB, BasicBlock *BB);
77
78public:
79 PlainCFGBuilder(Loop *Lp, LoopInfo *LI, LoopVersioning *LVer)
80 : TheLoop(Lp), LI(LI), LVer(LVer), Plan(std::make_unique<VPlan>(Lp)) {}
81
82 /// Build plain CFG for TheLoop and connect it to Plan's entry.
83 std::unique_ptr<VPlan> buildPlainCFG();
84};
85} // anonymous namespace
86
87// Set predecessors of \p VPBB in the same order as they are in \p BB. \p VPBB
88// must have no predecessors.
89void PlainCFGBuilder::setVPBBPredsFromBB(VPBasicBlock *VPBB, BasicBlock *BB) {
90 // Collect VPBB predecessors.
92 for (BasicBlock *Pred : predecessors(BB))
93 VPBBPreds.push_back(getOrCreateVPBB(Pred));
94 VPBB->setPredecessors(VPBBPreds);
95}
96
97static bool isHeaderBB(BasicBlock *BB, Loop *L) {
98 return L && BB == L->getHeader();
99}
100
101// Add operands to VPInstructions representing phi nodes from the input IR.
102void PlainCFGBuilder::fixHeaderPhis() {
103 for (auto *Phi : PhisToFix) {
104 assert(IRDef2VPValue.count(Phi) && "Missing VPInstruction for PHINode.");
105 VPValue *VPVal = IRDef2VPValue[Phi];
106 assert(isa<VPPhi>(VPVal) && "Expected VPPhi for phi node.");
107 auto *PhiR = cast<VPPhi>(VPVal);
108 assert(PhiR->getNumOperands() == 0 && "Expected VPPhi with no operands.");
109 assert(isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent())) &&
110 "Expected Phi in header block.");
111 assert(Phi->getNumOperands() == 2 &&
112 "header phi must have exactly 2 operands");
113 for (BasicBlock *Pred : predecessors(Phi->getParent()))
114 PhiR->addOperand(
115 getOrCreateVPOperand(Phi->getIncomingValueForBlock(Pred)));
116 }
117}
118
119// Create a new empty VPBasicBlock for an incoming BasicBlock or retrieve an
120// existing one if it was already created.
121VPBasicBlock *PlainCFGBuilder::getOrCreateVPBB(BasicBlock *BB) {
122 if (auto *VPBB = BB2VPBB.lookup(BB)) {
123 // Retrieve existing VPBB.
124 return VPBB;
125 }
126
127 // Create new VPBB.
128 StringRef Name = BB->getName();
129 LLVM_DEBUG(dbgs() << "Creating VPBasicBlock for " << Name << "\n");
130 VPBasicBlock *VPBB = Plan->createVPBasicBlock(Name);
131 BB2VPBB[BB] = VPBB;
132 return VPBB;
133}
134
135#ifndef NDEBUG
136// Return true if \p Val is considered an external definition. An external
137// definition is either:
138// 1. A Value that is not an Instruction. This will be refined in the future.
139// 2. An Instruction that is outside of the IR region represented in VPlan,
140// i.e., is not part of the loop nest.
141bool PlainCFGBuilder::isExternalDef(Value *Val) {
142 // All the Values that are not Instructions are considered external
143 // definitions for now.
145 if (!Inst)
146 return true;
147
148 // Check whether Instruction definition is in loop body.
149 return !TheLoop->contains(Inst);
150}
151#endif
152
153// Create a new VPValue or retrieve an existing one for the Instruction's
154// operand \p IRVal. This function must only be used to create/retrieve VPValues
155// for *Instruction's operands* and not to create regular VPInstruction's. For
156// the latter, please, look at 'createVPInstructionsForVPBB'.
157VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
158 auto VPValIt = IRDef2VPValue.find(IRVal);
159 if (VPValIt != IRDef2VPValue.end())
160 // Operand has an associated VPInstruction or VPValue that was previously
161 // created.
162 return VPValIt->second;
163
164 // Operand doesn't have a previously created VPInstruction/VPValue. This
165 // means that operand is:
166 // A) a definition external to VPlan,
167 // B) any other Value without specific representation in VPlan.
168 // For now, we use VPValue to represent A and B and classify both as external
169 // definitions. We may introduce specific VPValue subclasses for them in the
170 // future.
171 assert(isExternalDef(IRVal) && "Expected external definition as operand.");
172
173 // A and B: Create VPValue and add it to the pool of external definitions and
174 // to the Value->VPValue map.
175 VPValue *NewVPVal = Plan->getOrAddLiveIn(IRVal);
176 IRDef2VPValue[IRVal] = NewVPVal;
177 return NewVPVal;
178}
179
180// Create new VPInstructions in a VPBasicBlock, given its BasicBlock
181// counterpart. This function must be invoked in RPO so that the operands of a
182// VPInstruction in \p BB have been visited before (except for Phi nodes).
183void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
184 BasicBlock *BB) {
185 VPIRBuilder.setInsertPoint(VPBB);
186 // TODO: Model and preserve debug intrinsics in VPlan.
187 for (Instruction &InstRef : BB->instructionsWithoutDebug(false)) {
188 Instruction *Inst = &InstRef;
189
190 // There shouldn't be any VPValue for Inst at this point. Otherwise, we
191 // visited Inst when we shouldn't, breaking the RPO traversal order.
192 assert(!IRDef2VPValue.count(Inst) &&
193 "Instruction shouldn't have been visited.");
194
195 if (auto *Br = dyn_cast<BranchInst>(Inst)) {
196 // Conditional branch instruction are represented using BranchOnCond
197 // recipes.
198 if (Br->isConditional()) {
199 VPValue *Cond = getOrCreateVPOperand(Br->getCondition());
200 VPIRBuilder.createNaryOp(VPInstruction::BranchOnCond, {Cond}, Inst, {},
201 VPIRMetadata(*Inst), Inst->getDebugLoc());
202 }
203
204 // Skip the rest of the Instruction processing for Branch instructions.
205 continue;
206 }
207
208 if (auto *SI = dyn_cast<SwitchInst>(Inst)) {
209 // Don't emit recipes for unconditional switch instructions.
210 if (SI->getNumCases() == 0)
211 continue;
212 SmallVector<VPValue *> Ops = {getOrCreateVPOperand(SI->getCondition())};
213 for (auto Case : SI->cases())
214 Ops.push_back(getOrCreateVPOperand(Case.getCaseValue()));
215 VPIRBuilder.createNaryOp(Instruction::Switch, Ops, Inst, {},
216 VPIRMetadata(*Inst), Inst->getDebugLoc());
217 continue;
218 }
219
220 VPSingleDefRecipe *NewR;
221 if (auto *Phi = dyn_cast<PHINode>(Inst)) {
222 // Phi node's operands may not have been visited at this point. We create
223 // an empty VPInstruction that we will fix once the whole plain CFG has
224 // been built.
225 NewR =
226 VPIRBuilder.createScalarPhi({}, Phi->getDebugLoc(), "vec.phi", *Phi);
227 NewR->setUnderlyingValue(Phi);
228 if (isHeaderBB(Phi->getParent(), LI->getLoopFor(Phi->getParent()))) {
229 // Header phis need to be fixed after the VPBB for the latch has been
230 // created.
231 PhisToFix.push_back(Phi);
232 } else {
233 // Add operands for VPPhi in the order matching its predecessors in
234 // VPlan.
235 DenseMap<const VPBasicBlock *, VPValue *> VPPredToIncomingValue;
236 for (unsigned I = 0; I != Phi->getNumOperands(); ++I) {
237 VPPredToIncomingValue[BB2VPBB[Phi->getIncomingBlock(I)]] =
238 getOrCreateVPOperand(Phi->getIncomingValue(I));
239 }
240 for (VPBlockBase *Pred : VPBB->getPredecessors())
241 NewR->addOperand(
242 VPPredToIncomingValue.lookup(Pred->getExitingBasicBlock()));
243 }
244 } else {
245 // Build VPIRMetadata from the instruction and add loop versioning
246 // metadata for loads and stores.
247 VPIRMetadata MD(*Inst);
248 if (isa<LoadInst, StoreInst>(Inst) && LVer) {
249 const auto &[AliasScopeMD, NoAliasMD] =
250 LVer->getNoAliasMetadataFor(Inst);
251 if (AliasScopeMD)
252 MD.setMetadata(LLVMContext::MD_alias_scope, AliasScopeMD);
253 if (NoAliasMD)
254 MD.setMetadata(LLVMContext::MD_noalias, NoAliasMD);
255 }
256
257 // Translate LLVM-IR operands into VPValue operands and set them in the
258 // new VPInstruction.
259 SmallVector<VPValue *, 4> VPOperands;
260 for (Value *Op : Inst->operands())
261 VPOperands.push_back(getOrCreateVPOperand(Op));
262
263 if (auto *CI = dyn_cast<CastInst>(Inst)) {
264 NewR = VPIRBuilder.createScalarCast(CI->getOpcode(), VPOperands[0],
265 CI->getType(), CI->getDebugLoc(),
266 VPIRFlags(*CI), MD);
267 NewR->setUnderlyingValue(CI);
268 } else if (auto *LI = dyn_cast<LoadInst>(Inst)) {
269 NewR = VPIRBuilder.createScalarLoad(LI->getType(), VPOperands[0],
270 LI->getDebugLoc(), MD);
271 NewR->setUnderlyingValue(LI);
272 } else {
273 // Build VPInstruction for any arbitrary Instruction without specific
274 // representation in VPlan.
275 NewR =
276 VPIRBuilder.createNaryOp(Inst->getOpcode(), VPOperands, Inst,
277 VPIRFlags(*Inst), MD, Inst->getDebugLoc());
278 }
279 }
280
281 IRDef2VPValue[Inst] = NewR;
282 }
283}
284
285// Main interface to build the plain CFG.
286std::unique_ptr<VPlan> PlainCFGBuilder::buildPlainCFG() {
287 VPIRBasicBlock *Entry = cast<VPIRBasicBlock>(Plan->getEntry());
288 BB2VPBB[Entry->getIRBasicBlock()] = Entry;
289 for (VPIRBasicBlock *ExitVPBB : Plan->getExitBlocks())
290 BB2VPBB[ExitVPBB->getIRBasicBlock()] = ExitVPBB;
291
292 // 1. Scan the body of the loop in a topological order to visit each basic
293 // block after having visited its predecessor basic blocks. Create a VPBB for
294 // each BB and link it to its successor and predecessor VPBBs. Note that
295 // predecessors must be set in the same order as they are in the incomming IR.
296 // Otherwise, there might be problems with existing phi nodes and algorithm
297 // based on predecessors traversal.
298
299 // Loop PH needs to be explicitly visited since it's not taken into account by
300 // LoopBlocksDFS.
301 BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
302 assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
303 "Unexpected loop preheader");
304 for (auto &I : *ThePreheaderBB) {
305 if (I.getType()->isVoidTy())
306 continue;
307 IRDef2VPValue[&I] = Plan->getOrAddLiveIn(&I);
308 }
309
310 LoopBlocksRPO RPO(TheLoop);
311 RPO.perform(LI);
312
313 for (BasicBlock *BB : RPO) {
314 // Create or retrieve the VPBasicBlock for this BB.
315 VPBasicBlock *VPBB = getOrCreateVPBB(BB);
316 // Set VPBB predecessors in the same order as they are in the incoming BB.
317 setVPBBPredsFromBB(VPBB, BB);
318
319 // Create VPInstructions for BB.
320 createVPInstructionsForVPBB(VPBB, BB);
321
322 // Set VPBB successors. We create empty VPBBs for successors if they don't
323 // exist already. Recipes will be created when the successor is visited
324 // during the RPO traversal.
325 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
327 getOrCreateVPBB(SI->getDefaultDest())};
328 for (auto Case : SI->cases())
329 Succs.push_back(getOrCreateVPBB(Case.getCaseSuccessor()));
330 VPBB->setSuccessors(Succs);
331 continue;
332 }
333 auto *BI = cast<BranchInst>(BB->getTerminator());
334 unsigned NumSuccs = succ_size(BB);
335 if (NumSuccs == 1) {
336 VPBB->setOneSuccessor(getOrCreateVPBB(BB->getSingleSuccessor()));
337 continue;
338 }
339 assert(BI->isConditional() && NumSuccs == 2 && BI->isConditional() &&
340 "block must have conditional branch with 2 successors");
341
342 BasicBlock *IRSucc0 = BI->getSuccessor(0);
343 BasicBlock *IRSucc1 = BI->getSuccessor(1);
344 VPBasicBlock *Successor0 = getOrCreateVPBB(IRSucc0);
345 VPBasicBlock *Successor1 = getOrCreateVPBB(IRSucc1);
346 VPBB->setTwoSuccessors(Successor0, Successor1);
347 }
348
349 for (auto *EB : Plan->getExitBlocks())
350 setVPBBPredsFromBB(EB, EB->getIRBasicBlock());
351
352 // 2. The whole CFG has been built at this point so all the input Values must
353 // have a VPlan counterpart. Fix VPlan header phi by adding their
354 // corresponding VPlan operands.
355 fixHeaderPhis();
356
357 Plan->getEntry()->setOneSuccessor(getOrCreateVPBB(TheLoop->getHeader()));
358 Plan->getEntry()->setPlan(&*Plan);
359
360 // Fix VPlan loop-closed-ssa exit phi's by adding incoming operands to the
361 // VPIRInstructions wrapping them.
362 // // Note that the operand order corresponds to IR predecessor order, and may
363 // need adjusting when VPlan predecessors are added, if an exit block has
364 // multiple predecessor.
365 for (auto *EB : Plan->getExitBlocks()) {
366 for (VPRecipeBase &R : EB->phis()) {
367 auto *PhiR = cast<VPIRPhi>(&R);
368 PHINode &Phi = PhiR->getIRPhi();
369 assert(PhiR->getNumOperands() == 0 &&
370 "no phi operands should be added yet");
371 for (BasicBlock *Pred : predecessors(EB->getIRBasicBlock()))
372 PhiR->addOperand(
373 getOrCreateVPOperand(Phi.getIncomingValueForBlock(Pred)));
374 }
375 }
376
377 LLVM_DEBUG(Plan->setName("Plain CFG\n"); dbgs() << *Plan);
378 return std::move(Plan);
379}
380
381/// Checks if \p HeaderVPB is a loop header block in the plain CFG; that is, it
382/// has exactly 2 predecessors (preheader and latch), where the block
383/// dominates the latch and the preheader dominates the block. If it is a
384/// header block return true and canonicalize the predecessors of the header
385/// (making sure the preheader appears first and the latch second) and the
386/// successors of the latch (making sure the loop exit comes first). Otherwise
387/// return false.
389 const VPDominatorTree &VPDT) {
390 ArrayRef<VPBlockBase *> Preds = HeaderVPB->getPredecessors();
391 if (Preds.size() != 2)
392 return false;
393
394 auto *PreheaderVPBB = Preds[0];
395 auto *LatchVPBB = Preds[1];
396 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
397 !VPDT.dominates(HeaderVPB, LatchVPBB)) {
398 std::swap(PreheaderVPBB, LatchVPBB);
399
400 if (!VPDT.dominates(PreheaderVPBB, HeaderVPB) ||
401 !VPDT.dominates(HeaderVPB, LatchVPBB))
402 return false;
403
404 // Canonicalize predecessors of header so that preheader is first and
405 // latch second.
406 HeaderVPB->swapPredecessors();
407 for (VPRecipeBase &R : cast<VPBasicBlock>(HeaderVPB)->phis())
408 R.swapOperands();
409 }
410
411 // The two successors of conditional branch match the condition, with the
412 // first successor corresponding to true and the second to false. We
413 // canonicalize the successors of the latch when introducing the region, such
414 // that the latch exits the region when its condition is true; invert the
415 // original condition if the original CFG branches to the header on true.
416 // Note that the exit edge is not yet connected for top-level loops.
417 if (LatchVPBB->getSingleSuccessor() ||
418 LatchVPBB->getSuccessors()[0] != HeaderVPB)
419 return true;
420
421 assert(LatchVPBB->getNumSuccessors() == 2 && "Must have 2 successors");
422 auto *Term = cast<VPBasicBlock>(LatchVPBB)->getTerminator();
425 "terminator must be a BranchOnCond");
426 auto *Not = new VPInstruction(VPInstruction::Not, {Term->getOperand(0)});
427 Not->insertBefore(Term);
428 Term->setOperand(0, Not);
429 LatchVPBB->swapSuccessors();
430
431 return true;
432}
433
434/// Create a new VPRegionBlock for the loop starting at \p HeaderVPB.
435static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) {
436 auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0];
437 auto *LatchVPBB = HeaderVPB->getPredecessors()[1];
438
439 VPBlockUtils::disconnectBlocks(PreheaderVPBB, HeaderVPB);
440 VPBlockUtils::disconnectBlocks(LatchVPBB, HeaderVPB);
441
442 // Create an empty region first and insert it between PreheaderVPBB and
443 // the exit blocks, taking care to preserve the original predecessor &
444 // successor order of blocks. Set region entry and exiting after both
445 // HeaderVPB and LatchVPBB have been disconnected from their
446 // predecessors/successors.
447 auto *R = Plan.createLoopRegion();
448
449 // Transfer latch's successors to the region.
451
452 VPBlockUtils::connectBlocks(PreheaderVPBB, R);
453 R->setEntry(HeaderVPB);
454 R->setExiting(LatchVPBB);
455
456 // All VPBB's reachable shallowly from HeaderVPB belong to the current region.
457 for (VPBlockBase *VPBB : vp_depth_first_shallow(HeaderVPB))
458 VPBB->setParent(R);
459}
460
461// Add the necessary canonical IV and branch recipes required to control the
462// loop.
463static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB,
464 VPBasicBlock *LatchVPBB, Type *IdxTy,
465 DebugLoc DL) {
466 auto *StartV = Plan.getConstantInt(IdxTy, 0);
467
468 // Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
469 auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
470 HeaderVPBB->insert(CanonicalIVPHI, HeaderVPBB->begin());
471
472 // We are about to replace the branch to exit the region. Remove the original
473 // BranchOnCond, if there is any.
474 DebugLoc LatchDL = DL;
475 if (!LatchVPBB->empty() && match(&LatchVPBB->back(), m_BranchOnCond())) {
476 LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
477 LatchVPBB->getTerminator()->eraseFromParent();
478 }
479
480 VPBuilder Builder(LatchVPBB);
481 // Add a VPInstruction to increment the scalar canonical IV by VF * UF.
482 // Initially the induction increment is guaranteed to not wrap, but that may
483 // change later, e.g. when tail-folding, when the flags need to be dropped.
484 auto *CanonicalIVIncrement = Builder.createAdd(
485 CanonicalIVPHI, &Plan.getVFxUF(), DL, "index.next", {true, false});
486 CanonicalIVPHI->addOperand(CanonicalIVIncrement);
487
488 // Add the BranchOnCount VPInstruction to the latch.
489 Builder.createNaryOp(VPInstruction::BranchOnCount,
490 {CanonicalIVIncrement, &Plan.getVectorTripCount()},
491 LatchDL);
492}
493
494/// Creates extracts for values in \p Plan defined in a loop region and used
495/// outside a loop region.
496static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB) {
497 VPBuilder B(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
498 for (VPBasicBlock *EB : Plan.getExitBlocks()) {
499 if (!is_contained(EB->predecessors(), MiddleVPBB))
500 continue;
501
502 for (VPRecipeBase &R : EB->phis()) {
503 auto *ExitIRI = cast<VPIRPhi>(&R);
504 VPValue *Exiting = ExitIRI->getIncomingValueForBlock(MiddleVPBB);
505 if (isa<VPIRValue>(Exiting))
506 continue;
507 Exiting = B.createNaryOp(VPInstruction::ExtractLastPart, Exiting);
508 Exiting = B.createNaryOp(VPInstruction::ExtractLastLane, Exiting);
509 ExitIRI->setIncomingValueForBlock(MiddleVPBB, Exiting);
510 }
511 }
512}
513
514/// Return an iterator range to iterate over pairs of matching phi nodes in
515/// \p Header and \p ScalarHeader, skipping the canonical IV in the former.
517 VPBasicBlock *ScalarHeader) {
518 return zip_equal(drop_begin(Header->phis()), ScalarHeader->phis());
519}
520
521static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL,
522 PredicatedScalarEvolution &PSE, Loop *TheLoop) {
523 VPDominatorTree VPDT(Plan);
524
525 auto *HeaderVPBB = cast<VPBasicBlock>(Plan.getEntry()->getSingleSuccessor());
526 canonicalHeaderAndLatch(HeaderVPBB, VPDT);
527 auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPBB->getPredecessors()[1]);
528
529 VPBasicBlock *VecPreheader = Plan.createVPBasicBlock("vector.ph");
530 VPBlockUtils::insertBlockAfter(VecPreheader, Plan.getEntry());
531
532 VPBasicBlock *MiddleVPBB = Plan.createVPBasicBlock("middle.block");
533 // The canonical LatchVPBB has the header block as last successor. If it has
534 // another successor, this successor is an exit block - insert middle block on
535 // its edge. Otherwise, add middle block as another successor retaining header
536 // as last.
537 if (LatchVPBB->getNumSuccessors() == 2) {
538 VPBlockBase *LatchExitVPB = LatchVPBB->getSuccessors()[0];
539 VPBlockUtils::insertOnEdge(LatchVPBB, LatchExitVPB, MiddleVPBB);
540 } else {
541 VPBlockUtils::connectBlocks(LatchVPBB, MiddleVPBB);
542 LatchVPBB->swapSuccessors();
543 }
544
545 addCanonicalIVRecipes(Plan, HeaderVPBB, LatchVPBB, InductionTy, IVDL);
546
547 // Create SCEV and VPValue for the trip count.
548 // We use the symbolic max backedge-taken-count, which works also when
549 // vectorizing loops with uncountable early exits.
550 const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount();
551 assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) &&
552 "Invalid backedge-taken count");
553 ScalarEvolution &SE = *PSE.getSE();
554 const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV,
555 InductionTy, TheLoop);
557
558 VPBasicBlock *ScalarPH = Plan.createVPBasicBlock("scalar.ph");
560
561 // The connection order corresponds to the operands of the conditional branch,
562 // with the middle block already connected to the exit block.
563 VPBlockUtils::connectBlocks(MiddleVPBB, ScalarPH);
564 // Also connect the entry block to the scalar preheader.
565 // TODO: Also introduce a branch recipe together with the minimum trip count
566 // check.
567 VPBlockUtils::connectBlocks(Plan.getEntry(), ScalarPH);
568 Plan.getEntry()->swapSuccessors();
569
570 createExtractsForLiveOuts(Plan, MiddleVPBB);
571
572 // Create resume phis in the scalar preheader for each phi in the scalar loop.
573 // Their incoming value from the vector loop will be the last lane of the
574 // corresponding vector loop header phi.
575 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
576 VPBuilder ScalarPHBuilder(ScalarPH);
577 assert(equal(ScalarPH->getPredecessors(),
578 ArrayRef<VPBlockBase *>({MiddleVPBB, Plan.getEntry()})) &&
579 "unexpected predecessor order of scalar ph");
580 for (const auto &[PhiR, ScalarPhiR] :
581 getMatchingPhisForScalarLoop(HeaderVPBB, Plan.getScalarHeader())) {
582 auto *VectorPhiR = cast<VPPhi>(&PhiR);
583 VPValue *BackedgeVal = VectorPhiR->getOperand(1);
584 VPValue *ResumeFromVectorLoop =
585 MiddleBuilder.createNaryOp(VPInstruction::ExtractLastPart, BackedgeVal);
586 ResumeFromVectorLoop = MiddleBuilder.createNaryOp(
587 VPInstruction::ExtractLastLane, ResumeFromVectorLoop);
588 // Create scalar resume phi, with the first operand being the incoming value
589 // from the middle block and the second operand coming from the entry block.
590 auto *ResumePhiR = ScalarPHBuilder.createScalarPhi(
591 {ResumeFromVectorLoop, VectorPhiR->getOperand(0)},
592 VectorPhiR->getDebugLoc());
593 cast<VPIRPhi>(&ScalarPhiR)->addOperand(ResumePhiR);
594 }
595}
596
597/// Check \p Plan's live-in and replace them with constants, if they can be
598/// simplified via SCEV.
601 auto GetSimplifiedLiveInViaSCEV = [&](VPValue *VPV) -> VPValue * {
602 const SCEV *Expr = vputils::getSCEVExprForVPValue(VPV, PSE);
603 if (auto *C = dyn_cast<SCEVConstant>(Expr))
604 return Plan.getOrAddLiveIn(C->getValue());
605 return nullptr;
606 };
607
608 for (VPValue *LiveIn : to_vector(Plan.getLiveIns())) {
609 if (VPValue *SimplifiedLiveIn = GetSimplifiedLiveInViaSCEV(LiveIn))
610 LiveIn->replaceAllUsesWith(SimplifiedLiveIn);
611 }
612}
613
614/// To make RUN_VPLAN_PASS print initial VPlan.
616
617std::unique_ptr<VPlan>
618VPlanTransforms::buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy,
620 LoopVersioning *LVer) {
621 PlainCFGBuilder Builder(TheLoop, &LI, LVer);
622 std::unique_ptr<VPlan> VPlan0 = Builder.buildPlainCFG();
623 addInitialSkeleton(*VPlan0, InductionTy, IVDL, PSE, TheLoop);
624 simplifyLiveInsWithSCEV(*VPlan0, PSE);
625
627 return VPlan0;
628}
629
630/// Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe
631/// for \p Phi based on \p IndDesc.
632static VPHeaderPHIRecipe *
634 const InductionDescriptor &IndDesc, VPlan &Plan,
635 PredicatedScalarEvolution &PSE, Loop &OrigLoop,
636 DebugLoc DL) {
637 [[maybe_unused]] ScalarEvolution &SE = *PSE.getSE();
638 assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
639 "step must be loop invariant");
640 assert((Plan.getLiveIn(IndDesc.getStartValue()) == Start ||
641 (SE.isSCEVable(IndDesc.getStartValue()->getType()) &&
642 SE.getSCEV(IndDesc.getStartValue()) ==
643 vputils::getSCEVExprForVPValue(Start, PSE))) &&
644 "Start VPValue must match IndDesc's start value");
645
646 VPValue *Step =
648
649 VPValue *BackedgeVal = PhiR->getOperand(1);
650 // Replace live-out extracts of WideIV's backedge value by ExitingIVValue
651 // recipes. optimizeInductionLiveOutUsers will later compute the proper
652 // DerivedIV.
653 auto ReplaceExtractsWithExitingIVValue = [&](VPHeaderPHIRecipe *WideIV) {
654 for (VPUser *U : to_vector(BackedgeVal->users())) {
656 continue;
657 auto *ExtractLastPart = cast<VPInstruction>(U);
658 VPUser *ExtractLastPartUser = ExtractLastPart->getSingleUser();
659 assert(ExtractLastPartUser && "must have a single user");
660 if (!match(ExtractLastPartUser, m_ExtractLastLane(m_VPValue())))
661 continue;
662 auto *ExtractLastLane = cast<VPInstruction>(ExtractLastPartUser);
663 assert(is_contained(ExtractLastLane->getParent()->successors(),
664 Plan.getScalarPreheader()) &&
665 "last lane must be extracted in the middle block");
666 VPBuilder Builder(ExtractLastLane);
667 ExtractLastLane->replaceAllUsesWith(
668 Builder.createNaryOp(VPInstruction::ExitingIVValue, {WideIV}));
669 ExtractLastLane->eraseFromParent();
670 ExtractLastPart->eraseFromParent();
671 }
672 };
673
675 auto *WideIV = new VPWidenPointerInductionRecipe(
676 Phi, Start, Step, &Plan.getVFxUF(), IndDesc, DL);
677 ReplaceExtractsWithExitingIVValue(WideIV);
678 return WideIV;
679 }
680
683 "must have an integer or float induction at this point");
684
685 // Update wide induction increments to use the same step as the corresponding
686 // wide induction. This enables detecting induction increments directly in
687 // VPlan and removes redundant splats.
688 if (match(BackedgeVal, m_Add(m_Specific(PhiR), m_VPValue())))
689 BackedgeVal->getDefiningRecipe()->setOperand(1, Step);
690
691 // It is always safe to copy over the NoWrap and FastMath flags. In
692 // particular, when folding tail by masking, the masked-off lanes are never
693 // used, so it is safe.
695
696 auto *WideIV = new VPWidenIntOrFpInductionRecipe(
697 Phi, Start, Step, &Plan.getVF(), IndDesc, Flags, DL);
698
699 ReplaceExtractsWithExitingIVValue(WideIV);
700 return WideIV;
701}
702
704 VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop,
707 const SmallPtrSetImpl<const PHINode *> &FixedOrderRecurrences,
708 const SmallPtrSetImpl<PHINode *> &InLoopReductions, bool AllowReordering) {
709 // Retrieve the header manually from the intial plain-CFG VPlan.
710 VPBasicBlock *HeaderVPBB = cast<VPBasicBlock>(
711 Plan.getEntry()->getSuccessors()[1]->getSingleSuccessor());
712 assert(VPDominatorTree(Plan).dominates(HeaderVPBB,
713 HeaderVPBB->getPredecessors()[1]) &&
714 "header must dominate its latch");
715
716 auto CreateHeaderPhiRecipe = [&](VPPhi *PhiR) -> VPHeaderPHIRecipe * {
717 // TODO: Gradually replace uses of underlying instruction by analyses on
718 // VPlan.
719 auto *Phi = cast<PHINode>(PhiR->getUnderlyingInstr());
720 assert(PhiR->getNumOperands() == 2 &&
721 "Must have 2 operands for header phis");
722
723 // Extract common values once.
724 VPIRValue *Start = cast<VPIRValue>(PhiR->getOperand(0));
725 VPValue *BackedgeValue = PhiR->getOperand(1);
726
727 if (FixedOrderRecurrences.contains(Phi)) {
728 // TODO: Currently fixed-order recurrences are modeled as chains of
729 // first-order recurrences. If there are no users of the intermediate
730 // recurrences in the chain, the fixed order recurrence should be
731 // modeled directly, enabling more efficient codegen.
732 return new VPFirstOrderRecurrencePHIRecipe(Phi, *Start, *BackedgeValue);
733 }
734
735 auto InductionIt = Inductions.find(Phi);
736 if (InductionIt != Inductions.end())
737 return createWidenInductionRecipe(Phi, PhiR, Start, InductionIt->second,
738 Plan, PSE, OrigLoop,
739 PhiR->getDebugLoc());
740
741 assert(Reductions.contains(Phi) && "only reductions are expected now");
742 const RecurrenceDescriptor &RdxDesc = Reductions.lookup(Phi);
744 Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()) &&
745 "incoming value must match start value");
746 // Will be updated later to >1 if reduction is partial.
747 unsigned ScaleFactor = 1;
748 bool UseOrderedReductions = !AllowReordering && RdxDesc.isOrdered();
749 return new VPReductionPHIRecipe(
750 Phi, RdxDesc.getRecurrenceKind(), *Start, *BackedgeValue,
751 getReductionStyle(InLoopReductions.contains(Phi), UseOrderedReductions,
752 ScaleFactor),
753 Phi->getType()->isFloatingPointTy() ? RdxDesc.getFastMathFlags()
754 : VPIRFlags(),
756 };
757
758 for (VPRecipeBase &R : make_early_inc_range(HeaderVPBB->phis())) {
760 continue;
761 auto *PhiR = cast<VPPhi>(&R);
762 VPHeaderPHIRecipe *HeaderPhiR = CreateHeaderPhiRecipe(PhiR);
763 HeaderPhiR->insertBefore(PhiR);
764 PhiR->replaceAllUsesWith(HeaderPhiR);
765 PhiR->eraseFromParent();
766 }
767
768 for (const auto &[HeaderPhiR, ScalarPhiR] :
770 auto *ResumePhiR = cast<VPPhi>(&ScalarPhiR);
771 if (isa<VPFirstOrderRecurrencePHIRecipe>(&HeaderPhiR)) {
772 ResumePhiR->setName("scalar.recur.init");
773 auto *ExtractLastLane = cast<VPInstruction>(ResumePhiR->getOperand(0));
774 ExtractLastLane->setName("vector.recur.extract");
775 continue;
776 }
777 ResumePhiR->setName(isa<VPWidenInductionRecipe>(HeaderPhiR)
778 ? "bc.resume.val"
779 : "bc.merge.rdx");
780 }
781}
782
// NOTE(review): documentation-viewer dump — the leading numerals are rendered
// source line numbers; gaps in them (e.g. 783, 787-788, 796-798, 802, 807,
// 822, 863, 891, 895) mark lines lost in extraction, including this
// function's first signature line. From the visible body: for each in-loop
// VPReductionPHIRecipe in the header, collect the chain of "link" recipes
// feeding the reduction and rewrite each link into a VPReductionRecipe,
// folding blends and handling fmuladd / sub special cases.
784 VPlan &Plan, const DenseSet<BasicBlock *> &BlocksNeedingPredication,
785 ElementCount MinVF) {
786 VPTypeAnalysis TypeInfo(Plan);
// NOTE(review): lines 787-788 elided here — the later uses of `Header` and
// `ToDelete` imply their declarations were on the missing lines; confirm
// against the upstream source.
789
790 for (VPRecipeBase &R : Header->phis()) {
// Only in-loop reductions are rewritten; scalar-VF plans are skipped unless
// the reduction is ordered.
791 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
792 if (!PhiR || !PhiR->isInLoop() || (MinVF.isScalar() && !PhiR->isOrdered()))
793 continue;
794
795 RecurKind Kind = PhiR->getRecurrenceKind();
// NOTE(review): the assert condition for the message below was elided
// (lines 796-798).
799 "AnyOf and Find reductions are not allowed for in-loop reductions");
800
// For FP recurrences, FMFs starts at "fast" and is intersected with each
// link's flags below, yielding the common flags of the whole chain.
801 bool IsFPRecurrence =
803 FastMathFlags FMFs =
804 IsFPRecurrence ? FastMathFlags::getFast() : FastMathFlags();
805
806 // Collect the chain of "link" recipes for the reduction starting at PhiR.
// NOTE(review): the Worklist declaration (line 807, presumably a SetVector
// given the insert/index usage) was elided.
808 Worklist.insert(PhiR);
809 for (unsigned I = 0; I != Worklist.size(); ++I) {
810 VPSingleDefRecipe *Cur = Worklist[I];
811 for (VPUser *U : Cur->users()) {
812 auto *UserRecipe = cast<VPSingleDefRecipe>(U);
// Users outside the loop region (middle block / scalar preheader) are not
// part of the reduction chain.
813 if (!UserRecipe->getParent()->getEnclosingLoopRegion()) {
814 assert((UserRecipe->getParent() == Plan.getMiddleBlock() ||
815 UserRecipe->getParent() == Plan.getScalarPreheader()) &&
816 "U must be either in the loop region, the middle block or the "
817 "scalar preheader.");
818 continue;
819 }
820
821 // Stores using instructions will be sunk later.
// NOTE(review): the condition guarding this continue (line 822) was elided.
823 continue;
824 Worklist.insert(UserRecipe);
825 }
826 }
827
828 // Visit operation "Links" along the reduction chain top-down starting from
829 // the phi until LoopExitValue. We keep track of the previous item
830 // (PreviousLink) to tell which of the two operands of a Link will remain
831 // scalar and which will be reduced. For minmax by select(cmp), Link will be
832 // the select instructions. Blend recipes of in-loop reduction phi's will
833 // get folded to their non-phi operand, as the reduction recipe handles the
834 // condition directly.
835 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
836 for (VPSingleDefRecipe *CurrentLink : drop_begin(Worklist)) {
// Fold a blend of {PhiR, other} into its non-phi incoming value; the
// predicate is handled by the reduction recipe's CondOp instead.
837 if (auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink)) {
838 assert(Blend->getNumIncomingValues() == 2 &&
839 "Blend must have 2 incoming values");
840 unsigned PhiRIdx = Blend->getIncomingValue(0) == PhiR ? 0 : 1;
841 assert(Blend->getIncomingValue(PhiRIdx) == PhiR &&
842 "PhiR must be an operand of the blend");
843 Blend->replaceAllUsesWith(Blend->getIncomingValue(1 - PhiRIdx));
844 continue;
845 }
846
// Intersect FMFs with this link's flags; for selects, also with the flags
// of the compare feeding the select (operand 0).
847 if (IsFPRecurrence) {
848 FastMathFlags CurFMF =
849 cast<VPRecipeWithIRFlags>(CurrentLink)->getFastMathFlags();
850 if (match(CurrentLink, m_Select(m_VPValue(), m_VPValue(), m_VPValue())))
851 CurFMF |= cast<VPRecipeWithIRFlags>(CurrentLink->getOperand(0))
852 ->getFastMathFlags();
853 FMFs &= CurFMF;
854 }
855
856 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr();
857
858 // Recognize a call to the llvm.fmuladd intrinsic.
859 bool IsFMulAdd = Kind == RecurKind::FMulAdd;
860 VPValue *VecOp;
861 VPBasicBlock *LinkVPBB = CurrentLink->getParent();
862 if (IsFMulAdd) {
// NOTE(review): the assert condition for the message below (line 863) was
// elided.
864 "Expected current VPInstruction to be a call to the "
865 "llvm.fmuladd intrinsic");
866 assert(CurrentLink->getOperand(2) == PreviousLink &&
867 "expected a call where the previous link is the added operand");
868
869 // If the instruction is a call to the llvm.fmuladd intrinsic then we
870 // need to create an fmul recipe (multiplying the first two operands of
871 // the fmuladd together) to use as the vector operand for the fadd
872 // reduction.
873 auto *FMulRecipe = new VPInstruction(
874 Instruction::FMul,
875 {CurrentLink->getOperand(0), CurrentLink->getOperand(1)},
876 CurrentLinkI->getFastMathFlags());
877 LinkVPBB->insert(FMulRecipe, CurrentLink->getIterator());
878 VecOp = FMulRecipe;
879 } else if (Kind == RecurKind::AddChainWithSubs &&
880 match(CurrentLink, m_Sub(m_VPValue(), m_VPValue()))) {
// A sub in an add-chain is rewritten as add of the negated operand
// (0 - rhs), so the reduction op stays an add.
881 Type *PhiTy = TypeInfo.inferScalarType(PhiR);
882 auto *Zero = Plan.getConstantInt(PhiTy, 0);
883 VPBuilder Builder(LinkVPBB, CurrentLink->getIterator());
884 auto *Sub = Builder.createSub(Zero, CurrentLink->getOperand(1),
885 CurrentLinkI->getDebugLoc());
886 Sub->setUnderlyingValue(CurrentLinkI);
887 VecOp = Sub;
888 } else {
889 // Index of the first operand which holds a non-mask vector operand.
890 unsigned IndexOfFirstOperand = 0;
// NOTE(review): the condition opening this branch (line 891) was elided —
// from context it distinguishes min/max (cmp+select) recurrences.
892 if (match(CurrentLink, m_Cmp(m_VPValue(), m_VPValue())))
893 continue;
894 assert(match(CurrentLink,
896 "must be a select recipe");
897 IndexOfFirstOperand = 1;
898 }
899 // Note that for non-commutable operands (cmp-selects), the semantics of
900 // the cmp-select are captured in the recurrence kind.
901 unsigned VecOpId =
902 CurrentLink->getOperand(IndexOfFirstOperand) == PreviousLink
903 ? IndexOfFirstOperand + 1
904 : IndexOfFirstOperand;
905 VecOp = CurrentLink->getOperand(VecOpId);
906 assert(
907 VecOp != PreviousLink &&
908 CurrentLink->getOperand(
909 cast<VPInstruction>(CurrentLink)->getNumOperandsWithoutMask() -
910 1 - (VecOpId - IndexOfFirstOperand)) == PreviousLink &&
911 "PreviousLink must be the operand other than VecOp");
912 }
913
914 // Get block mask from CurrentLink, if it needs predication.
915 VPValue *CondOp = nullptr;
916 if (BlocksNeedingPredication.contains(CurrentLinkI->getParent()))
917 CondOp = cast<VPInstruction>(CurrentLink)->getMask();
918
919 assert(PhiR->getVFScaleFactor() == 1 &&
920 "inloop reductions must be unscaled");
921 auto *RedRecipe = new VPReductionRecipe(
922 Kind, FMFs, CurrentLinkI, PreviousLink, VecOp, CondOp,
923 getReductionStyle(/*IsInLoop=*/true, PhiR->isOrdered(), 1),
924 CurrentLinkI->getDebugLoc());
925 // Append the recipe to the end of the VPBasicBlock because we need to
926 // ensure that it comes after all of it's inputs, including CondOp.
927 // Delete CurrentLink as it will be invalid if its operand is replaced
928 // with a reduction defined at the bottom of the block in the next link.
929 if (LinkVPBB->getNumSuccessors() == 0)
930 RedRecipe->insertBefore(&*std::prev(std::prev(LinkVPBB->end())));
931 else
932 LinkVPBB->appendRecipe(RedRecipe);
933
934 CurrentLink->replaceAllUsesWith(RedRecipe);
935 ToDelete.push_back(CurrentLink);
936 PreviousLink = RedRecipe;
937 }
938 }
939
// Deletion is deferred so the iteration above never erases a recipe it may
// still visit as a user.
940 for (VPRecipeBase *R : ToDelete)
941 R->eraseFromParent();
942}
943
// NOTE(review): doc-viewer dump — this function's first signature line (944)
// and interior lines 947 and 969 were elided by extraction. Visible behavior:
// dispatch uncountable early exits to handleUncountableEarlyExits; otherwise
// disconnect all countable early exits so the loop exits only via the latch.
945 bool HasUncountableEarlyExit) {
946 auto *MiddleVPBB = cast<VPBasicBlock>(
// NOTE(review): the cast argument (line 947) was elided — presumably the
// middle block is obtained from the plan here; confirm upstream.
948 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
// Successor 1 of the latch is taken to be the loop header (successor 0 being
// the exit towards the middle block).
949 VPBlockBase *HeaderVPB = cast<VPBasicBlock>(LatchVPBB->getSuccessors()[1]);
950
951 if (HasUncountableEarlyExit) {
952 handleUncountableEarlyExits(Plan, cast<VPBasicBlock>(HeaderVPB), LatchVPBB,
953 MiddleVPBB);
954 return;
955 }
956
957 // Disconnect countable early exits from the loop, leaving it with a single
958 // exit from the latch. Countable early exits are left for a scalar epilog.
959 for (VPIRBasicBlock *EB : Plan.getExitBlocks()) {
// to_vector copies the predecessor list so it can be mutated while iterating.
960 for (VPBlockBase *Pred : to_vector(EB->getPredecessors())) {
961 if (Pred == MiddleVPBB)
962 continue;
963
964 // Remove phi operands for the early exiting block.
965 for (VPRecipeBase &R : EB->phis())
966 cast<VPIRPhi>(&R)->removeIncomingValueFor(Pred);
967 auto *EarlyExitingVPBB = cast<VPBasicBlock>(Pred);
968 EarlyExitingVPBB->getTerminator()->eraseFromParent();
// NOTE(review): line 969 elided — presumably the edge from the early-exiting
// block to the exit block is disconnected here; confirm upstream.
970 }
971 }
972}
973
// NOTE(review): doc-viewer dump — the first signature line (974) and the cast
// argument on line 978 were elided. Visible behavior: terminate the middle
// block with a BranchOnCond deciding between resuming in the scalar loop and
// skipping it, using the trip-count comparison described below.
975 bool RequiresScalarEpilogueCheck,
976 bool TailFolded) {
977 auto *MiddleVPBB = cast<VPBasicBlock>(
979 // If MiddleVPBB has a single successor then the original loop does not exit
980 // via the latch and the single successor must be the scalar preheader.
981 // There's no need to add a runtime check to MiddleVPBB.
982 if (MiddleVPBB->getNumSuccessors() == 1) {
983 assert(MiddleVPBB->getSingleSuccessor() == Plan.getScalarPreheader() &&
984 "must have ScalarPH as single successor");
985 return;
986 }
987
988 assert(MiddleVPBB->getNumSuccessors() == 2 && "must have 2 successors");
989
990 // Add a check in the middle block to see if we have completed all of the
991 // iterations in the first vector loop.
992 //
993 // Three cases:
994 // 1) If we require a scalar epilogue, the scalar ph must execute. Set the
995 // condition to false.
996 // 2) If (N - N%VF) == N, then we *don't* need to run the
997 // remainder. Thus if tail is to be folded, we know we don't need to run
998 // the remainder and we can set the condition to true.
999 // 3) Otherwise, construct a runtime check.
1000
1001 // We use the same DebugLoc as the scalar loop latch terminator instead of
1002 // the corresponding compare because they may have ended up with different
1003 // line numbers and we want to avoid awkward line stepping while debugging.
1004 // E.g., if the compare has got a line number inside the loop.
1005 auto *LatchVPBB = cast<VPBasicBlock>(MiddleVPBB->getSinglePredecessor());
1006 DebugLoc LatchDL = LatchVPBB->getTerminator()->getDebugLoc();
1007 VPBuilder Builder(MiddleVPBB);
1008 VPValue *Cmp;
1009 if (!RequiresScalarEpilogueCheck)
1010 Cmp = Plan.getFalse();
1011 else if (TailFolded)
1012 Cmp = Plan.getTrue();
1013 else
// Runtime check: trip count == vector trip count means no remainder
// iterations are left.
1014 Cmp = Builder.createICmp(CmpInst::ICMP_EQ, Plan.getTripCount(),
1015 &Plan.getVectorTripCount(), LatchDL, "cmp.n");
1016 Builder.createNaryOp(VPInstruction::BranchOnCond, {Cmp}, LatchDL);
1017}
1018
// NOTE(review): doc-viewer dump — the signature line (1019) was elided; from
// the body this is the pass that wraps canonical header/latch loops into
// VPRegionBlocks and names the top-level vector loop region.
1020 VPDominatorTree VPDT(Plan);
// Post-order over the shallow CFG visits inner loops before outer ones.
1021 for (VPBlockBase *HeaderVPB : vp_post_order_shallow(Plan.getEntry()))
1022 if (canonicalHeaderAndLatch(HeaderVPB, VPDT))
1023 createLoopRegion(Plan, HeaderVPB);
1024
1025 VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
1026 TopRegion->setName("vector loop");
1027 TopRegion->getEntryBasicBlock()->setName("vector.body");
1028}
1029
// NOTE(review): doc-viewer dump — the signature line (1030) and several
// interior lines (1050 defining BTC, 1058, 1076/1078 declaring the NeedsPhi
// container, 1096 defining Poison, 1108 with the extract match) were elided.
// Visible behavior: tail-fold the loop by creating a header mask
// (wide canonical IV ULE backedge-taken count), branching on it, splitting
// the latch at the IV increment, inserting poison-incoming latch phis for
// values live past the tail, and rewriting last-element extracts in the
// middle block to extract from the last active lane.
1031 assert(Plan.getExitBlocks().size() == 1 &&
1032 "only a single-exit block is supported currently");
1033 assert(Plan.getExitBlocks().front()->getSinglePredecessor() ==
1034 Plan.getMiddleBlock() &&
1035 "the exit block must have middle block as single predecessor");
1036
1037 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1038 assert(LoopRegion->getSingleSuccessor() == Plan.getMiddleBlock() &&
1039 "The vector loop region must have the middle block as its single "
1040 "successor for now");
1041 VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1042
// Split after the phis so the mask computation and branch get their own
// block boundary.
1043 Header->splitAt(Header->getFirstNonPhi());
1044
1045 // Create the header mask, insert it in the header and branch on it.
1046 auto *IV =
1047 new VPWidenCanonicalIVRecipe(Header->getParent()->getCanonicalIV());
1048 VPBuilder Builder(Header, Header->getFirstNonPhi());
1049 Builder.insert(IV);
// NOTE(review): line 1050 elided — `BTC` used below is presumably the plan's
// backedge-taken count; confirm upstream.
1051 VPValue *HeaderMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
1052 Builder.createNaryOp(VPInstruction::BranchOnCond, HeaderMask);
1053
1054 VPBasicBlock *OrigLatch = LoopRegion->getExitingBasicBlock();
1055 VPValue *IVInc;
1056 [[maybe_unused]] bool TermBranchOnCount =
1057 match(OrigLatch->getTerminator(),
1059 m_Specific(&Plan.getVectorTripCount())));
1060 assert(TermBranchOnCount &&
1061 match(IVInc, m_Add(m_Specific(LoopRegion->getCanonicalIV()),
1062 m_Specific(&Plan.getVFxUF()))) &&
1063 std::next(IVInc->getDefiningRecipe()->getIterator()) ==
1064 OrigLatch->getTerminator()->getIterator() &&
1065 "Unexpected canonical iv increment");
1066
1067 // Split the latch at the IV update, and branch to it from the header mask.
1068 VPBasicBlock *Latch =
1069 OrigLatch->splitAt(IVInc->getDefiningRecipe()->getIterator());
1070 Latch->setName("vector.latch");
1071 VPBlockUtils::connectBlocks(Header, Latch);
1072
1073 // Collect any values defined in the loop that need a phi. Currently this
1074 // includes header phi backedges and live-outs extracted in the middle block.
1075 // TODO: Handle early exits via Plan.getExitBlocks()
1077 for (VPRecipeBase &R : Header->phis())
1079 NeedsPhi[cast<VPHeaderPHIRecipe>(R).getBackedgeValue()].push_back(&R);
1080
1081 VPValue *V;
1082 for (VPRecipeBase &R : *Plan.getMiddleBlock())
1083 if (match(&R, m_ExtractLastPart(m_VPValue(V))))
1084 NeedsPhi[V].push_back(&R);
1085
1086 // Insert phis with a poison incoming value for past the end of the tail.
1087 Builder.setInsertPoint(Latch, Latch->begin());
1088 VPTypeAnalysis TypeInfo(Plan);
1089 for (const auto &[V, Users] : NeedsPhi) {
// Live-ins (VPIRValues) are defined outside the loop and need no phi.
1090 if (isa<VPIRValue>(V))
1091 continue;
1092 // TODO: For reduction phis, use phi value instead of poison so we can
1093 // remove the special casing for tail folding in
1094 // LoopVectorizationPlanner::addReductionResultComputation
1095 VPValue *Poison =
1097 VPInstruction *Phi = Builder.createScalarPhi({V, Poison});
1098 for (VPUser *U : Users)
1099 U->replaceUsesOfWith(V, Phi);
1100 }
1101
1102 // Any extract of the last element must be updated to extract from the last
1103 // active lane of the header mask instead (i.e., the lane corresponding to the
1104 // last active iteration).
1105 Builder.setInsertPoint(Plan.getMiddleBlock()->getTerminator());
1106 for (VPRecipeBase &R : *Plan.getMiddleBlock()) {
1107 VPValue *Op;
// NOTE(review): the match guarding this continue (line 1108) was elided —
// presumably matching an extract-last-element of Op; confirm upstream.
1109 continue;
1110
1111 // Compute the index of the last active lane.
1112 VPValue *LastActiveLane =
1113 Builder.createNaryOp(VPInstruction::LastActiveLane, HeaderMask);
1114 auto *Ext =
1115 Builder.createNaryOp(VPInstruction::ExtractLane, {LastActiveLane, Op});
1116 R.getVPSingleValue()->replaceAllUsesWith(Ext);
1117 }
1118}
1119
1120/// Insert \p CheckBlockVPBB on the edge leading to the vector preheader,
1121/// connecting it to both vector and scalar preheaders. Updates scalar
1122/// preheader phis to account for the new predecessor.
// NOTE(review): doc-viewer dump — the signature's first line (1123) was
// elided; per the doc comment it takes the VPlan plus CheckBlockVPBB.
1124 VPBasicBlock *CheckBlockVPBB) {
1125 VPBlockBase *VectorPH = Plan.getVectorPreheader();
1126 auto *ScalarPH = cast<VPBasicBlock>(Plan.getScalarPreheader());
1127 VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor();
1128 VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckBlockVPBB);
1129 VPBlockUtils::connectBlocks(CheckBlockVPBB, ScalarPH);
// Swap so the bypass edge (to the scalar preheader) is the true successor of
// the check block's conditional branch.
1130 CheckBlockVPBB->swapSuccessors();
1131 unsigned NumPreds = ScalarPH->getNumPredecessors();
// The scalar preheader gained a predecessor; replicate each phi's last
// incoming value for the new edge.
1132 for (VPRecipeBase &R : ScalarPH->phis()) {
1133 auto *Phi = cast<VPPhi>(&R);
1134 assert(Phi->getNumIncoming() == NumPreds - 1 &&
1135 "must have incoming values for all predecessors");
1136 Phi->addOperand(Phi->getOperand(NumPreds - 2));
1137 }
1138}
1139
1140// Likelihood of bypassing the vectorized loop due to a runtime check block,
1141// including memory overlap checks block and wrapping/unit-stride checks block.
// Weights 1:127 — the bypass edge is expected to be taken rarely.
1142static constexpr uint32_t CheckBypassWeights[] = {1, 127};
1143
1144/// Create a BranchOnCond terminator in \p CheckBlockVPBB. Optionally adds
1145/// branch weights.
// NOTE(review): doc-viewer dump — interior lines 1148 and 1150 were elided;
// line 1150 presumably completes the VPBuilder call creating the
// BranchOnCond terminator on Cond.
1146static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB,
1147 VPValue *Cond, bool AddBranchWeights) {
1149 auto *Term = VPBuilder(CheckBlockVPBB)
1151 if (AddBranchWeights) {
// Attach the standard 1:127 bypass weights as !prof metadata on the branch.
1152 MDBuilder MDB(Plan.getContext());
1153 MDNode *BranchWeights =
1154 MDB.createBranchWeights(CheckBypassWeights, /*IsExpected=*/false);
1155 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1156 }
1157}
1158
// NOTE(review): doc-viewer dump — the signature's first line (1159) was
// elided; from the body it takes the VPlan and a `Cond` Value in addition to
// the parameters visible below. Wraps the IR check block in a
// VPIRBasicBlock, wires it in before the vector loop and terminates it with
// a bypass branch on Cond.
1160 BasicBlock *CheckBlock,
1161 bool AddBranchWeights) {
1162 VPValue *CondVPV = Plan.getOrAddLiveIn(Cond);
1163 VPBasicBlock *CheckBlockVPBB = Plan.createVPIRBasicBlock(CheckBlock);
1164 insertCheckBlockBeforeVectorLoop(Plan, CheckBlockVPBB);
1165 addBypassBranch(Plan, CheckBlockVPBB, CondVPV, AddBranchWeights);
1166}
1167
// NOTE(review): doc-viewer dump — the signature's first line (1168), line
// 1172 (presumably the trailing parameters, e.g. PSE and DL used below) and
// line 1225 (the condition guarding the branch-weight emission) were elided.
// Visible behavior: emit the "min.iters.check" in the plan entry that
// bypasses the vector loop when the trip count is too small, folding the
// check to a constant when SCEV can prove it either way.
1169 VPlan &Plan, ElementCount VF, unsigned UF,
1170 ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue,
1171 bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights,
1173 // Generate code to check if the loop's trip count is less than VF * UF, or
1174 // equal to it in case a scalar epilogue is required; this implies that the
1175 // vector trip count is zero. This check also covers the case where adding one
1176 // to the backedge-taken count overflowed leading to an incorrect trip count
1177 // of zero. In this case we will also jump to the scalar loop.
1178 CmpInst::Predicate CmpPred =
1179 RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1180 // If tail is to be folded, vector loop takes care of all iterations.
1181 VPValue *TripCountVPV = Plan.getTripCount();
1182 const SCEV *TripCount = vputils::getSCEVExprForVPValue(TripCountVPV, PSE);
1183 Type *TripCountTy = TripCount->getType();
1184 ScalarEvolution &SE = *PSE.getSE();
1185 auto GetMinTripCount = [&]() -> const SCEV * {
1186 // Compute max(MinProfitableTripCount, UF * VF) and return it.
1187 const SCEV *VFxUF =
1188 SE.getElementCount(TripCountTy, (VF * UF), SCEV::FlagNUW);
1189 if (UF * VF.getKnownMinValue() >=
1190 MinProfitableTripCount.getKnownMinValue()) {
1191 // TODO: SCEV should be able to simplify test.
1192 return VFxUF;
1193 }
1194 const SCEV *MinProfitableTripCountSCEV =
1195 SE.getElementCount(TripCountTy, MinProfitableTripCount, SCEV::FlagNUW);
1196 return SE.getUMaxExpr(MinProfitableTripCountSCEV, VFxUF);
1197 };
1198
1199 VPBasicBlock *EntryVPBB = Plan.getEntry();
1200 VPBuilder Builder(EntryVPBB);
1201 VPValue *TripCountCheck = Plan.getFalse();
1202 const SCEV *Step = GetMinTripCount();
1203 // TripCountCheck = false, folding tail implies positive vector trip
1204 // count.
1205 if (!TailFolded) {
1206 // TODO: Emit unconditional branch to vector preheader instead of
1207 // conditional branch with known condition.
// Loop guards can tighten the trip-count range, letting SCEV fold the check.
1208 TripCount = SE.applyLoopGuards(TripCount, OrigLoop);
1209 // Check if the trip count is < the step.
1210 if (SE.isKnownPredicate(CmpPred, TripCount, Step)) {
1211 // TODO: Ensure step is at most the trip count when determining max VF and
1212 // UF, w/o tail folding.
1213 TripCountCheck = Plan.getTrue();
1214 } else if (!SE.isKnownPredicate(CmpInst::getInversePredicate(CmpPred),
1215 TripCount, Step)) {
1216 // Generate the minimum iteration check only if we cannot prove the
1217 // check is known to be true, or known to be false.
1218 VPValue *MinTripCountVPV = Builder.createExpandSCEV(Step);
1219 TripCountCheck = Builder.createICmp(
1220 CmpPred, TripCountVPV, MinTripCountVPV, DL, "min.iters.check");
1221 } // else step known to be < trip count, use TripCountCheck preset to false.
1222 }
1223 VPInstruction *Term =
1224 Builder.createNaryOp(VPInstruction::BranchOnCond, {TripCountCheck}, DL);
// NOTE(review): the guard opening this branch-weight block (line 1225) was
// elided — presumably conditional on MinItersBypassWeights being provided.
1226 MDBuilder MDB(Plan.getContext());
1227 MDNode *BranchWeights = MDB.createBranchWeights(
1228 ArrayRef(MinItersBypassWeights, 2), /*IsExpected=*/false);
1229 Term->setMetadata(LLVMContext::MD_prof, BranchWeights);
1230 }
1231}
1232
// NOTE(review): doc-viewer dump — the signature's first line (1233) was
// elided. Visible behavior: emit the "min.epilog.iters.check" deciding
// whether the remaining iterations (TripCount - VectorTripCount) justify
// running the epilogue vector loop, and attach estimated branch weights.
1234 VPlan &Plan, Value *TripCount, Value *VectorTripCount,
1235 bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF,
1236 unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE) {
1237 // Add the minimum iteration check for the epilogue vector loop.
1238 VPValue *TC = Plan.getOrAddLiveIn(TripCount);
1239 VPBuilder Builder(cast<VPBasicBlock>(Plan.getEntry()));
1240 VPValue *VFxUF = Builder.createExpandSCEV(SE.getElementCount(
1241 TripCount->getType(), (EpilogueVF * EpilogueUF), SCEV::FlagNUW));
1242 VPValue *Count = Builder.createSub(TC, Plan.getOrAddLiveIn(VectorTripCount),
1243 DebugLoc::getUnknown(), "n.vec.remaining");
1244
1245 // Generate code to check if the loop's trip count is less than VF * UF of
1246 // the vector epilogue loop.
// ULE when a scalar epilogue must run: at least one iteration is reserved.
1247 auto P = RequiresScalarEpilogue ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
1248 auto *CheckMinIters = Builder.createICmp(
1249 P, Count, VFxUF, DebugLoc::getUnknown(), "min.epilog.iters.check");
1250 VPInstruction *Branch =
1251 Builder.createNaryOp(VPInstruction::BranchOnCond, CheckMinIters);
1252
1253 // We assume the remaining `Count` is equally distributed in
1254 // [0, MainLoopStep)
1255 // So the probability for `Count < EpilogueLoopStep` should be
1256 // min(MainLoopStep, EpilogueLoopStep) / MainLoopStep
1257 // TODO: Improve the estimate by taking the estimated trip count into
1258 // consideration.
1259 unsigned EstimatedSkipCount = std::min(MainLoopStep, EpilogueLoopStep);
1260 const uint32_t Weights[] = {EstimatedSkipCount,
1261 MainLoopStep - EstimatedSkipCount};
1262 MDBuilder MDB(Plan.getContext());
1263 MDNode *BranchWeights =
1264 MDB.createBranchWeights(Weights, /*IsExpected=*/false);
1265 Branch->setMetadata(LLVMContext::MD_prof, BranchWeights);
1266}
1267
1268/// Find and return the final select instruction of the FindIV result pattern
1269/// for the given \p BackedgeVal:
1270/// select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),
1271/// ComputeReductionResult(ReducedIV), Start.
// NOTE(review): doc-viewer dump — the signature line (1272) was elided; per
// the doc comment and body, this helper takes BackedgeVal and returns the
// matching VPInstruction.
1273 return cast<VPInstruction>(
// Walk the users of BackedgeVal until a recipe matching the FindIV result
// pattern above is found.
1274 vputils::findRecipe(BackedgeVal, [BackedgeVal](VPRecipeBase *R) {
1275 auto *VPI = dyn_cast<VPInstruction>(R);
1276 return VPI &&
1277 matchFindIVResult(VPI, m_Specific(BackedgeVal), m_VPValue());
1278 }));
1279}
1280
// NOTE(review): doc-viewer dump — the signature line (1281) and interior
// lines 1305, 1309, 1317, 1345-1346, 1370 were elided. Visible behavior:
// handle FMaxNum/FMinNum-style reductions without fast-math by detecting
// NaN lanes in the min/max operand each iteration; on any NaN the vector
// loop exits early and execution resumes in the scalar loop from the start
// of the last vector iteration. Returns false to abandon the (possibly
// partially modified) plan when the pattern cannot be handled.
1282 auto GetMinOrMaxCompareValue =
1283 [](VPReductionPHIRecipe *RedPhiR) -> VPValue * {
1284 auto *MinOrMaxR =
1285 dyn_cast_or_null<VPRecipeWithIRFlags>(RedPhiR->getBackedgeValue());
1286 if (!MinOrMaxR)
1287 return nullptr;
1288
1289 // Check that MinOrMaxR is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1290 // with an intrinsic that matches the reduction kind.
1291 Intrinsic::ID ExpectedIntrinsicID =
1292 getMinMaxReductionIntrinsicOp(RedPhiR->getRecurrenceKind());
1293 if (!match(MinOrMaxR, m_Intrinsic(ExpectedIntrinsicID)))
1294 return nullptr;
1295
// Return the non-phi operand of the min/max — the value to test for NaNs.
1296 if (MinOrMaxR->getOperand(0) == RedPhiR)
1297 return MinOrMaxR->getOperand(1);
1298
1299 assert(MinOrMaxR->getOperand(1) == RedPhiR &&
1300 "Reduction phi operand expected");
1301 return MinOrMaxR->getOperand(0);
1302 };
1303
1304 VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
// NOTE(review): line 1305 elided — presumably the container declaration for
// MinOrMaxNumReductionsToHandle (pairs of phi and compare operand).
1306 MinOrMaxNumReductionsToHandle;
1307 bool HasUnsupportedPhi = false;
1308 for (auto &R : LoopRegion->getEntryBasicBlock()->phis()) {
// NOTE(review): the condition guarding this continue (line 1309) was elided.
1310 continue;
1311 auto *Cur = dyn_cast<VPReductionPHIRecipe>(&R);
1312 if (!Cur) {
1313 // TODO: Also support fixed-order recurrence phis.
1314 HasUnsupportedPhi = true;
1315 continue;
1316 }
// NOTE(review): the condition opening this branch (line 1317) was elided —
// from context it filters recurrence kinds other than FMaxNum/FMinNum.
1318 Cur->getRecurrenceKind())) {
1319 HasUnsupportedPhi = true;
1320 continue;
1321 }
1322
1323 VPValue *MinOrMaxOp = GetMinOrMaxCompareValue(Cur);
1324 if (!MinOrMaxOp)
1325 return false;
1326
1327 MinOrMaxNumReductionsToHandle.emplace_back(Cur, MinOrMaxOp);
1328 }
1329
1330 if (MinOrMaxNumReductionsToHandle.empty())
1331 return true;
1332
1333 // We won't be able to resume execution in the scalar tail, if there are
1334 // unsupported header phis or there is no scalar tail at all, due to
1335 // tail-folding.
1336 if (HasUnsupportedPhi || !Plan.hasScalarTail())
1337 return false;
1338
1339 /// Check if the vector loop of \p Plan can early exit and restart
1340 /// execution of last vector iteration in the scalar loop. This requires all
1341 /// recipes up to early exit point be side-effect free as they are
1342 /// re-executed. Currently we check that the loop is free of any recipe that
1343 /// may write to memory. Expected to operate on an early VPlan w/o nested
1344 /// regions.
// NOTE(review): lines 1345-1346 elided — presumably the loop header
// iterating the region's blocks that binds `VPB` used below.
1347 auto *VPBB = cast<VPBasicBlock>(VPB);
1348 for (auto &R : *VPBB) {
1349 if (R.mayWriteToMemory() && !match(&R, m_BranchOnCount()))
1350 return false;
1351 }
1352 }
1353
// Build, in the latch, the disjunction of "is NaN" tests (x uno x) over all
// handled reductions' compare operands.
1354 VPBasicBlock *LatchVPBB = LoopRegion->getExitingBasicBlock();
1355 VPBuilder LatchBuilder(LatchVPBB->getTerminator());
1356 VPValue *AllNaNLanes = nullptr;
1357 SmallPtrSet<VPValue *, 2> RdxResults;
1358 for (const auto &[_, MinOrMaxOp] : MinOrMaxNumReductionsToHandle) {
1359 VPValue *RedNaNLanes =
1360 LatchBuilder.createFCmp(CmpInst::FCMP_UNO, MinOrMaxOp, MinOrMaxOp);
1361 AllNaNLanes = AllNaNLanes ? LatchBuilder.createOr(AllNaNLanes, RedNaNLanes)
1362 : RedNaNLanes;
1363 }
1364
1365 VPValue *AnyNaNLane =
1366 LatchBuilder.createNaryOp(VPInstruction::AnyOf, {AllNaNLanes});
1367 VPBasicBlock *MiddleVPBB = Plan.getMiddleBlock();
1368 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->begin());
1369 for (const auto &[RedPhiR, _] : MinOrMaxNumReductionsToHandle) {
// NOTE(review): the assert condition head (line 1370) was elided.
1371 RedPhiR->getRecurrenceKind()) &&
1372 "unsupported reduction");
1373
1374 // If we exit early due to NaNs, compute the final reduction result based on
1375 // the reduction phi at the beginning of the last vector iteration.
1376 auto *RdxResult = vputils::findComputeReductionResult(RedPhiR);
1377 assert(RdxResult && "must find a ComputeReductionResult");
1378
1379 auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
1380 RdxResult->getOperand(0));
1381 RdxResult->setOperand(0, NewSel);
1382 assert(!RdxResults.contains(RdxResult) && "RdxResult already used");
1383 RdxResults.insert(RdxResult);
1384 }
1385
// Replace the BranchOnCount latch terminator with a BranchOnCond on
// "NaN seen OR counted exit taken".
1386 auto *LatchExitingBranch = LatchVPBB->getTerminator();
1387 assert(match(LatchExitingBranch, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
1388 "Unexpected terminator");
1389 auto *IsLatchExitTaken = LatchBuilder.createICmp(
1390 CmpInst::ICMP_EQ, LatchExitingBranch->getOperand(0),
1391 LatchExitingBranch->getOperand(1));
1392 auto *AnyExitTaken = LatchBuilder.createOr(AnyNaNLane, IsLatchExitTaken);
1393 LatchBuilder.createNaryOp(VPInstruction::BranchOnCond, AnyExitTaken);
1394 LatchExitingBranch->eraseFromParent();
1395
1396 // Update resume phis for inductions in the scalar preheader. If AnyNaNLane is
1397 // true, the resume from the start of the last vector iteration via the
1398 // canonical IV, otherwise from the original value.
1399 auto IsTC = [&Plan](VPValue *V) {
1400 return V == &Plan.getVectorTripCount() || V == Plan.getTripCount();
1401 };
1402 for (auto &R : Plan.getScalarPreheader()->phis()) {
1403 auto *ResumeR = cast<VPPhi>(&R);
1404 VPValue *VecV = ResumeR->getOperand(0);
// Reduction resume values were already rewired above.
1405 if (RdxResults.contains(VecV))
1406 continue;
1407 if (auto *DerivedIV = dyn_cast<VPDerivedIVRecipe>(VecV)) {
1408 VPValue *DIVTC = DerivedIV->getOperand(1);
1409 if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) {
1410 auto *NewSel = MiddleBuilder.createSelect(
1411 AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC);
1412 DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint());
1413 DerivedIV->setOperand(1, NewSel);
1414 continue;
1415 }
1416 }
1417 // Bail out and abandon the current, partially modified, VPlan if we
1418 // encounter resume phi that cannot be updated yet.
1419 if (!IsTC(VecV)) {
1420 LLVM_DEBUG(dbgs() << "Found resume phi we cannot update for VPlan with "
1421 "FMaxNum/FMinNum reduction.\n");
1422 return false;
1423 }
1424 auto *NewSel = MiddleBuilder.createSelect(
1425 AnyNaNLane, LoopRegion->getCanonicalIV(), VecV);
1426 ResumeR->setOperand(0, NewSel);
1427 }
1428
// On a NaN early exit the scalar loop must run, so force the middle-block
// branch condition false in that case.
1429 auto *MiddleTerm = MiddleVPBB->getTerminator();
1430 MiddleBuilder.setInsertPoint(MiddleTerm);
1431 VPValue *MiddleCond = MiddleTerm->getOperand(0);
1432 VPValue *NewCond =
1433 MiddleBuilder.createAnd(MiddleCond, MiddleBuilder.createNot(AnyNaNLane));
1434 MiddleTerm->setOperand(0, NewCond);
1435 return true;
1436}
1437
// NOTE(review): doc-viewer dump — the signature line (1438) and interior
// lines 1465, 1467, 1506, 1517 were elided. Visible behavior: rewrite
// FindLast-style reduction phis into the mask-phi + data/mask select scheme
// sketched in the comment block below, replacing the final
// compute-reduction-result with an ExtractLastActive in the middle block.
// Returns false (leaving the plan untouched so far for that phi) when the
// select/blend pattern is not recognized.
1439 if (Plan.hasScalarVFOnly())
1440 return false;
1441
1442 // We want to create the following nodes:
1443 // vector.body:
1444 // ...new WidenPHI recipe introduced to keep the mask value for the latest
1445 // iteration where any lane was active.
1446 // mask.phi = phi [ ir<false>, vector.ph ], [ vp<new.mask>, vector.body ]
1447 // ...data.phi (a VPReductionPHIRecipe for a FindLast reduction) already
1448 // exists, but needs updating to use 'new.data' for the backedge value.
1449 // data.phi = phi ir<default.val>, vp<new.data>
1450 //
1451 // ...'data' and 'compare' created by existing nodes...
1452 //
1453 // ...new recipes introduced to determine whether to update the reduction
1454 // values or keep the current one.
1455 // any.active = i1 any-of ir<compare>
1456 // new.mask = select vp<any.active>, ir<compare>, vp<mask.phi>
1457 // new.data = select vp<any.active>, ir<data>, ir<data.phi>
1458 //
1459 // middle.block:
1460 // ...extract-last-active replaces compute-reduction-result.
1461 // result = extract-last-active vp<new.data>, vp<new.mask>, ir<default.val>
1462
1463 VPValue *HeaderMask = vputils::findHeaderMask(Plan);
1464 for (auto &Phi : make_early_inc_range(
// NOTE(review): line 1465 (the range expression, presumably the header phis)
// and line 1467 (the kind filter condition) were elided.
1466 auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&Phi);
1468 PhiR->getRecurrenceKind()))
1469 continue;
1470
1471 // Find the condition for the select/blend.
1472 VPValue *BackedgeSelect = PhiR->getBackedgeValue();
1473 VPValue *CondSelect = BackedgeSelect;
1474
1475 // If there's a header mask, the backedge select will not be the find-last
1476 // select.
1477 if (HeaderMask && !match(BackedgeSelect,
1478 m_Select(m_Specific(HeaderMask),
1479 m_VPValue(CondSelect), m_Specific(PhiR))))
1480 llvm_unreachable("expected header mask select");
1481
1482 VPValue *Cond = nullptr, *Op1 = nullptr, *Op2 = nullptr;
1483
1484 // If we're matching a blend rather than a select, there should be one
1485 // incoming value which is the data, then all other incoming values should
1486 // be the phi.
1487 auto MatchBlend = [&](VPRecipeBase *R) {
1488 auto *Blend = dyn_cast<VPBlendRecipe>(R);
1489 if (!Blend)
1490 return false;
// NOTE(review): typo "normalizaion" in the assert string below — it is a
// runtime string, so it is left unchanged here.
1491 assert(!Blend->isNormalized() && "must run before blend normalizaion");
1492 unsigned NumIncomingDataValues = 0;
1493 for (unsigned I = 0; I < Blend->getNumIncomingValues(); ++I) {
1494 VPValue *Incoming = Blend->getIncomingValue(I);
1495 if (Incoming != PhiR) {
1496 ++NumIncomingDataValues;
1497 Cond = Blend->getMask(I);
1498 Op1 = Incoming;
1499 Op2 = PhiR;
1500 }
1501 }
1502 return NumIncomingDataValues == 1;
1503 };
1504
1505 VPSingleDefRecipe *SelectR =
// NOTE(review): line 1506 (the initializer of SelectR, presumably derived
// from CondSelect) was elided.
1507 if (!match(SelectR,
1508 m_Select(m_VPValue(Cond), m_VPValue(Op1), m_VPValue(Op2))) &&
1509 !MatchBlend(SelectR))
1510 return false;
1511
1512 assert(Cond != HeaderMask && "Cond must not be HeaderMask");
1513
1514 // Find final reduction computation and replace it with an
1515 // extract.last.active intrinsic.
1516 auto *RdxResult =
// NOTE(review): line 1517 (the call producing RdxResult) was elided.
1518 BackedgeSelect);
1519 assert(RdxResult && "Could not find reduction result");
1520
1521 // Add mask phi.
1522 VPBuilder Builder = VPBuilder::getToInsertAfter(PhiR);
1523 auto *MaskPHI = new VPWidenPHIRecipe(nullptr, /*Start=*/Plan.getFalse());
1524 Builder.insert(MaskPHI);
1525
1526 // Add select for mask.
1527 Builder.setInsertPoint(SelectR);
1528
1529 if (Op1 == PhiR) {
1530 // Normalize to selecting the data operand when the condition is true by
1531 // swapping operands and negating the condition.
1532 std::swap(Op1, Op2);
1533 Cond = Builder.createNot(Cond);
1534 }
1535 assert(Op2 == PhiR && "data value must be selected if Cond is true");
1536
1537 if (HeaderMask)
1538 Cond = Builder.createLogicalAnd(HeaderMask, Cond);
1539
// any.active / new.mask / new.data per the diagram at the top.
1540 VPValue *AnyOf = Builder.createNaryOp(VPInstruction::AnyOf, {Cond});
1541 VPValue *MaskSelect = Builder.createSelect(AnyOf, Cond, MaskPHI);
1542 MaskPHI->addOperand(MaskSelect);
1543
1544 // Replace select for data.
1545 VPValue *DataSelect =
1546 Builder.createSelect(AnyOf, Op1, Op2, SelectR->getDebugLoc());
1547 SelectR->replaceAllUsesWith(DataSelect);
1548 PhiR->setBackedgeValue(DataSelect);
1549 SelectR->eraseFromParent();
1550
1551 Builder.setInsertPoint(RdxResult);
1552 auto *ExtractLastActive =
1553 Builder.createNaryOp(VPInstruction::ExtractLastActive,
1554 {PhiR->getStartValue(), DataSelect, MaskSelect},
1555 RdxResult->getDebugLoc());
1556 RdxResult->replaceAllUsesWith(ExtractLastActive);
1557 RdxResult->eraseFromParent();
1558 }
1559
1560 return true;
1561}
1562
1563/// Given a first argmin/argmax pattern with strict predicate consisting of
1564/// 1) a MinOrMax reduction \p MinOrMaxPhiR producing \p MinOrMaxResult,
1565/// 2) a wide induction \p WideIV,
1566/// 3) a FindLastIV reduction \p FindLastIVPhiR using \p WideIV,
1567/// return the smallest index of the FindLastIV reduction result using UMin,
1568/// unless \p MinOrMaxResult equals the start value of its MinOrMax reduction.
1569/// In that case, return the start value of the FindLastIV reduction instead.
1570/// If \p WideIV is not canonical, a new canonical wide IV is added, and the
1571/// final result is scaled back to the non-canonical \p WideIV.
1572/// The final value of the FindLastIV reduction is originally computed using
1573/// \p FindIVSelect, \p FindIVCmp, and \p FindIVRdxResult, which are replaced
1574/// and removed.
1575/// Returns true if the pattern was handled successfully, false otherwise.
// NOTE(review): this rendered listing drops hyperlinked lines — e.g. the
// function signature (original line 1576) and the definition of `Ty`
// (original line 1586) are missing below; verify against the real source.
1577 VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR,
1578 VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV,
1579 VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect,
1580 VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult) {
1581 assert(!FindLastIVPhiR->isInLoop() && !FindLastIVPhiR->isOrdered() &&
1582 "inloop and ordered reductions not supported");
1583 assert(FindLastIVPhiR->getVFScaleFactor() == 1 &&
1584 "FindIV reduction must not be scaled");
1585
1587 // TODO: Support non (i.e., narrower than) canonical IV types.
1588 // TODO: Emit remarks for failed transformations.
1589 if (Ty != VPTypeAnalysis(Plan).inferScalarType(WideIV))
1590 return false;
1591
1592 auto *FindIVSelectR = cast<VPSingleDefRecipe>(
1593 FindLastIVPhiR->getBackedgeValue()->getDefiningRecipe());
1594 assert(
1595 match(FindIVSelectR, m_Select(m_VPValue(), m_VPValue(), m_VPValue())) &&
1596 "backedge value must be a select");
// The select must feed the wide IV through one of its two value arms.
1597 if (FindIVSelectR->getOperand(1) != WideIV &&
1598 FindIVSelectR->getOperand(2) != WideIV)
1599 return false;
1600
1601 // If the original wide IV is not canonical, create a new one. The canonical
1602 // wide IV is guaranteed to not wrap for all lanes that are active in the
1603 // vector loop.
1604 if (!WideIV->isCanonical()) {
1605 VPIRValue *Zero = Plan.getConstantInt(Ty, 0);
1606 VPIRValue *One = Plan.getConstantInt(Ty, 1);
1607 auto *WidenCanIV = new VPWidenIntOrFpInductionRecipe(
1608 nullptr, Zero, One, WideIV->getVFValue(),
1609 WideIV->getInductionDescriptor(),
1610 VPIRFlags::WrapFlagsTy(/*HasNUW=*/true, /*HasNSW=*/false),
1611 WideIV->getDebugLoc());
1612 WidenCanIV->insertBefore(WideIV);
1613
1614 // Update the select to use the wide canonical IV.
1615 FindIVSelectR->setOperand(FindIVSelectR->getOperand(1) == WideIV ? 1 : 2,
1616 WidenCanIV);
1617 }
// The phi's start value is no longer needed here: if no IV was ever
// selected, the final select emitted below yields the original start value.
1618 FindLastIVPhiR->setOperand(0, Plan.getOrAddLiveIn(PoisonValue::get(Ty)));
1619
1620 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1621 // result:
1622 // 1. Find the first canonical indices corresponding to partial min/max
1623 // values, using loop reductions.
1624 // 2. Find which of the partial min/max values are equal to the overall
1625 // min/max value.
1626 // 3. Select among the canonical indices those corresponding to the overall
1627 // min/max value.
1628 // 4. Find the first canonical index of overall min/max and scale it back to
1629 // the original IV using VPDerivedIVRecipe.
1630 // 5. If the overall min/max equals the starting min/max, the condition in
1631 // the loop was always false, due to being strict; return the start value
1632 // of FindLastIVPhiR in that case.
1633 //
1634 // For example, we transform two independent reduction result computations
1635 // for
1636 //
1637 // <x1> vector loop: {
1638 // vector.body:
1639 // ...
1640 // ir<%iv> = WIDEN-INDUCTION nuw nsw ir<10>, ir<1>, vp<%0>
1641 // WIDEN-REDUCTION-PHI ir<%min.idx> = phi ir<sentinel.min.start>,
1642 // ir<%min.idx.next>
1643 // WIDEN-REDUCTION-PHI ir<%min.val> = phi ir<100>, ir<%min.val.next>
1644 // ....
1645 // WIDEN-INTRINSIC ir<%min.val.next> = call llvm.umin(ir<%min.val>, ir<%l>)
1646 // WIDEN ir<%min.idx.next> = select ir<%cmp>, ir<%iv>, ir<%min.idx>
1647 // ...
1648 // }
1649 // Successor(s): middle.block
1650 //
1651 // middle.block:
1652 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1653 // vp<%min.result> = compute-reduction-result (umin) ir<%min.val.next>
1654 // vp<%cmp> = icmp ne vp<%iv.rdx>, ir<sentinel.min.start>
1655 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<10>
1656 //
1657 //
1658 // Into:
1659 //
1660 // vp<%reduced.min> = compute-reduction-result (umin) ir<%min.val.next>
1661 // vp<%reduced.mins.mask> = icmp eq ir<%min.val.next>, vp<%reduced.min>
1662 // vp<%idxs2reduce> = select vp<%reduced.mins.mask>, ir<%min.idx.next>,
1663 // ir<MaxUInt>
1664 // vp<%reduced.idx> = compute-reduction-result (umin) vp<%idxs2reduce>
1665 // vp<%scaled.idx> = DERIVED-IV ir<20> + vp<%reduced.idx> * ir<1>
1666 // vp<%always.false> = icmp eq vp<%reduced.min>, ir<100>
1667 // vp<%final.idx> = select vp<%always.false>, ir<10>,
1668 // vp<%scaled.idx>
1669
1670 VPBuilder Builder(FindIVRdxResult);
1671 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1672 auto *FinalMinOrMaxCmp =
1673 Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1674 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1675 VPValue *MaxIV =
1676 Plan.getConstantInt(APInt::getMaxValue(Ty->getIntegerBitWidth()));
1677 auto *FinalIVSelect =
1678 Builder.createSelect(FinalMinOrMaxCmp, LastIVExiting, MaxIV);
// Reduce with UMin so the smallest — i.e. first — matching canonical index
// wins; lanes that did not match the overall min/max were set to MaxIV above.
1679 VPIRFlags RdxFlags(RecurKind::UMin, false, false, FastMathFlags());
1680 VPSingleDefRecipe *FinalCanIV = Builder.createNaryOp(
1681 VPInstruction::ComputeReductionResult, {FinalIVSelect}, RdxFlags,
1682 FindIVRdxResult->getDebugLoc());
1683
1684 // If we used a new wide canonical IV, convert the reduction result back to
1685 // the original IV scale before the final select.
1686 if (!WideIV->isCanonical()) {
1687 auto *DerivedIVRecipe =
1689 nullptr, // No FPBinOp for integer induction
1690 WideIV->getStartValue(), FinalCanIV,
1691 WideIV->getStepValue(), "derived.iv.result");
1692 DerivedIVRecipe->insertBefore(&*Builder.getInsertPoint());
1693 FinalCanIV = DerivedIVRecipe;
1694 }
1695
1696 // If the final min/max value matches its start value, the condition in the
1697 // loop was always false, i.e. no induction value has been selected. If that's
1698 // the case, set the result of the IV reduction to its start value.
1699 VPValue *AlwaysFalse = Builder.createICmp(CmpInst::ICMP_EQ, MinOrMaxResult,
1700 MinOrMaxPhiR->getStartValue());
1701 VPValue *FinalIV = Builder.createSelect(
1702 AlwaysFalse, FindIVSelect->getOperand(2), FinalCanIV);
1703 FindIVSelect->replaceAllUsesWith(FinalIV);
1704
1705 // Erase the old FindIV result pattern which is now dead.
1706 FindIVSelect->eraseFromParent();
1707 FindIVCmp->eraseFromParent();
1708 FindIVRdxResult->eraseFromParent();
1709 return true;
1710}
1711
1714 Loop *TheLoop) {
// NOTE(review): the enclosing function's signature (original lines 1712-1713)
// and several other hyperlinked lines are dropped by this rendered listing;
// verify details against the real source file.
1715 for (auto &PhiR : make_early_inc_range(
1717 auto *MinOrMaxPhiR = dyn_cast<VPReductionPHIRecipe>(&PhiR);
1718 // TODO: check for multi-uses in VPlan directly.
1719 if (!MinOrMaxPhiR || !MinOrMaxPhiR->hasUsesOutsideReductionChain())
1720 continue;
1721
1722 // MinOrMaxPhiR has users outside the reduction cycle in the loop. Check if
1723 // the only other user is a FindLastIV reduction. MinOrMaxPhiR must have
1724 // exactly 2 users:
1725 // 1) the min/max operation of the reduction cycle, and
1726 // 2) the compare of a FindLastIV reduction cycle. This compare must match
1727 // the min/max operation - comparing MinOrMaxPhiR with the operand of the
1728 // min/max operation, and be used only by the select of the FindLastIV
1729 // reduction cycle.
1730 RecurKind RdxKind = MinOrMaxPhiR->getRecurrenceKind();
1731 assert(
1733 "only min/max recurrences support users outside the reduction chain");
1734
1735 auto *MinOrMaxOp =
1736 dyn_cast<VPRecipeWithIRFlags>(MinOrMaxPhiR->getBackedgeValue());
1737 if (!MinOrMaxOp)
1738 return false;
1739
1740 // Check that MinOrMaxOp is a VPWidenIntrinsicRecipe or VPReplicateRecipe
1741 // with an intrinsic that matches the reduction kind.
1742 Intrinsic::ID ExpectedIntrinsicID = getMinMaxReductionIntrinsicOp(RdxKind);
1743 if (!match(MinOrMaxOp, m_Intrinsic(ExpectedIntrinsicID)))
1744 return false;
1745
1746 // MinOrMaxOp must have 2 users: 1) MinOrMaxPhiR and 2)
1747 // ComputeReductionResult.
1748 assert(MinOrMaxOp->getNumUsers() == 2 &&
1749 "MinOrMaxOp must have exactly 2 users");
// MinOrMaxOpValue is the non-phi operand of the min/max operation, i.e. the
// new value fed into the recurrence each iteration.
1750 VPValue *MinOrMaxOpValue = MinOrMaxOp->getOperand(0);
1751 if (MinOrMaxOpValue == MinOrMaxPhiR)
1752 MinOrMaxOpValue = MinOrMaxOp->getOperand(1);
1753
1754 VPValue *CmpOpA;
1755 VPValue *CmpOpB;
1756 CmpPredicate Pred;
1758 MinOrMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1759 if (!Cmp || Cmp->getNumUsers() != 1 ||
1760 (CmpOpA != MinOrMaxOpValue && CmpOpB != MinOrMaxOpValue))
1761 return false;
1762
// Canonicalize the predicate so that it reads as if MinOrMaxOpValue were the
// compare's second operand.
1763 if (MinOrMaxOpValue != CmpOpB)
1764 Pred = CmpInst::getSwappedPredicate(Pred);
1765
1766 // MinOrMaxPhiR must have exactly 2 users:
1767 // * MinOrMaxOp,
1768 // * Cmp (that's part of a FindLastIV chain).
1769 if (MinOrMaxPhiR->getNumUsers() != 2)
1770 return false;
1771
1772 VPInstruction *MinOrMaxResult =
1774 assert(is_contained(MinOrMaxPhiR->users(), MinOrMaxOp) &&
1775 "one user must be MinOrMaxOp");
1776 assert(MinOrMaxResult && "MinOrMaxResult must be a user of MinOrMaxOp");
1777
1778 // Cmp must be used by the select of a FindLastIV chain.
1779 VPValue *Sel = dyn_cast<VPSingleDefRecipe>(Cmp->getSingleUser());
1780 VPValue *IVOp, *FindIV;
1781 if (!Sel || Sel->getNumUsers() != 2 ||
1782 !match(Sel,
1784 return false;
1785
// NOTE(review): the guarding condition (original line 1786) is dropped by
// this listing; presumably it detects swapped select arms, which are
// normalized here by swapping the values and inverting the predicate.
1787 std::swap(FindIV, IVOp);
1788 Pred = CmpInst::getInversePredicate(Pred);
1789 }
1790
1791 auto *FindIVPhiR = dyn_cast<VPReductionPHIRecipe>(FindIV);
1793 FindIVPhiR->getRecurrenceKind()))
1794 return false;
1795
1796 assert(!FindIVPhiR->isInLoop() && !FindIVPhiR->isOrdered() &&
1797 "cannot handle inloop/ordered reductions yet");
1798
1799 // Check if FindIVPhiR is a FindLast pattern by checking the MinMaxKind
1800 // on its ComputeReductionResult. SMax/UMax indicates FindLast.
1801 VPInstruction *FindIVResult =
1803 FindIVPhiR->getBackedgeValue());
1804 assert(FindIVResult &&
1805 "must be able to retrieve the FindIVResult VPInstruction");
1806 RecurKind FindIVMinMaxKind = FindIVResult->getRecurKind();
1807 if (FindIVMinMaxKind != RecurKind::SMax &&
1808 FindIVMinMaxKind != RecurKind::UMax)
1809 return false;
1810
1811 // TODO: Support cases where IVOp is the IV increment.
1812 if (!match(IVOp, m_TruncOrSelf(m_VPValue(IVOp))) ||
1814 return false;
1815
1816 // Check if the predicate is compatible with the reduction kind.
// For each min/max kind only the predicates in the "keep current value"
// direction are accepted (both strict and non-strict forms).
1817 bool IsValidKindPred = [RdxKind, Pred]() {
1818 switch (RdxKind) {
1819 case RecurKind::UMin:
1820 return Pred == CmpInst::ICMP_UGE || Pred == CmpInst::ICMP_UGT;
1821 case RecurKind::UMax:
1822 return Pred == CmpInst::ICMP_ULE || Pred == CmpInst::ICMP_ULT;
1823 case RecurKind::SMax:
1824 return Pred == CmpInst::ICMP_SLE || Pred == CmpInst::ICMP_SLT;
1825 case RecurKind::SMin:
1826 return Pred == CmpInst::ICMP_SGE || Pred == CmpInst::ICMP_SGT;
1827 default:
1828 llvm_unreachable("unhandled recurrence kind");
1829 }
1830 }();
1831 if (!IsValidKindPred) {
// Emit an optimization remark explaining why vectorization bails out.
1832 ORE->emit([&]() {
1834 DEBUG_TYPE, "VectorizationMultiUseReductionPredicate",
1835 TheLoop->getStartLoc(), TheLoop->getHeader())
1836 << "Multi-use reduction with predicate "
1838 << " incompatible with reduction kind";
1839 });
1840 return false;
1841 }
1842
1843 auto *FindIVSelect = findFindIVSelect(FindIVPhiR->getBackedgeValue());
1844 auto *FindIVCmp = FindIVSelect->getOperand(0)->getDefiningRecipe();
1845 auto *FindIVRdxResult = cast<VPInstruction>(FindIVCmp->getOperand(0));
1846 assert(FindIVSelect->getParent() == MinOrMaxResult->getParent() &&
1847 "both results must be computed in the same block");
1848 // Reducing to a scalar min or max value is placed right before reducing to
1849 // its scalar iteration, in order to generate instructions that use both
1850 // their operands.
1851 MinOrMaxResult->moveBefore(*FindIVRdxResult->getParent(),
1852 FindIVRdxResult->getIterator());
1853
// Strict predicates select the FIRST matching IV and are handled by the
// dedicated helper; the non-strict (last-match) case is handled inline below.
1854 bool IsStrictPredicate = ICmpInst::isLT(Pred) || ICmpInst::isGT(Pred);
1855 if (IsStrictPredicate) {
1856 if (!handleFirstArgMinOrMax(Plan, MinOrMaxPhiR, FindIVPhiR,
1858 MinOrMaxResult, FindIVSelect, FindIVCmp,
1859 FindIVRdxResult))
1860 return false;
1861 continue;
1862 }
1863
1864 // The reduction using MinOrMaxPhiR needs adjusting to compute the correct
1865 // result:
1866 // 1. We need to find the last IV for which the condition based on the
1867 // min/max recurrence is true,
1868 // 2. Compare the partial min/max reduction result to its final value and,
1869 // 3. Select the lanes of the partial FindLastIV reductions which
1870 // correspond to the lanes matching the min/max reduction result.
1871 //
1872 // For example, this transforms
1873 // vp<%min.result> = compute-reduction-result ir<%min.val.next>
1874 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%min.idx.next>
1875 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1876 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1877 //
1878 // into:
1879 //
1880 // vp<min.result> = compute-reduction-result ir<%min.val.next>
1881 // vp<%final.min.cmp> = icmp eq ir<%min.val.next>, vp<min.result>
1882 // vp<%final.iv> = select vp<%final.min.cmp>, vp<%min.idx.next>, SENTINEL
1883 // vp<%iv.rdx> = compute-reduction-result (smax) vp<%final.iv>
1884 // vp<%cmp> = icmp ne vp<%iv.rdx>, SENTINEL
1885 // vp<%find.iv.result> = select vp<%cmp>, vp<%iv.rdx>, ir<0>
1886 //
1887 VPBuilder B(FindIVRdxResult);
1888 VPValue *MinOrMaxExiting = MinOrMaxResult->getOperand(0);
1889 auto *FinalMinOrMaxCmp =
1890 B.createICmp(CmpInst::ICMP_EQ, MinOrMaxExiting, MinOrMaxResult);
1891 VPValue *Sentinel = FindIVCmp->getOperand(1);
1892 VPValue *LastIVExiting = FindIVRdxResult->getOperand(0);
1893 auto *FinalIVSelect =
1894 B.createSelect(FinalMinOrMaxCmp, LastIVExiting, Sentinel);
1895 FindIVRdxResult->setOperand(0, FinalIVSelect);
1896 }
1897 return true;
1898}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define DEBUG_TYPE
#define _
iv Induction Variable Users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file provides a LoopVectorizationPlanner class.
static constexpr uint32_t MinItersBypassWeights[]
#define I(x, y, z)
Definition MD5.cpp:57
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB)
Create a new VPRegionBlock for the loop starting at HeaderVPB.
static bool isHeaderBB(BasicBlock *BB, Loop *L)
static bool handleFirstArgMinOrMax(VPlan &Plan, VPReductionPHIRecipe *MinOrMaxPhiR, VPReductionPHIRecipe *FindLastIVPhiR, VPWidenIntOrFpInductionRecipe *WideIV, VPInstruction *MinOrMaxResult, VPInstruction *FindIVSelect, VPRecipeBase *FindIVCmp, VPInstruction *FindIVRdxResult)
Given a first argmin/argmax pattern with strict predicate consisting of 1) a MinOrMax reduction MinOr...
static VPHeaderPHIRecipe * createWidenInductionRecipe(PHINode *Phi, VPPhi *PhiR, VPIRValue *Start, const InductionDescriptor &IndDesc, VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, DebugLoc DL)
Creates a VPWidenIntOrFpInductionRecipe or VPWidenPointerInductionRecipe for Phi based on IndDesc.
static void insertCheckBlockBeforeVectorLoop(VPlan &Plan, VPBasicBlock *CheckBlockVPBB)
Insert CheckBlockVPBB on the edge leading to the vector preheader, connecting it to both vector and s...
static void simplifyLiveInsWithSCEV(VPlan &Plan, PredicatedScalarEvolution &PSE)
Check Plan's live-in and replace them with constants, if they can be simplified via SCEV.
static void addInitialSkeleton(VPlan &Plan, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, Loop *TheLoop)
static void addBypassBranch(VPlan &Plan, VPBasicBlock *CheckBlockVPBB, VPValue *Cond, bool AddBranchWeights)
Create a BranchOnCond terminator in CheckBlockVPBB.
static void addCanonicalIVRecipes(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, Type *IdxTy, DebugLoc DL)
static bool canonicalHeaderAndLatch(VPBlockBase *HeaderVPB, const VPDominatorTree &VPDT)
Checks if HeaderVPB is a loop header block in the plain CFG; that is, it has exactly 2 predecessors (...
static VPInstruction * findFindIVSelect(VPValue *BackedgeVal)
Find and return the final select instruction of the FindIV result pattern for the given BackedgeVal: ...
static constexpr uint32_t CheckBypassWeights[]
static void printAfterInitialConstruction(VPlan &)
To make RUN_VPLAN_PASS print initial VPlan.
static auto getMatchingPhisForScalarLoop(VPBasicBlock *Header, VPBasicBlock *ScalarHeader)
Return an iterator range to iterate over pairs of matching phi nodes in Header and ScalarHeader,...
static void createExtractsForLiveOuts(VPlan &Plan, VPBasicBlock *MiddleVPBB)
Creates extracts for values in Plan defined in a loop region and used outside a loop region.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
This file provides utility VPlan to VPlan transformations.
#define RUN_VPLAN_PASS_NO_VERIFY(PASS,...)
This file contains the declarations of the Vectorization Plan base classes:
static const uint32_t IV[8]
Definition blake3_impl.h:83
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI iterator_range< filter_iterator< BasicBlock::const_iterator, std::function< bool(const Instruction &)> > > instructionsWithoutDebug(bool SkipPseudoOp=true) const
Return a const iterator range over the instructions in the block, skipping any debug instructions.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI StringRef getPredicateName(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
A debug info location.
Definition DebugLoc.h:123
static DebugLoc getUnknown()
Definition DebugLoc.h:161
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:53
static bool isLT(Predicate P)
Return true if the predicate is SLT or ULT.
static bool isGT(Predicate P)
Return true if the predicate is SGT or UGT.
A struct for saving information about induction variables.
InductionKind getKind() const
const SCEV * getStep() const
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
Value * getStartValue() const
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class emits a version of the loop where run-time checks ensure that may-alias pointers can't ove...
std::pair< MDNode *, MDNode * > getNoAliasMetadataFor(const Instruction *OrigInst) const
Returns a pair containing the alias_scope and noalias metadata nodes for OrigInst,...
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition LoopInfo.cpp:654
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1080
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
iterator find(const KeyT &Key)
Definition MapVector.h:154
The optimization diagnostic interface.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Diagnostic information for missed-optimization remarks.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSymbolicMaxBackedgeTakenCount()
Get the (predicated) symbolic max backedge count for the analyzed loop.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
FastMathFlags getFastMathFlags() const
static bool isFPMinMaxNumRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point minnum/maxnum kind.
bool hasUsesOutsideReductionChain() const
Returns true if the reduction PHI has any uses outside the reduction chain.
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
TrackingVH< Value > getRecurrenceStartValue() const
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RecurKind getRecurrenceKind() const
bool isOrdered() const
Expose an ordered FP reduction to the instance users.
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getTripCountFromExitCount(const SCEV *ExitCount)
A version of getTripCountFromExitCount below which always picks an evaluation type which can not resu...
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
LLVM_ABI const SCEV * applyLoopGuards(const SCEV *Expr, const Loop *L)
Try to apply information from loop guards for L to Expr.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
op_range operands()
Definition User.h:267
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4236
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4311
iterator end()
Definition VPlan.h:4273
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4271
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4324
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:232
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
Definition VPlan.cpp:565
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
Definition VPlan.cpp:644
const VPRecipeBase & back() const
Definition VPlan.h:4285
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4302
bool empty() const
Definition VPlan.h:4282
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:82
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
Definition VPlan.h:301
VPRegionBlock * getParent()
Definition VPlan.h:174
const VPBasicBlock * getExitingBasicBlock() const
Definition VPlan.cpp:202
void setName(const Twine &newName)
Definition VPlan.h:167
size_t getNumSuccessors() const
Definition VPlan.h:220
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:323
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:292
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:205
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:283
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:216
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:315
const VPBasicBlock * getEntryBasicBlock() const
Definition VPlan.cpp:182
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:272
void setParent(VPRegionBlock *P)
Definition VPlan.h:185
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:210
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:199
static void insertBlockAfter(VPBlockBase *NewBlock, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBase NewBlock after BlockPtr.
Definition VPlanUtils.h:170
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
Definition VPlanUtils.h:290
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:221
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
Definition VPlanUtils.h:239
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
Definition VPlanUtils.h:259
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPBasicBlock::iterator getInsertPoint() const
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPInstruction * createFCmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new FCmp VPInstruction with predicate Pred and operands A and B.
VPInstructionWithType * createScalarLoad(Type *ResultTy, VPValue *Addr, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
Definition VPlan.h:3798
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Definition VPlan.h:3968
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2273
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2315
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2304
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4389
Class to record and manage LLVM IR flags.
Definition VPlan.h:672
RecurKind getRecurKind() const
Definition VPlan.h:1021
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1193
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1304
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1295
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1311
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:388
VPBasicBlock * getParent()
Definition VPlan.h:463
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:537
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
A recipe for handling reduction phis.
Definition VPlan.h:2667
bool isOrdered() const
Returns true, if the phi is part of an ordered reduction.
Definition VPlan.h:2728
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2707
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2731
A recipe to represent inloop, ordered or partial reduction operations.
Definition VPlan.h:3030
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4424
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4535
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
Definition VPlan.h:4522
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Definition VPlan.h:589
An analysis for type-inference for VPValues.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:258
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:302
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:297
void addOperand(VPValue *Operand)
Definition VPlanValue.h:291
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:46
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:127
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:172
void replaceAllUsesWith(VPValue *New)
Definition VPlan.cpp:1408
unsigned getNumUsers() const
Definition VPlanValue.h:104
user_range users()
Definition VPlanValue.h:125
A Recipe for widening the canonical induction variable of the vector loop.
Definition VPlan.h:3931
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2370
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2390
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
Definition VPlan.h:2421
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2468
bool isCanonical() const
Returns true if the induction is canonical, i.e.
A recipe for widened phis.
Definition VPlan.h:2557
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4554
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:4858
LLVMContext & getContext() const
Definition VPlan.h:4745
VPBasicBlock * getEntry()
Definition VPlan.h:4646
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4743
VPValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4736
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4704
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4725
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:4829
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:4861
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4694
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4733
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:4806
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1038
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4711
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4671
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:4884
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1257
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:4826
VPRegionBlock * createLoopRegion(const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with Name and entry and exiting blocks set to Entry and Exiting respectively...
Definition VPlan.h:4894
bool hasScalarVFOnly() const
Definition VPlan.h:4774
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4685
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4690
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4651
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
Definition VPlan.h:4938
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:4840
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
bool matchFindIVResult(VPInstruction *VPI, Op0_t ReducedIV, Op1_t Start)
Match FindIV result pattern: select(icmp ne ComputeReductionResult(ReducedIV), Sentinel),...
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPIRFlags getFlagsFromIndDesc(const InductionDescriptor &ID)
Extracts and returns NoWrap and FastMath flags from the induction binop in ID.
Definition VPlanUtils.h:94
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
Definition VPlanUtils.h:111
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
Definition VPlanUtils.h:132
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:841
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2654
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
Definition VPlanCFG.h:253
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
Definition VPlanCFG.h:266
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
A recipe for handling first-order recurrence phis.
Definition VPlan.h:2605
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:183
static LLVM_ABI_FOR_TEST std::unique_ptr< VPlan > buildVPlan0(Loop *TheLoop, LoopInfo &LI, Type *InductionTy, DebugLoc IVDL, PredicatedScalarEvolution &PSE, LoopVersioning *LVer=nullptr)
Create a base VPlan0, serving as the common starting point for all later candidates.
static void createInLoopReductionRecipes(VPlan &Plan, const DenseSet< BasicBlock * > &BlocksNeedingPredication, ElementCount MinVF)
Create VPReductionRecipes for in-loop reductions.
static void foldTailByMasking(VPlan &Plan)
Adapts the vector loop region for tail folding by introducing a header mask and conditionally executi...
static LLVM_ABI_FOR_TEST void handleEarlyExits(VPlan &Plan, bool HasUncountableExit)
Update Plan to account for all early exits.
static bool handleMultiUseReductions(VPlan &Plan, OptimizationRemarkEmitter *ORE, Loop *TheLoop)
Try to legalize reductions with multiple in-loop uses.
static bool handleFindLastReductions(VPlan &Plan)
Check if Plan contains any FindLast reductions.
static void handleUncountableEarlyExits(VPlan &Plan, VPBasicBlock *HeaderVPBB, VPBasicBlock *LatchVPBB, VPBasicBlock *MiddleVPBB)
Update Plan to account for uncountable early exits by introducing appropriate branching logic in the ...
static void createHeaderPhiRecipes(VPlan &Plan, PredicatedScalarEvolution &PSE, Loop &OrigLoop, const MapVector< PHINode *, InductionDescriptor > &Inductions, const MapVector< PHINode *, RecurrenceDescriptor > &Reductions, const SmallPtrSetImpl< const PHINode * > &FixedOrderRecurrences, const SmallPtrSetImpl< PHINode * > &InLoopReductions, bool AllowReordering)
Replace VPPhi recipes in Plan's header with corresponding VPHeaderPHIRecipe subclasses for inductions...
static bool handleMaxMinNumReductions(VPlan &Plan)
Check if Plan contains any FMaxNum or FMinNum reductions.
static LLVM_ABI_FOR_TEST void createLoopRegions(VPlan &Plan)
Replace loops in Plan's flat CFG with VPRegionBlocks, turning Plan's flat CFG into a hierarchical CFG...
static void attachCheckBlock(VPlan &Plan, Value *Cond, BasicBlock *CheckBlock, bool AddBranchWeights)
Wrap runtime check block CheckBlock in a VPIRBB and Cond in a VPValue and connect the block to Plan,...
static void addMinimumIterationCheck(VPlan &Plan, ElementCount VF, unsigned UF, ElementCount MinProfitableTripCount, bool RequiresScalarEpilogue, bool TailFolded, Loop *OrigLoop, const uint32_t *MinItersBypassWeights, DebugLoc DL, PredicatedScalarEvolution &PSE)
static void addMinimumVectorEpilogueIterationCheck(VPlan &Plan, Value *TripCount, Value *VectorTripCount, bool RequiresScalarEpilogue, ElementCount EpilogueVF, unsigned EpilogueUF, unsigned MainLoopStep, unsigned EpilogueLoopStep, ScalarEvolution &SE)
Add a check to Plan to see if the epilogue vector loop should be executed.
static LLVM_ABI_FOR_TEST void addMiddleCheck(VPlan &Plan, bool RequiresScalarEpilogueCheck, bool TailFolded)
If a check is needed to guard executing the scalar epilogue loop, it will be added to the middle bloc...