LLVM 23.0.0git
VPlan.h
Go to the documentation of this file.
1//===- VPlan.h - Represent A Vectorizer Plan --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file contains the declarations of the Vectorization Plan base classes:
11/// 1. VPBasicBlock and VPRegionBlock that inherit from a common pure virtual
12/// VPBlockBase, together implementing a Hierarchical CFG;
13/// 2. Pure virtual VPRecipeBase serving as the base class for recipes contained
14/// within VPBasicBlocks;
15/// 3. Pure virtual VPSingleDefRecipe serving as a base class for recipes that
16/// also inherit from VPValue.
17/// 4. VPInstruction, a concrete Recipe and VPUser modeling a single planned
18/// instruction;
19/// 5. The VPlan class holding a candidate for vectorization;
20/// These are documented in docs/VectorizationPlan.rst.
21//
22//===----------------------------------------------------------------------===//
23
24#ifndef LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
25#define LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
26
27#include "VPlanValue.h"
28#include "llvm/ADT/Bitfields.h"
29#include "llvm/ADT/MapVector.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/ADT/ilist.h"
34#include "llvm/ADT/ilist_node.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/FMF.h"
40#include "llvm/IR/Operator.h"
43#include <cassert>
44#include <cstddef>
45#include <functional>
46#include <string>
47#include <utility>
48#include <variant>
49
50namespace llvm {
51
52class BasicBlock;
53class DominatorTree;
55class IRBuilderBase;
56struct VPTransformState;
57class raw_ostream;
59class SCEV;
60class SCEVPredicate;
61class Type;
62class VPBasicBlock;
63class VPBuilder;
64class VPDominatorTree;
65class VPRegionBlock;
66class VPlan;
67class VPLane;
69class Value;
71
72struct VPCostContext;
73
74using VPlanPtr = std::unique_ptr<VPlan>;
75
76/// \enum UncountableExitStyle
77/// Different methods of handling early exits.
78///
81 /// No side effects to worry about, so we can process any uncountable exits
82 /// in the loop and branch either to the middle block if the trip count was
83 /// reached, or an early exit block to determine which exit was taken.
85 /// All memory operations other than the load(s) required to determine whether
86 /// an uncountable exit occurred will be masked based on that condition. If an
87 /// uncountable exit is taken, then all lanes before the exiting lane will
88 /// complete, leaving just the final lane to execute in the scalar tail.
90};
91
92/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
93/// A VPBlockBase can be either a VPBasicBlock or a VPRegionBlock.
95 friend class VPBlockUtils;
96
97 const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast).
98
99 /// An optional name for the block.
100 std::string Name;
101
102 /// The immediate VPRegionBlock which this VPBlockBase belongs to, or null if
103 /// it is a topmost VPBlockBase.
104 VPRegionBlock *Parent = nullptr;
105
106 /// List of predecessor blocks.
108
109 /// List of successor blocks.
111
112 /// VPlan containing the block. Can only be set on the entry block of the
113 /// plan.
114 VPlan *Plan = nullptr;
115
116 /// Add \p Successor as the last successor to this block.
117 void appendSuccessor(VPBlockBase *Successor) {
118 assert(Successor && "Cannot add nullptr successor!");
119 Successors.push_back(Successor);
120 }
121
122 /// Add \p Predecessor as the last predecessor to this block.
123 void appendPredecessor(VPBlockBase *Predecessor) {
124 assert(Predecessor && "Cannot add nullptr predecessor!");
125 Predecessors.push_back(Predecessor);
126 }
127
128 /// Remove \p Predecessor from the predecessors of this block.
129 void removePredecessor(VPBlockBase *Predecessor) {
130 auto Pos = find(Predecessors, Predecessor);
131 assert(Pos && "Predecessor does not exist");
132 Predecessors.erase(Pos);
133 }
134
135 /// Remove \p Successor from the successors of this block.
136 void removeSuccessor(VPBlockBase *Successor) {
137 auto Pos = find(Successors, Successor);
138 assert(Pos && "Successor does not exist");
139 Successors.erase(Pos);
140 }
141
142 /// This function replaces one predecessor with another, useful when
143 /// trying to replace an old block in the CFG with a new one.
144 void replacePredecessor(VPBlockBase *Old, VPBlockBase *New) {
145 auto I = find(Predecessors, Old);
146 assert(I != Predecessors.end());
147 assert(Old->getParent() == New->getParent() &&
148 "replaced predecessor must have the same parent");
149 *I = New;
150 }
151
152 /// This function replaces one successor with another, useful when
153 /// trying to replace an old block in the CFG with a new one.
154 void replaceSuccessor(VPBlockBase *Old, VPBlockBase *New) {
155 auto I = find(Successors, Old);
156 assert(I != Successors.end());
157 assert(Old->getParent() == New->getParent() &&
158 "replaced successor must have the same parent");
159 *I = New;
160 }
161
162protected:
163 VPBlockBase(const unsigned char SC, const std::string &N)
164 : SubclassID(SC), Name(N) {}
165
166public:
167 /// An enumeration for keeping track of the concrete subclass of VPBlockBase
168 /// that are actually instantiated. Values of this enumeration are kept in the
169 /// SubclassID field of the VPBlockBase objects. They are used for concrete
170 /// type identification.
171 using VPBlockTy = enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC };
172
174
175 virtual ~VPBlockBase() = default;
176
177 const std::string &getName() const { return Name; }
178
179 void setName(const Twine &newName) { Name = newName.str(); }
180
181 /// \return an ID for the concrete type of this object.
182 /// This is used to implement the classof checks. This should not be used
183 /// for any other purpose, as the values may change as LLVM evolves.
184 unsigned getVPBlockID() const { return SubclassID; }
185
186 VPRegionBlock *getParent() { return Parent; }
187 const VPRegionBlock *getParent() const { return Parent; }
188
189 /// \return A pointer to the plan containing the current block.
190 VPlan *getPlan();
191 const VPlan *getPlan() const;
192
193 /// Sets the pointer of the plan containing the block. The block must be the
194 /// entry block into the VPlan.
195 void setPlan(VPlan *ParentPlan);
196
197 void setParent(VPRegionBlock *P) { Parent = P; }
198
199 /// \return the VPBasicBlock that is the entry of this VPBlockBase,
200 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
201 /// VPBlockBase is a VPBasicBlock, it is returned.
202 const VPBasicBlock *getEntryBasicBlock() const;
203 VPBasicBlock *getEntryBasicBlock();
204
205 /// \return the VPBasicBlock that is the exiting block of this VPBlockBase,
206 /// recursively, if the latter is a VPRegionBlock. Otherwise, if this
207 /// VPBlockBase is a VPBasicBlock, it is returned.
208 const VPBasicBlock *getExitingBasicBlock() const;
209 VPBasicBlock *getExitingBasicBlock();
210
211 const VPBlocksTy &getSuccessors() const { return Successors; }
212 VPBlocksTy &getSuccessors() { return Successors; }
213
214 /// Returns true if this block has any successors.
215 bool hasSuccessors() const { return !Successors.empty(); }
216 /// Returns true if this block has any predecessors.
217 bool hasPredecessors() const { return !Predecessors.empty(); }
218
221
222 const VPBlocksTy &getPredecessors() const { return Predecessors; }
223 VPBlocksTy &getPredecessors() { return Predecessors; }
224
225 /// \return the successor of this VPBlockBase if it has a single successor.
226 /// Otherwise return a null pointer.
228 return (Successors.size() == 1 ? *Successors.begin() : nullptr);
229 }
230
231 /// \return the predecessor of this VPBlockBase if it has a single
232 /// predecessor. Otherwise return a null pointer.
234 return (Predecessors.size() == 1 ? *Predecessors.begin() : nullptr);
235 }
236
237 size_t getNumSuccessors() const { return Successors.size(); }
238 size_t getNumPredecessors() const { return Predecessors.size(); }
239
240 /// An Enclosing Block of a block B is any block containing B, including B
241 /// itself. \return the closest enclosing block starting from "this", which
242 /// has successors. \return the root enclosing block if all enclosing blocks
243 /// have no successors.
244 VPBlockBase *getEnclosingBlockWithSuccessors();
245
246 /// \return the closest enclosing block starting from "this", which has
247 /// predecessors. \return the root enclosing block if all enclosing blocks
248 /// have no predecessors.
249 VPBlockBase *getEnclosingBlockWithPredecessors();
250
251 /// \return the successors either attached directly to this VPBlockBase or, if
252 /// this VPBlockBase is the exit block of a VPRegionBlock and has no
253 /// successors of its own, search recursively for the first enclosing
254 /// VPRegionBlock that has successors and return them. If no such
255 /// VPRegionBlock exists, return the (empty) successors of the topmost
256 /// VPBlockBase reached.
258 return getEnclosingBlockWithSuccessors()->getSuccessors();
259 }
260
261 /// \return the hierarchical successor of this VPBlockBase if it has a single
262 /// hierarchical successor. Otherwise return a null pointer.
264 return getEnclosingBlockWithSuccessors()->getSingleSuccessor();
265 }
266
267 /// \return the predecessors either attached directly to this VPBlockBase or,
268 /// if this VPBlockBase is the entry block of a VPRegionBlock and has no
269 /// predecessors of its own, search recursively for the first enclosing
270 /// VPRegionBlock that has predecessors and return them. If no such
271 /// VPRegionBlock exists, return the (empty) predecessors of the topmost
272 /// VPBlockBase reached.
274 return getEnclosingBlockWithPredecessors()->getPredecessors();
275 }
276
277 /// \return the hierarchical predecessor of this VPBlockBase if it has a
278 /// single hierarchical predecessor. Otherwise return a null pointer.
282
283 /// Set a given VPBlockBase \p Successor as the single successor of this
284 /// VPBlockBase. This VPBlockBase is not added as predecessor of \p Successor.
285 /// This VPBlockBase must have no successors.
287 assert(Successors.empty() && "Setting one successor when others exist.");
288 assert(Successor->getParent() == getParent() &&
289 "connected blocks must have the same parent");
290 appendSuccessor(Successor);
291 }
292
293 /// Set two given VPBlockBases \p IfTrue and \p IfFalse to be the two
294 /// successors of this VPBlockBase. This VPBlockBase is not added as
295 /// predecessor of \p IfTrue or \p IfFalse. This VPBlockBase must have no
296 /// successors.
297 void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse) {
298 assert(Successors.empty() && "Setting two successors when others exist.");
299 appendSuccessor(IfTrue);
300 appendSuccessor(IfFalse);
301 }
302
303 /// Set each VPBasicBlock in \p NewPreds as predecessor of this VPBlockBase.
304 /// This VPBlockBase must have no predecessors. This VPBlockBase is not added
305 /// as successor of any VPBasicBlock in \p NewPreds.
307 assert(Predecessors.empty() && "Block predecessors already set.");
308 for (auto *Pred : NewPreds)
309 appendPredecessor(Pred);
310 }
311
312 /// Set each VPBasicBlock in \p NewSuccs as successor of this VPBlockBase.
313 /// This VPBlockBase must have no successors. This VPBlockBase is not added
314 /// as predecessor of any VPBasicBlock in \p NewSuccs.
316 assert(Successors.empty() && "Block successors already set.");
317 for (auto *Succ : NewSuccs)
318 appendSuccessor(Succ);
319 }
320
321 /// Remove all the predecessors of this block.
322 void clearPredecessors() { Predecessors.clear(); }
323
324 /// Remove all the successors of this block.
325 void clearSuccessors() { Successors.clear(); }
326
327 /// Swap predecessors of the block. The block must have exactly 2
328 /// predecessors.
330 assert(Predecessors.size() == 2 && "must have 2 predecessors to swap");
331 std::swap(Predecessors[0], Predecessors[1]);
332 }
333
334 /// Swap successors of the block. The block must have exactly 2 successors.
335 // TODO: This should be part of introducing conditional branch recipes rather
336 // than being independent.
338 assert(Successors.size() == 2 && "must have 2 successors to swap");
339 std::swap(Successors[0], Successors[1]);
340 }
341
342 /// Returns the index for \p Pred in the blocks predecessors list.
343 unsigned getIndexForPredecessor(const VPBlockBase *Pred) const {
344 assert(count(Predecessors, Pred) == 1 &&
345 "must have Pred exactly once in Predecessors");
346 return std::distance(Predecessors.begin(), find(Predecessors, Pred));
347 }
348
349 /// Returns the index for \p Succ in the blocks successor list.
350 unsigned getIndexForSuccessor(const VPBlockBase *Succ) const {
351 assert(count(Successors, Succ) == 1 &&
352 "must have Succ exactly once in Successors");
353 return std::distance(Successors.begin(), find(Successors, Succ));
354 }
355
356 /// The method which generates the output IR that correspond to this
357 /// VPBlockBase, thereby "executing" the VPlan.
358 virtual void execute(VPTransformState *State) = 0;
359
360 /// Return the cost of the block.
362
363#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
364 void printAsOperand(raw_ostream &OS, bool PrintType = false) const {
365 OS << getName();
366 }
367
368 /// Print plain-text dump of this VPBlockBase to \p O, prefixing all lines
369 /// with \p Indent. \p SlotTracker is used to print unnamed VPValue's using
370 /// consecutive numbers.
371 ///
372 /// Note that the numbering is applied to the whole VPlan, so printing
373 /// individual blocks is consistent with the whole VPlan printing.
374 virtual void print(raw_ostream &O, const Twine &Indent,
375 VPSlotTracker &SlotTracker) const = 0;
376
377 /// Print plain-text dump of this VPBlockBase to \p O.
378 void print(raw_ostream &O) const;
379
380 /// Print the successors of this block to \p O, prefixing all lines with \p
381 /// Indent.
382 void printSuccessors(raw_ostream &O, const Twine &Indent) const;
383
384 /// Dump this VPBlockBase to dbgs().
385 LLVM_DUMP_METHOD void dump() const { print(dbgs()); }
386#endif
387
388 /// Clone the current block and its recipes without updating the operands of
389 /// the cloned recipes, including all blocks in the single-entry single-exit
390 /// region for VPRegionBlocks.
391 virtual VPBlockBase *clone() = 0;
392};
393
394/// VPRecipeBase is a base class modeling a sequence of one or more output IR
395/// instructions. VPRecipeBase owns the VPValues it defines through VPDef
396/// and is responsible for deleting its defined values. Single-value
397 /// recipes must inherit from VPSingleDefRecipe instead of inheriting from both
398/// VPRecipeBase and VPValue separately.
400 : public ilist_node_with_parent<VPRecipeBase, VPBasicBlock>,
401 public VPDef,
402 public VPUser {
403 friend VPBasicBlock;
404 friend class VPBlockUtils;
405
406 /// Subclass identifier (for isa/dyn_cast).
407 const unsigned char SubclassID;
408
409 /// Each VPRecipe belongs to a single VPBasicBlock.
410 VPBasicBlock *Parent = nullptr;
411
412 /// The debug location for the recipe.
413 DebugLoc DL;
414
415public:
416 /// An enumeration for keeping track of the concrete subclass of VPRecipeBase
417 /// that is actually instantiated. Values of this enumeration are kept in the
418 /// SubclassID field of the VPRecipeBase objects. They are used for concrete
419 /// type identification.
420 using VPRecipeTy = enum {
421 VPBranchOnMaskSC,
422 VPDerivedIVSC,
423 VPExpandSCEVSC,
424 VPExpressionSC,
425 VPIRInstructionSC,
426 VPInstructionSC,
427 VPInterleaveEVLSC,
428 VPInterleaveSC,
429 VPReductionEVLSC,
430 VPReductionSC,
431 VPReplicateSC,
432 VPScalarIVStepsSC,
433 VPVectorPointerSC,
434 VPVectorEndPointerSC,
435 VPWidenCallSC,
436 VPWidenCanonicalIVSC,
437 VPWidenCastSC,
438 VPWidenGEPSC,
439 VPWidenIntrinsicSC,
440 VPWidenMemIntrinsicSC,
441 VPWidenLoadEVLSC,
442 VPWidenLoadSC,
443 VPWidenStoreEVLSC,
444 VPWidenStoreSC,
445 VPWidenSC,
446 VPBlendSC,
447 VPHistogramSC,
448 // START: Phi-like recipes. Need to be kept together.
449 VPWidenPHISC,
450 VPPredInstPHISC,
451 // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
452 // VPHeaderPHIRecipe need to be kept together.
453 VPCurrentIterationPHISC,
454 VPActiveLaneMaskPHISC,
455 VPFirstOrderRecurrencePHISC,
456 VPWidenIntOrFpInductionSC,
457 VPWidenPointerInductionSC,
458 VPReductionPHISC,
459 // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
460 // END: Phi-like recipes
461 VPFirstPHISC = VPWidenPHISC,
462 VPFirstHeaderPHISC = VPCurrentIterationPHISC,
463 VPLastHeaderPHISC = VPReductionPHISC,
464 VPLastPHISC = VPReductionPHISC,
465 };
466
467 VPRecipeBase(const unsigned char SC, ArrayRef<VPValue *> Operands,
469 : VPDef(), VPUser(Operands), SubclassID(SC), DL(DL) {}
470
471 ~VPRecipeBase() override = default;
472
473 /// Clone the current recipe.
474 virtual VPRecipeBase *clone() = 0;
475
476 /// \return the VPBasicBlock which this VPRecipe belongs to.
477 VPBasicBlock *getParent() { return Parent; }
478 const VPBasicBlock *getParent() const { return Parent; }
479
480 /// \return the VPRegionBlock which the recipe belongs to.
481 VPRegionBlock *getRegion();
482 const VPRegionBlock *getRegion() const;
483
484 /// The method which generates the output IR instructions that correspond to
485 /// this VPRecipe, thereby "executing" the VPlan.
486 virtual void execute(VPTransformState &State) = 0;
487
488 /// Return the cost of this recipe, taking into account if the cost
489 /// computation should be skipped and the ForceTargetInstructionCost flag.
490 /// Also takes care of printing the cost for debugging.
492
493 /// Insert an unlinked recipe into a basic block immediately before
494 /// the specified recipe.
495 void insertBefore(VPRecipeBase *InsertPos);
496 /// Insert an unlinked recipe into \p BB immediately before the insertion
497 /// point \p IP;
498 void insertBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator IP);
499
500 /// Insert an unlinked Recipe into a basic block immediately after
501 /// the specified Recipe.
502 void insertAfter(VPRecipeBase *InsertPos);
503
504 /// Unlink this recipe from its current VPBasicBlock and insert it into
505 /// the VPBasicBlock that MovePos lives in, right after MovePos.
506 void moveAfter(VPRecipeBase *MovePos);
507
508 /// Unlink this recipe and insert into BB before I.
509 ///
510 /// \pre I is a valid iterator into BB.
511 void moveBefore(VPBasicBlock &BB, iplist<VPRecipeBase>::iterator I);
512
513 /// This method unlinks 'this' from the containing basic block, but does not
514 /// delete it.
515 void removeFromParent();
516
517 /// This method unlinks 'this' from the containing basic block and deletes it.
518 ///
519 /// \returns an iterator pointing to the element after the erased one
521
522 /// \return an ID for the concrete type of this object.
523 unsigned getVPRecipeID() const { return SubclassID; }
524
525 /// Method to support type inquiry through isa, cast, and dyn_cast.
526 static inline bool classof(const VPDef *D) {
527 // All VPDefs are also VPRecipeBases.
528 return true;
529 }
530
531 static inline bool classof(const VPUser *U) { return true; }
532
533 /// Returns true if the recipe may have side-effects.
534 bool mayHaveSideEffects() const;
535
536 /// Return true if we can safely execute this recipe unconditionally even if
537 /// it is masked originally.
538 bool isSafeToSpeculativelyExecute() const;
539
540 /// Returns true for PHI-like recipes.
541 bool isPhi() const;
542
543 /// Returns true if the recipe may read from memory.
544 bool mayReadFromMemory() const;
545
546 /// Returns true if the recipe may write to memory.
547 bool mayWriteToMemory() const;
548
549 /// Returns true if the recipe may read from or write to memory.
550 bool mayReadOrWriteMemory() const {
552 }
553
554 /// Returns the debug location of the recipe.
555 DebugLoc getDebugLoc() const { return DL; }
556
557 /// Set the recipe's debug location to \p NewDL.
558 void setDebugLoc(DebugLoc NewDL) { DL = NewDL; }
559
560#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
561 /// Dump the recipe to stderr (for debugging).
562 LLVM_ABI_FOR_TEST void dump() const;
563
564 /// Print the recipe, delegating to printRecipe().
565 void print(raw_ostream &O, const Twine &Indent,
567#endif
568
569protected:
570 /// Compute the cost of this recipe either using a recipe's specialized
571 /// implementation or using the legacy cost model and the underlying
572 /// instructions.
573 virtual InstructionCost computeCost(ElementCount VF,
574 VPCostContext &Ctx) const;
575
576#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
577 /// Each concrete VPRecipe prints itself, without printing common information,
578 /// like debug info or metadata.
579 virtual void printRecipe(raw_ostream &O, const Twine &Indent,
580 VPSlotTracker &SlotTracker) const = 0;
581#endif
582};
583
// Helper macro that expands to the common classof() overloads for a recipe
// identified by a single recipe ID. Each overload obtains the VPRecipeBase
// behind its argument (if any) and compares its ID against VPRecipeID.
#define VP_CLASSOF_IMPL(VPRecipeID)                                            \
  static inline bool classof(const VPRecipeBase *R) {                          \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }                                                                            \
  static inline bool classof(const VPValue *V) {                               \
    if (auto *R = V->getDefiningRecipe())                                      \
      return R->getVPRecipeID() == VPRecipeID;                                 \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPUser *U) {                                \
    if (auto *R = dyn_cast<VPRecipeBase>(U))                                   \
      return R->getVPRecipeID() == VPRecipeID;                                 \
    return false;                                                              \
  }                                                                            \
  static inline bool classof(const VPSingleDefRecipe *R) {                     \
    return R->getVPRecipeID() == VPRecipeID;                                   \
  }
600
601/// Compute the scalar result type for an IR \p Opcode given \p Operands.
602LLVM_ABI Type *computeScalarTypeForInstruction(unsigned Opcode,
603 ArrayRef<VPValue *> Operands);
604
605/// VPSingleDefRecipe is a base class for recipes that model a sequence of one
606/// or more output IR that define a single result VPValue. Note that
607/// VPSingleDefRecipe must inherit from VPRecipeBase before VPSingleDefValue.
609public:
610 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
612 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this) {}
613
614 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
616 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV) {}
617
618 VPSingleDefRecipe(const unsigned char SC, ArrayRef<VPValue *> Operands,
619 Type *ResultTy, Value *UV = nullptr,
621 : VPRecipeBase(SC, Operands, DL), VPSingleDefValue(this, UV, ResultTy) {}
622
623 static inline bool classof(const VPRecipeBase *R) {
624 switch (R->getVPRecipeID()) {
625 case VPRecipeBase::VPDerivedIVSC:
626 case VPRecipeBase::VPExpandSCEVSC:
627 case VPRecipeBase::VPExpressionSC:
628 case VPRecipeBase::VPInstructionSC:
629 case VPRecipeBase::VPReductionEVLSC:
630 case VPRecipeBase::VPReductionSC:
631 case VPRecipeBase::VPReplicateSC:
632 case VPRecipeBase::VPScalarIVStepsSC:
633 case VPRecipeBase::VPVectorPointerSC:
634 case VPRecipeBase::VPVectorEndPointerSC:
635 case VPRecipeBase::VPWidenCallSC:
636 case VPRecipeBase::VPWidenCanonicalIVSC:
637 case VPRecipeBase::VPWidenCastSC:
638 case VPRecipeBase::VPWidenGEPSC:
639 case VPRecipeBase::VPWidenIntrinsicSC:
640 case VPRecipeBase::VPWidenMemIntrinsicSC:
641 case VPRecipeBase::VPWidenSC:
642 case VPRecipeBase::VPBlendSC:
643 case VPRecipeBase::VPPredInstPHISC:
644 case VPRecipeBase::VPCurrentIterationPHISC:
645 case VPRecipeBase::VPActiveLaneMaskPHISC:
646 case VPRecipeBase::VPFirstOrderRecurrencePHISC:
647 case VPRecipeBase::VPWidenPHISC:
648 case VPRecipeBase::VPWidenIntOrFpInductionSC:
649 case VPRecipeBase::VPWidenPointerInductionSC:
650 case VPRecipeBase::VPReductionPHISC:
651 case VPRecipeBase::VPWidenLoadEVLSC:
652 case VPRecipeBase::VPWidenLoadSC:
653 return true;
654 case VPRecipeBase::VPBranchOnMaskSC:
655 case VPRecipeBase::VPInterleaveEVLSC:
656 case VPRecipeBase::VPInterleaveSC:
657 case VPRecipeBase::VPIRInstructionSC:
658 case VPRecipeBase::VPWidenStoreEVLSC:
659 case VPRecipeBase::VPWidenStoreSC:
660 case VPRecipeBase::VPHistogramSC:
661 return false;
662 }
663 llvm_unreachable("Unhandled VPRecipeID");
664 }
665
666 static inline bool classof(const VPValue *V) {
667 auto *R = V->getDefiningRecipe();
668 return R && classof(R);
669 }
670
671 static inline bool classof(const VPUser *U) {
672 auto *R = dyn_cast<VPRecipeBase>(U);
673 return R && classof(R);
674 }
675
676 VPSingleDefRecipe *clone() override = 0;
677
678 /// Returns the underlying instruction.
685
686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
687 /// Print this VPSingleDefRecipe to dbgs() (for debugging).
689#endif
690};
691
692/// Class to record and manage LLVM IR flags.
695 enum class OperationType : unsigned char {
696 Cmp,
697 FCmp,
698 OverflowingBinOp,
699 Trunc,
700 DisjointOp,
701 PossiblyExactOp,
702 GEPOp,
703 FPMathOp,
704 NonNegOp,
705 ReductionOp,
706 Other
707 };
708
709public:
710 struct WrapFlagsTy {
711 char HasNUW : 1;
712 char HasNSW : 1;
713
715 };
716
718 char HasNUW : 1;
719 char HasNSW : 1;
720
722 };
723
728
730 char NonNeg : 1;
731 NonNegFlagsTy(bool IsNonNeg) : NonNeg(IsNonNeg) {}
732 };
733
734private:
735 struct ExactFlagsTy {
736 char IsExact : 1;
737 ExactFlagsTy(bool Exact) : IsExact(Exact) {}
738 };
739 struct FastMathFlagsTy {
740 char AllowReassoc : 1;
741 char NoNaNs : 1;
742 char NoInfs : 1;
743 char NoSignedZeros : 1;
744 char AllowReciprocal : 1;
745 char AllowContract : 1;
746 char ApproxFunc : 1;
747
748 LLVM_ABI_FOR_TEST FastMathFlagsTy(const FastMathFlags &FMF);
749 };
750 /// Holds both the predicate and fast-math flags for floating-point
751 /// comparisons.
752 struct FCmpFlagsTy {
753 uint8_t CmpPredStorage;
754 FastMathFlagsTy FMFs;
755 };
756 /// Holds reduction-specific flags: RecurKind, IsOrdered, IsInLoop, and FMFs.
757 struct ReductionFlagsTy {
758 // RecurKind has ~26 values, needs 5 bits but uses 6 bits to account for
759 // additional kinds.
760 unsigned char Kind : 6;
761 // TODO: Derive order/in-loop from plan and remove here.
762 unsigned char IsOrdered : 1;
763 unsigned char IsInLoop : 1;
764 FastMathFlagsTy FMFs;
765
766 ReductionFlagsTy(RecurKind Kind, bool IsOrdered, bool IsInLoop,
767 FastMathFlags FMFs)
768 : Kind(static_cast<unsigned char>(Kind)), IsOrdered(IsOrdered),
769 IsInLoop(IsInLoop), FMFs(FMFs) {}
770 };
771
772 OperationType OpType;
773
774 union {
779 ExactFlagsTy ExactFlags;
782 FastMathFlagsTy FMFs;
783 FCmpFlagsTy FCmpFlags;
784 ReductionFlagsTy ReductionFlags;
786 };
787
788public:
789 VPIRFlags() : OpType(OperationType::Other), AllFlags() {}
790
792 if (auto *FCmp = dyn_cast<FCmpInst>(&I)) {
793 OpType = OperationType::FCmp;
795 FCmp->getPredicate());
796 assert(getPredicate() == FCmp->getPredicate() && "predicate truncated");
797 FCmpFlags.FMFs = FCmp->getFastMathFlags();
798 } else if (auto *Op = dyn_cast<CmpInst>(&I)) {
799 OpType = OperationType::Cmp;
801 Op->getPredicate());
802 assert(getPredicate() == Op->getPredicate() && "predicate truncated");
803 } else if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I)) {
804 OpType = OperationType::DisjointOp;
805 DisjointFlags.IsDisjoint = Op->isDisjoint();
806 } else if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
807 OpType = OperationType::OverflowingBinOp;
808 WrapFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
809 } else if (auto *Op = dyn_cast<TruncInst>(&I)) {
810 OpType = OperationType::Trunc;
811 TruncFlags = {Op->hasNoUnsignedWrap(), Op->hasNoSignedWrap()};
812 } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
813 OpType = OperationType::PossiblyExactOp;
814 ExactFlags.IsExact = Op->isExact();
815 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
816 OpType = OperationType::GEPOp;
817 GEPFlagsStorage = GEP->getNoWrapFlags().getRaw();
818 assert(getGEPNoWrapFlags() == GEP->getNoWrapFlags() &&
819 "wrap flags truncated");
820 } else if (auto *PNNI = dyn_cast<PossiblyNonNegInst>(&I)) {
821 OpType = OperationType::NonNegOp;
822 NonNegFlags.NonNeg = PNNI->hasNonNeg();
823 } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
824 OpType = OperationType::FPMathOp;
825 FMFs = Op->getFastMathFlags();
826 }
827 }
828
829 VPIRFlags(CmpInst::Predicate Pred) : OpType(OperationType::Cmp), AllFlags() {
831 assert(getPredicate() == Pred && "predicate truncated");
832 }
833
835 : OpType(OperationType::FCmp), AllFlags() {
837 assert(getPredicate() == Pred && "predicate truncated");
838 FCmpFlags.FMFs = FMFs;
839 }
840
842 : OpType(OperationType::OverflowingBinOp), AllFlags() {
843 this->WrapFlags = WrapFlags;
844 }
845
847 : OpType(OperationType::Trunc), AllFlags() {
848 this->TruncFlags = TruncFlags;
849 }
850
851 VPIRFlags(FastMathFlags FMFs) : OpType(OperationType::FPMathOp), AllFlags() {
852 this->FMFs = FMFs;
853 }
854
856 : OpType(OperationType::DisjointOp), AllFlags() {
857 this->DisjointFlags = DisjointFlags;
858 }
859
861 : OpType(OperationType::NonNegOp), AllFlags() {
862 this->NonNegFlags = NonNegFlags;
863 }
864
865 VPIRFlags(ExactFlagsTy ExactFlags)
866 : OpType(OperationType::PossiblyExactOp), AllFlags() {
867 this->ExactFlags = ExactFlags;
868 }
869
871 : OpType(OperationType::GEPOp), AllFlags() {
872 GEPFlagsStorage = GEPFlags.getRaw();
873 }
874
875 VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
876 : OpType(OperationType::ReductionOp), AllFlags() {
877 ReductionFlags = ReductionFlagsTy(Kind, IsOrdered, IsInLoop, FMFs);
878 }
879
881 OpType = Other.OpType;
882 AllFlags[0] = Other.AllFlags[0];
883 AllFlags[1] = Other.AllFlags[1];
884 }
885
886 /// Only keep flags also present in \p Other. \p Other must have the same
887 /// OpType as the current object.
888 void intersectFlags(const VPIRFlags &Other);
889
890 /// Drop all poison-generating flags.
892 // NOTE: This needs to be kept in-sync with
893 // Instruction::dropPoisonGeneratingFlags.
894 switch (OpType) {
895 case OperationType::OverflowingBinOp:
896 WrapFlags.HasNUW = false;
897 WrapFlags.HasNSW = false;
898 break;
899 case OperationType::Trunc:
900 TruncFlags.HasNUW = false;
901 TruncFlags.HasNSW = false;
902 break;
903 case OperationType::DisjointOp:
904 DisjointFlags.IsDisjoint = false;
905 break;
906 case OperationType::PossiblyExactOp:
907 ExactFlags.IsExact = false;
908 break;
909 case OperationType::GEPOp:
910 GEPFlagsStorage = 0;
911 break;
912 case OperationType::FPMathOp:
913 case OperationType::FCmp:
914 case OperationType::ReductionOp:
915 getFMFsRef().NoNaNs = false;
916 getFMFsRef().NoInfs = false;
917 break;
918 case OperationType::NonNegOp:
919 NonNegFlags.NonNeg = false;
920 break;
921 case OperationType::Cmp:
922 case OperationType::Other:
923 break;
924 }
925 }
926
927 /// Apply the IR flags to \p I.
928 void applyFlags(Instruction &I) const {
929 switch (OpType) {
930 case OperationType::OverflowingBinOp:
931 I.setHasNoUnsignedWrap(WrapFlags.HasNUW);
932 I.setHasNoSignedWrap(WrapFlags.HasNSW);
933 break;
934 case OperationType::Trunc:
935 I.setHasNoUnsignedWrap(TruncFlags.HasNUW);
936 I.setHasNoSignedWrap(TruncFlags.HasNSW);
937 break;
938 case OperationType::DisjointOp:
939 cast<PossiblyDisjointInst>(&I)->setIsDisjoint(DisjointFlags.IsDisjoint);
940 break;
941 case OperationType::PossiblyExactOp:
942 I.setIsExact(ExactFlags.IsExact);
943 break;
944 case OperationType::GEPOp:
945 cast<GetElementPtrInst>(&I)->setNoWrapFlags(
947 break;
948 case OperationType::FPMathOp:
949 case OperationType::FCmp: {
950 const FastMathFlagsTy &F = getFMFsRef();
951 I.setHasAllowReassoc(F.AllowReassoc);
952 I.setHasNoNaNs(F.NoNaNs);
953 I.setHasNoInfs(F.NoInfs);
954 I.setHasNoSignedZeros(F.NoSignedZeros);
955 I.setHasAllowReciprocal(F.AllowReciprocal);
956 I.setHasAllowContract(F.AllowContract);
957 I.setHasApproxFunc(F.ApproxFunc);
958 break;
959 }
960 case OperationType::NonNegOp:
961 I.setNonNeg(NonNegFlags.NonNeg);
962 break;
963 case OperationType::ReductionOp:
964 llvm_unreachable("reduction ops should not use applyFlags");
965 case OperationType::Cmp:
966 case OperationType::Other:
967 break;
968 }
969 }
970
972 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
973 "recipe doesn't have a compare predicate");
974 uint8_t Storage = OpType == OperationType::FCmp ? FCmpFlags.CmpPredStorage
977 }
978
980 assert((OpType == OperationType::Cmp || OpType == OperationType::FCmp) &&
981 "recipe doesn't have a compare predicate");
982 if (OpType == OperationType::FCmp)
984 else
986 assert(getPredicate() == Pred && "predicate truncated");
987 }
988
992
993 /// Returns true if the recipe has a comparison predicate.
994 bool hasPredicate() const {
995 return OpType == OperationType::Cmp || OpType == OperationType::FCmp;
996 }
997
998 /// Returns true if the recipe has fast-math flags.
999 bool hasFastMathFlags() const {
1000 return OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
1001 OpType == OperationType::ReductionOp;
1002 }
1003
1005
1006 /// Returns true if the recipe has non-negative flag.
1007 bool hasNonNegFlag() const { return OpType == OperationType::NonNegOp; }
1008
1009 bool isNonNeg() const {
1010 assert(OpType == OperationType::NonNegOp &&
1011 "recipe doesn't have a NNEG flag");
1012 return NonNegFlags.NonNeg;
1013 }
1014
1015 bool hasNoUnsignedWrap() const {
1016 switch (OpType) {
1017 case OperationType::OverflowingBinOp:
1018 return WrapFlags.HasNUW;
1019 case OperationType::Trunc:
1020 return TruncFlags.HasNUW;
1021 default:
1022 llvm_unreachable("recipe doesn't have a NUW flag");
1023 }
1024 }
1025
1026 bool hasNoSignedWrap() const {
1027 switch (OpType) {
1028 case OperationType::OverflowingBinOp:
1029 return WrapFlags.HasNSW;
1030 case OperationType::Trunc:
1031 return TruncFlags.HasNSW;
1032 default:
1033 llvm_unreachable("recipe doesn't have a NSW flag");
1034 }
1035 }
1036
1037 bool hasNoWrapFlags() const {
1038 switch (OpType) {
1039 case OperationType::OverflowingBinOp:
1040 case OperationType::Trunc:
1041 return true;
1042 default:
1043 return false;
1044 }
1045 }
1046
1048 return {hasNoUnsignedWrap(), hasNoSignedWrap()};
1049 }
1050
1051 bool isDisjoint() const {
1052 assert(OpType == OperationType::DisjointOp &&
1053 "recipe cannot have a disjoing flag");
1054 return DisjointFlags.IsDisjoint;
1055 }
1056
1058 assert(OpType == OperationType::ReductionOp &&
1059 "recipe doesn't have reduction flags");
1060 return static_cast<RecurKind>(ReductionFlags.Kind);
1061 }
1062
1063 bool isReductionOrdered() const {
1064 assert(OpType == OperationType::ReductionOp &&
1065 "recipe doesn't have reduction flags");
1066 return ReductionFlags.IsOrdered;
1067 }
1068
1069 bool isReductionInLoop() const {
1070 assert(OpType == OperationType::ReductionOp &&
1071 "recipe doesn't have reduction flags");
1072 return ReductionFlags.IsInLoop;
1073 }
1074
1075private:
1076 /// Get a reference to the fast-math flags for FPMathOp, FCmp or ReductionOp.
1077 FastMathFlagsTy &getFMFsRef() {
1078 if (OpType == OperationType::FCmp)
1079 return FCmpFlags.FMFs;
1080 if (OpType == OperationType::ReductionOp)
1081 return ReductionFlags.FMFs;
1082 return FMFs;
1083 }
1084 const FastMathFlagsTy &getFMFsRef() const {
1085 if (OpType == OperationType::FCmp)
1086 return FCmpFlags.FMFs;
1087 if (OpType == OperationType::ReductionOp)
1088 return ReductionFlags.FMFs;
1089 return FMFs;
1090 }
1091
1092public:
1093 /// Returns default flags for \p Opcode for opcodes that support it, asserts
1094 /// otherwise. Opcodes not supporting default flags include compares and
1095 /// ComputeReductionResult.
1096 static VPIRFlags getDefaultFlags(unsigned Opcode);
1097
1098#if !defined(NDEBUG)
1099 /// Returns true if the set flags are valid for \p Opcode.
1100 LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const;
1101
1102 /// Returns true if \p Opcode has its required flags set.
1103 LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const;
1104#endif
1105
1106#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1107 void printFlags(raw_ostream &O) const;
1108#endif
1109};
1111
1112static_assert(sizeof(VPIRFlags) <= 3, "VPIRFlags should not grow");
1113
1114/// A pure-virtual common base class for recipes defining a single VPValue and
1115/// using IR flags.
1117 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1118 const VPIRFlags &Flags,
1120 : VPSingleDefRecipe(SC, Operands, DL), VPIRFlags(Flags) {}
1121
1122 VPRecipeWithIRFlags(const unsigned char SC, ArrayRef<VPValue *> Operands,
1123 Type *ResultTy, const VPIRFlags &Flags,
1125 : VPSingleDefRecipe(SC, Operands, ResultTy, /*UV=*/nullptr, DL),
1126 VPIRFlags(Flags) {}
1127
1128 static inline bool classof(const VPRecipeBase *R) {
1129 return R->getVPRecipeID() == VPRecipeBase::VPBlendSC ||
1130 R->getVPRecipeID() == VPRecipeBase::VPInstructionSC ||
1131 R->getVPRecipeID() == VPRecipeBase::VPWidenSC ||
1132 R->getVPRecipeID() == VPRecipeBase::VPWidenGEPSC ||
1133 R->getVPRecipeID() == VPRecipeBase::VPWidenCallSC ||
1134 R->getVPRecipeID() == VPRecipeBase::VPWidenCastSC ||
1135 R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1136 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC ||
1137 R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
1138 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC ||
1139 R->getVPRecipeID() == VPRecipeBase::VPReplicateSC ||
1140 R->getVPRecipeID() == VPRecipeBase::VPVectorEndPointerSC ||
1141 R->getVPRecipeID() == VPRecipeBase::VPVectorPointerSC ||
1142 R->getVPRecipeID() == VPRecipeBase::VPWidenCanonicalIVSC;
1143 }
1144
1145 static inline bool classof(const VPUser *U) {
1146 auto *R = dyn_cast<VPRecipeBase>(U);
1147 return R && classof(R);
1148 }
1149
1150 static inline bool classof(const VPValue *V) {
1151 auto *R = V->getDefiningRecipe();
1152 return R && classof(R);
1153 }
1154
1156
1157 static inline bool classof(const VPSingleDefRecipe *R) {
1158 return classof(static_cast<const VPRecipeBase *>(R));
1159 }
1160
1161 void execute(VPTransformState &State) override = 0;
1162
1163 /// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx.
1165 VPCostContext &Ctx) const;
1166};
1167
1168/// Helper to manage IR metadata for recipes. It filters out metadata that
1169/// cannot be propagated.
1172
1173public:
1174 VPIRMetadata() = default;
1175
1176 /// Adds metadata that can be preserved from the original instruction
1177 /// \p I.
1179
1180 /// Copy constructor for cloning.
1182
1184
1185 /// Add all metadata to \p I.
1186 void applyMetadata(Instruction &I) const;
1187
1188 /// Set metadata with kind \p Kind to \p Node. If metadata with \p Kind
1189 /// already exists, it will be replaced. Otherwise, it will be added.
1190 void setMetadata(unsigned Kind, MDNode *Node) {
1191 auto It =
1192 llvm::find_if(Metadata, [Kind](const std::pair<unsigned, MDNode *> &P) {
1193 return P.first == Kind;
1194 });
1195 if (It != Metadata.end())
1196 It->second = Node;
1197 else
1198 Metadata.emplace_back(Kind, Node);
1199 }
1200
1201 /// Intersect this VPIRMetadata object with \p MD, keeping only metadata
1202 /// nodes that are common to both.
1203 void intersect(const VPIRMetadata &MD);
1204
1205 /// Get metadata of kind \p Kind. Returns nullptr if not found.
1206 MDNode *getMetadata(unsigned Kind) const {
1207 auto It =
1208 find_if(Metadata, [Kind](const auto &P) { return P.first == Kind; });
1209 return It != Metadata.end() ? It->second : nullptr;
1210 }
1211
1212#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1213 /// Print metadata with node IDs.
1214 void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
1215#endif
1216};
1217
1218/// This is a concrete Recipe that models a single VPlan-level instruction.
1219/// While as any Recipe it may generate a sequence of IR instructions when
1220/// executed, these instructions would always form a single-def expression as
1221/// the VPInstruction is also a single def-use vertex. Most VPInstruction
1222/// opcodes can take an optional mask. Masks may be assigned during
1223/// predication.
1225 public VPIRMetadata {
1226public:
1227 /// VPlan opcodes, extending LLVM IR with idiomatic instructions.
1228 enum {
1230 Instruction::OtherOpsEnd + 1, // Combines the incoming and previous
1231 // values of a first-order recurrence.
1233 // Creates a mask where each lane is active (true) whilst the current
1234 // counter (first operand + index) is less than the second operand. i.e.
1235 // mask[i] = icmp ult (op0 + i), op1
1236 // The size of the mask returned is VF * Multiplier (UF, third op).
1239 // Represents the incoming loop-invariant alias-mask. All memory accesses
1240 // in the loop must stay within the active lanes.
1243 // Increment the canonical IV separately for each unrolled part.
1245 // Abstract instruction that compares two values and branches. This is
1246 // lowered to ICmp + BranchOnCond during VPlan to VPlan transformation.
1249 // Branch with 2 boolean condition operands and 3 successors. If condition
1250 // 0 is true, branches to successor 0; if condition 1 is true, branches to
1251 // successor 1; otherwise branches to successor 2. Expanded after region
1252 // dissolution into: (1) an OR of the two conditions branching to
1253 // middle.split or successor 2, and (2) middle.split branching to successor
1254 // 0 or successor 1 based on condition 0.
1257 /// Given operands of (the same) struct type, creates a struct of fixed-
1258 /// width vectors each containing a struct field of all operands. The
1259 /// number of operands matches the element count of every vector.
1261 /// Creates a fixed-width vector containing all operands. The number of
1262 /// operands matches the vector element count.
1264 /// Extracts all lanes from its (non-scalable) vector operand. This is an
1265 /// abstract VPInstruction whose single defined VPValue represents VF
1266 /// scalars extracted from a vector, to be replaced by VF ExtractElement
1267 /// VPInstructions.
1269 /// Reduce the operands to the final reduction result using the operation
1270 /// specified via the operation's VPIRFlags.
1272 // Extracts the last part of its operand. Removed during unrolling.
1274 // Extracts the last lane of its vector operand, per part.
1276 // Extracts the second-to-last lane from its operand or the second-to-last
1277 // part if it is scalar. In the latter case, the recipe will be removed
1278 // during unrolling.
1280 LogicalAnd, // Non-poison propagating logical And.
1281 LogicalOr, // Non-poison propagating logical Or.
1282 NumActiveLanes, // Counts the number of active lanes in a mask.
1283 // Add an offset in bytes (second operand) to a base pointer (first
1284 // operand). Only generates scalar values (either for the first lane only or
1285 // for all lanes, depending on its uses).
1287 // Add a vector offset in bytes (second operand) to a scalar base pointer
1288 // (first operand).
1290 // Returns a scalar boolean value, which is true if any lane of its
1291 // (boolean) vector operands is true. It produces the reduced value across
1292 // all unrolled iterations. Unrolling will add all copies of its original
1293 // operand as additional operands. AnyOf is poison-safe as all operands
1294 // will be frozen.
1296 // Calculates the first active lane index of the vector predicate operands.
1297 // It produces the lane index across all unrolled iterations. Unrolling will
1298 // add all copies of its original operand as additional operands.
1299 // Implemented with @llvm.experimental.cttz.elts, but returns the expected
1300 // result even with operands that are all zeroes.
1302 // Calculates the last active lane index of the vector predicate operands.
1303 // The predicates must be prefix-masks (all 1s before all 0s). Used when
1304 // tail-folding to extract the correct live-out value from the last active
1305 // iteration. It produces the lane index across all unrolled iterations.
1306 // Unrolling will add all copies of its original operand as additional
1307 // operands.
1309 // Returns a reversed vector for the operand.
1311 /// Start vector for reductions with 3 operands: the original start value,
1312 /// the identity value for the reduction and an integer indicating the
1313 /// scaling factor.
1315 /// Extracts a single lane (first operand) from a set of vector operands.
1316 /// The lane specifies an index into a vector formed by combining all vector
1317 /// operands (all operands after the first one).
1319 /// Explicit user for the resume phi of the canonical induction in the main
1320 /// VPlan, used by the epilogue vector loop.
1322 /// Extracts the last active lane from a set of vectors. The first operand
1323 /// is the default value if no lanes in the masks are active. Conceptually,
1324 /// this concatenates all data vectors (odd operands), concatenates all
1325 /// masks (even operands -- ignoring the default value), and returns the
1326 /// last active value from the combined data vector using the combined mask.
1328 /// Compute the exiting value of a wide induction after vectorization, that
1329 /// is the value of the last lane of the induction increment (i.e. its
1330 /// backedge value). Has the wide induction recipe as operand.
1333
1334 // The opcodes below are used for VPInstructionWithType.
1335 // NOTE: VPInstructionWithType classes are also used for:
1336 // 1. All CastInst variants - see createVPInstructionsForVPBB, and other
1337 // cases where createScalarCast, createScalarZExtOrTrunc and
1338 // createScalarSExtOrTrunc are invoked.
1339 // 2. Scalar load instructions - see createVPInstructionsForVPBB.
1340
1341 /// Scale the first operand (vector step) by the second operand
1342 /// (scalar-step). Casts both operands to the result type if needed.
1344 // Creates a step vector starting from 0 to VF with a step of 1.
1346 /// Returns the value for vscale.
1348
1350 };
1351
1352 /// Returns true if this VPInstruction generates scalar values for all lanes.
1353 /// Most VPInstructions generate a single value per part, either vector or
1354 /// scalar. VPReplicateRecipe takes care of generating multiple (scalar)
1355 /// values per all lanes, stemming from an original ingredient. This method
1356 /// identifies the (rare) cases of VPInstructions that do so as well, w/o an
1357 /// underlying ingredient.
1358 bool doesGeneratePerAllLanes() const;
1359
1360 /// Return the number of operands determined by the opcode of the
1361 /// VPInstruction, excluding mask. Returns -1u if the number of operands
1362 /// cannot be determined directly by the opcode.
1363 unsigned getNumOperandsForOpcode() const;
1364
1365private:
1366 typedef unsigned char OpcodeTy;
1367 OpcodeTy Opcode;
1368
1369 /// An optional name that can be used for the generated IR instruction.
1370 std::string Name;
1371
1372 /// Returns true if we can generate a scalar for the first lane only if
1373 /// needed.
1374 bool canGenerateScalarForFirstLane() const;
1375
1376 /// Utility methods serving execute(): generates a single vector instance of
1377 /// the modeled instruction. \returns the generated value. In some cases an
1378 /// existing value is returned rather than a generated one.
1379 Value *generate(VPTransformState &State);
1380
1381 /// Returns true if the VPInstruction does not need masking.
1382 bool alwaysUnmasked() const {
1383 if (Opcode == VPInstruction::MaskedCond)
1384 return false;
1385
1386 // For now only VPInstructions with underlying values use masks.
1387 // TODO: provide masks to VPInstructions w/o underlying values.
1388 if (!getUnderlyingValue())
1389 return true;
1390
1391 return Instruction::isCast(Opcode) || Opcode == Instruction::PHI ||
1392 Opcode == Instruction::GetElementPtr;
1393 }
1394
1395public:
1396 VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
1397 const VPIRFlags &Flags = {}, const VPIRMetadata &MD = {},
1398 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "",
1399 Type *ResultTy = nullptr);
1400
1401 VP_CLASSOF_IMPL(VPRecipeBase::VPInstructionSC)
1402
1403 VPInstruction *clone() override {
1405 }
1406
1408 Type *ResultTy = nullptr) {
1409 auto *New = new VPInstruction(Opcode, NewOperands, *this, *this,
1410 getDebugLoc(), Name, ResultTy);
1411 if (getUnderlyingValue())
1412 New->setUnderlyingValue(getUnderlyingInstr());
1413 return New;
1414 }
1415
1416 unsigned getOpcode() const { return Opcode; }
1417
1418 /// Add \p Op as operand of this VPInstruction. Only supported for AnyOf,
1419 /// ComputeReductionResult, BuildVector, BuildStructVector, ExtractLane,
1420 /// ExtractLastActive, FirstActiveLane, LastActiveLane.
1421 void addOperand(VPValue *Op);
1422
1423 /// Generate the instruction.
1424 /// TODO: We currently execute only per-part unless a specific instance is
1425 /// provided.
1426 void execute(VPTransformState &State) override;
1427
1428 /// Return the cost of this VPInstruction.
1429 InstructionCost computeCost(ElementCount VF,
1430 VPCostContext &Ctx) const override;
1431
1432#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1433 /// Print the VPInstruction to dbgs() (for debugging).
1434 LLVM_DUMP_METHOD void dump() const;
1435#endif
1436
1437 bool hasResult() const {
1438 // CallInst may or may not have a result, depending on the called function.
1439 // Conservatively return calls have results for now.
1440 switch (getOpcode()) {
1441 case Instruction::Ret:
1442 case Instruction::UncondBr:
1443 case Instruction::CondBr:
1444 case Instruction::Store:
1445 case Instruction::Switch:
1446 case Instruction::IndirectBr:
1447 case Instruction::Resume:
1448 case Instruction::CatchRet:
1449 case Instruction::Unreachable:
1450 case Instruction::Fence:
1451 case Instruction::AtomicRMW:
1455 return false;
1456 default:
1457 return true;
1458 }
1459 }
1460
1461 /// Returns true if the VPInstruction has a mask operand.
1462 bool isMasked() const {
1463 unsigned NumOpsForOpcode = getNumOperandsForOpcode();
1464 // VPInstructions without a fixed number of operands cannot be masked.
1465 if (NumOpsForOpcode == -1u)
1466 return false;
1467 return NumOpsForOpcode + 1 == getNumOperands();
1468 }
1469
1470 /// Returns the number of operands, excluding the mask if the VPInstruction is
1471 /// masked.
1472 unsigned getNumOperandsWithoutMask() const {
1473 return getNumOperands() - isMasked();
1474 }
1475
1476 /// Add mask \p Mask to an unmasked VPInstruction, if it needs masking.
1477 void addMask(VPValue *Mask) {
1478 assert(!isMasked() && "recipe is already masked");
1479 if (alwaysUnmasked())
1480 return;
1481 assert(Mask->getScalarType()->isIntegerTy(1) &&
1482 "Mask must be an i1 (vector)");
1483 VPUser::addOperand(Mask);
1484 }
1485
1486 /// Returns the mask for the VPInstruction. Returns nullptr for unmasked
1487 /// VPInstructions.
1488 VPValue *getMask() const {
1489 return isMasked() ? getOperand(getNumOperands() - 1) : nullptr;
1490 }
1491
1492 /// Returns an iterator range over the operands excluding the mask operand
1493 /// if present.
1500
1501 /// Returns true if the underlying opcode may read from or write to memory.
1502 bool opcodeMayReadOrWriteFromMemory() const;
1503
1504 /// Returns true if the recipe only uses the first lane of operand \p Op.
1505 bool usesFirstLaneOnly(const VPValue *Op) const override;
1506
1507 /// Returns true if the recipe only uses the first part of operand \p Op.
1508 bool usesFirstPartOnly(const VPValue *Op) const override;
1509
1510 /// Returns true if this VPInstruction produces a scalar value from a vector,
1511 /// e.g. by performing a reduction or extracting a lane.
1512 bool isVectorToScalar() const;
1513
1514 /// Returns true if this VPInstruction's operands are single scalars and the
1515 /// result is also a single scalar.
1516 bool isSingleScalar() const;
1517
1518 /// Returns the symbolic name assigned to the VPInstruction.
1519 StringRef getName() const { return Name; }
1520
1521 /// Set the symbolic name for the VPInstruction.
1522 void setName(StringRef NewName) { Name = NewName.str(); }
1523
1524protected:
1525#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1526 /// Print the VPInstruction to \p O.
1527 void printRecipe(raw_ostream &O, const Twine &Indent,
1528 VPSlotTracker &SlotTracker) const override;
1529#endif
1530};
1531
1532/// A specialization of VPInstruction augmenting it with a dedicated result
1533/// type, to be used when the opcode and operands of the VPInstruction don't
1534/// directly determine the result type. Note that there is no separate recipe ID
1535/// for VPInstructionWithType; it shares the same ID as VPInstruction and is
1536/// distinguished purely by the opcode.
1537/// TODO: Merge with VPInstruction, now that VPRecipeValue provides the type.
1539public:
1541 Type *ResultTy, const VPIRFlags &Flags = {},
1542 const VPIRMetadata &Metadata = {},
1544 const Twine &Name = "")
1545 : VPInstruction(Opcode, Operands, Flags, Metadata, DL, Name, ResultTy) {}
1546
1547 static inline bool classof(const VPRecipeBase *R) {
1548 // VPInstructionWithType are VPInstructions with specific opcodes requiring
1549 // type information.
1550 auto *VPI = dyn_cast<VPInstruction>(R);
1551 if (!VPI)
1552 return false;
1553 unsigned Opc = VPI->getOpcode();
1555 return true;
1556 switch (Opc) {
1560 case Instruction::Load:
1561 return true;
1562 default:
1563 return false;
1564 }
1565 }
1566
1567 static inline bool classof(const VPUser *R) {
1569 }
1570
1571 VPInstruction *clone() override {
1572 auto *New =
1574 *this, *this, getDebugLoc(), getName());
1575 New->setUnderlyingValue(getUnderlyingValue());
1576 return New;
1577 }
1578
1579 void execute(VPTransformState &State) override;
1580
1581 /// Return the cost of this VPInstruction.
1583 VPCostContext &Ctx) const override {
1584 // TODO: Compute accurate cost after retiring the legacy cost model.
1585 return 0;
1586 }
1587
1588 Type *getResultType() const { return getScalarType(); }
1589
1590protected:
1591#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1592 /// Print the recipe.
1593 void printRecipe(raw_ostream &O, const Twine &Indent,
1594 VPSlotTracker &SlotTracker) const override;
1595#endif
1596};
1597
1598/// Helper type to provide functions to access incoming values and blocks for
1599/// phi-like recipes.
1601protected:
1602 /// Return a VPRecipeBase* to the current object.
1603 virtual const VPRecipeBase *getAsRecipe() const = 0;
1604
1605public:
1606 virtual ~VPPhiAccessors() = default;
1607
1608 /// Returns the incoming VPValue with index \p Idx.
1609 VPValue *getIncomingValue(unsigned Idx) const {
1610 return getAsRecipe()->getOperand(Idx);
1611 }
1612
1613 /// Returns the incoming block with index \p Idx.
1614 const VPBasicBlock *getIncomingBlock(unsigned Idx) const;
1615
1616 /// Returns the incoming value for \p VPBB. \p VPBB must be an incoming block.
1617 VPValue *getIncomingValueForBlock(const VPBasicBlock *VPBB) const;
1618
1619 /// Sets the incoming value for \p VPBB to \p V. \p VPBB must be an incoming
1620 /// block.
1621 void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const;
1622
1623 /// Returns the number of incoming values, also number of incoming blocks.
1624 virtual unsigned getNumIncoming() const {
1625 return getAsRecipe()->getNumOperands();
1626 }
1627
1628 /// Returns an iterator range over the incoming values.
1630 return make_range(getAsRecipe()->op_begin(),
1631 getAsRecipe()->op_begin() + getNumIncoming());
1632 }
1633
1635 detail::index_iterator, std::function<const VPBasicBlock *(size_t)>>>;
1636
1637 /// Returns an iterator range over the incoming blocks.
1639 std::function<const VPBasicBlock *(size_t)> GetBlock = [this](size_t Idx) {
1640 return getIncomingBlock(Idx);
1641 };
1642 return map_range(index_range(0, getNumIncoming()), GetBlock);
1643 }
1644
1645 /// Returns an iterator range over pairs of incoming values and corresponding
1646 /// incoming blocks.
1652
1653 /// Removes the incoming value for \p IncomingBlock, which must be a
1654 /// predecessor.
1655 void removeIncomingValueFor(VPBlockBase *IncomingBlock) const;
1656
1657 /// Append \p IncomingV as an incoming value to the phi-like recipe.
1658 void addIncoming(VPValue *IncomingV) {
1659 auto *R = const_cast<VPRecipeBase *>(getAsRecipe());
1660 assert((R->getNumOperands() == 0 ||
1661 IncomingV->getScalarType() == R->getOperand(0)->getScalarType()) &&
1662 "all incoming values must have the same type");
1663 R->addOperand(IncomingV);
1664 }
1665
1666#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1667 /// Print the recipe.
1669#endif
1670};
1671
1674 const Twine &Name = "", Type *ResultTy = nullptr)
1675 : VPInstruction(Instruction::PHI, Operands, Flags, {}, DL, Name,
1676 ResultTy) {}
1677
1678 static inline bool classof(const VPUser *U) {
1679 auto *VPI = dyn_cast<VPInstruction>(U);
1680 return VPI && VPI->getOpcode() == Instruction::PHI;
1681 }
1682
1683 static inline bool classof(const VPValue *V) {
1684 auto *VPI = dyn_cast<VPInstruction>(V);
1685 return VPI && VPI->getOpcode() == Instruction::PHI;
1686 }
1687
1688 static inline bool classof(const VPSingleDefRecipe *SDR) {
1689 auto *VPI = dyn_cast<VPInstruction>(SDR);
1690 return VPI && VPI->getOpcode() == Instruction::PHI;
1691 }
1692
1693 VPPhi *clone() override {
1694 auto *PhiR = new VPPhi(operands(), *this, getDebugLoc(), getName());
1695 PhiR->setUnderlyingValue(getUnderlyingValue());
1696 return PhiR;
1697 }
1698
1699 void execute(VPTransformState &State) override;
1700
1701protected:
1702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1703 /// Print the recipe.
1704 void printRecipe(raw_ostream &O, const Twine &Indent,
1705 VPSlotTracker &SlotTracker) const override;
1706#endif
1707
1708 const VPRecipeBase *getAsRecipe() const override { return this; }
1709};
1710
1711 /// A recipe to wrap an original IR instruction not to be modified during
1712 /// execution, except for PHIs. PHIs are modeled via the VPIRPhi subclass.
1713 /// Except for PHIs, VPIRInstructions cannot have any operands.
1715 Instruction &I;
1716
1717protected:
1718 /// VPIRInstruction::create() should be used to create VPIRInstructions, as
1719 /// subclasses may need to be created, e.g. VPIRPhi.
1721 : VPRecipeBase(VPRecipeBase::VPIRInstructionSC, {}), I(I) {}
1722
1723public:
1724 ~VPIRInstruction() override = default;
1725
1726 /// Create a new VPIRPhi for \p I, if it is a PHINode, otherwise create a
1727 /// VPIRInstruction.
1729
1730 VP_CLASSOF_IMPL(VPRecipeBase::VPIRInstructionSC)
1731
1733 auto *R = create(I);
1734 for (auto *Op : operands())
1735 R->addOperand(Op);
1736 return R;
1737 }
1738
1739 void execute(VPTransformState &State) override;
1740
1741 /// Return the cost of this VPIRInstruction.
1743 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1744
1745 Instruction &getInstruction() const { return I; }
1746
1747 bool usesScalars(const VPValue *Op) const override {
1749 "Op must be an operand of the recipe");
1750 return true;
1751 }
1752
1753 bool usesFirstPartOnly(const VPValue *Op) const override {
1755 "Op must be an operand of the recipe");
1756 return true;
1757 }
1758
1759 bool usesFirstLaneOnly(const VPValue *Op) const override {
1761 "Op must be an operand of the recipe");
1762 return true;
1763 }
1764
1765protected:
1766#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1767 /// Print the recipe.
1768 void printRecipe(raw_ostream &O, const Twine &Indent,
1769 VPSlotTracker &SlotTracker) const override;
1770#endif
1771};
1772
1773/// An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use
1774/// cast/dyn_cast/isa and execute() implementation. A single VPValue operand is
1775/// allowed, and it is used to add a new incoming value for the single
1776/// predecessor VPBB.
1778 public VPPhiAccessors {
1780
1781 static inline bool classof(const VPRecipeBase *U) {
1782 auto *R = dyn_cast<VPIRInstruction>(U);
1783 return R && isa<PHINode>(R->getInstruction());
1784 }
1785
1786 static inline bool classof(const VPUser *U) {
1787 auto *R = dyn_cast<VPRecipeBase>(U);
1788 return R && classof(R);
1789 }
1790
1792
1793 void execute(VPTransformState &State) override;
1794
1795protected:
1796#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1797 /// Print the recipe.
1798 void printRecipe(raw_ostream &O, const Twine &Indent,
1799 VPSlotTracker &SlotTracker) const override;
1800#endif
1801
1802 const VPRecipeBase *getAsRecipe() const override { return this; }
1803};
1804
1805/// VPWidenRecipe is a recipe for producing a widened instruction using the
1806/// opcode and operands of the recipe. This recipe covers most of the
1807/// traditional vectorization cases where each recipe transforms into a
1808/// vectorized version of itself.
1810 public VPIRMetadata {
1811 unsigned Opcode;
1812
1813public:
1815 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1816 DebugLoc DL = {})
1817 : VPWidenRecipe(I.getOpcode(), Operands, Flags, Metadata, DL) {
1818 setUnderlyingValue(&I);
1819 }
1820
1821 VPWidenRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
1822 const VPIRFlags &Flags = {}, const VPIRMetadata &Metadata = {},
1823 DebugLoc DL = {})
1824 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenSC, Operands,
1825 computeScalarTypeForInstruction(Opcode, Operands),
1826 Flags, DL),
1827 VPIRMetadata(Metadata), Opcode(Opcode) {}
1828
1829 ~VPWidenRecipe() override = default;
1830
1832
1834 if (auto *UV = getUnderlyingValue())
1835 return new VPWidenRecipe(*cast<Instruction>(UV), NewOperands, *this,
1836 *this, getDebugLoc());
1837 return new VPWidenRecipe(Opcode, NewOperands, *this, *this, getDebugLoc());
1838 }
1839
1840 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenSC)
1841
1842 /// Produce a widened instruction using the opcode and operands of the recipe,
1843 /// processing State.VF elements.
1844 void execute(VPTransformState &State) override;
1845
1846 /// Return the cost of this VPWidenRecipe.
1847 InstructionCost computeCost(ElementCount VF,
1848 VPCostContext &Ctx) const override;
1849
1850 unsigned getOpcode() const { return Opcode; }
1851
1852protected:
1853#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1854 /// Print the recipe.
1855 void printRecipe(raw_ostream &O, const Twine &Indent,
1856 VPSlotTracker &SlotTracker) const override;
1857#endif
1858
1859 /// Returns true if the recipe only uses the first lane of operand \p Op.
1860 bool usesFirstLaneOnly(const VPValue *Op) const override {
1862 "Op must be an operand of the recipe");
1863 return Opcode == Instruction::Select && Op == getOperand(0) &&
1864 Op->isDefinedOutsideLoopRegions();
1865 }
1866};
1867
1868/// VPWidenCastRecipe is a recipe to create vector cast instructions.
1869/// TODO: Merge with VPWidenRecipe now that type is associated to every
1870/// VPRecipeValue.
1872 /// Cast instruction opcode.
1873 Instruction::CastOps Opcode;
1874
1875public:
1877 CastInst *CI = nullptr, const VPIRFlags &Flags = {},
1878 const VPIRMetadata &Metadata = {},
1880 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCastSC, Op, ResultTy, Flags,
1881 DL),
1882 VPIRMetadata(Metadata), Opcode(Opcode) {
1883 assert(flagsValidForOpcode(Opcode) &&
1884 "Set flags not supported for the provided opcode");
1886 "Opcode requires specific flags to be set");
1888 }
1889
1890 ~VPWidenCastRecipe() override = default;
1891
1893 return new VPWidenCastRecipe(Opcode, getOperand(0), getScalarType(),
1895 *this, *this, getDebugLoc());
1896 }
1897
1898 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCastSC)
1899
1900 /// Produce widened copies of the cast.
1901 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
1902
1903 /// Return the cost of this VPWidenCastRecipe.
1905 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
1906
1907 Instruction::CastOps getOpcode() const { return Opcode; }
1908
1909protected:
1910#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1911 /// Print the recipe.
1912 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
1913 VPSlotTracker &SlotTracker) const override;
1914#endif
1915};
1916
1917/// A recipe for widening vector intrinsics.
1919 /// ID of the vector intrinsic to widen.
1920 Intrinsic::ID VectorIntrinsicID;
1921
1922 /// True if the intrinsic may read from memory.
1923 bool MayReadFromMemory;
1924
1925 /// True if the intrinsic may write to memory.
1926 bool MayWriteToMemory;
1927
1928 /// True if the intrinsic may have side-effects.
1929 bool MayHaveSideEffects;
1930
1931protected:
1932 VPWidenIntrinsicRecipe(const unsigned char SC,
1933 Intrinsic::ID VectorIntrinsicID,
1934 ArrayRef<VPValue *> CallArguments, Type *Ty,
1935 const VPIRFlags &Flags = {},
1936 const VPIRMetadata &MD = {},
1938 : VPRecipeWithIRFlags(SC, CallArguments, Ty, Flags, DL), VPIRMetadata(MD),
1939 VectorIntrinsicID(VectorIntrinsicID) {
1940 LLVMContext &Ctx = Ty->getContext();
1941 AttributeSet Attrs = Intrinsic::getFnAttributes(Ctx, VectorIntrinsicID);
1942 MemoryEffects ME = Attrs.getMemoryEffects();
1943 MayReadFromMemory = !ME.onlyWritesMemory();
1944 MayWriteToMemory = !ME.onlyReadsMemory();
1945 MayHaveSideEffects = MayWriteToMemory ||
1946 !Attrs.hasAttribute(Attribute::NoUnwind) ||
1947 !Attrs.hasAttribute(Attribute::WillReturn);
1948 }
1949
1950 /// Helper function to produce the widened intrinsic call.
1951 CallInst *createVectorCall(VPTransformState &State);
1952
1953public:
1955 ArrayRef<VPValue *> CallArguments, Type *Ty,
1956 const VPIRFlags &Flags = {},
1957 const VPIRMetadata &MD = {},
1959 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenIntrinsicSC, CallArguments, Ty,
1960 Flags, DL),
1961 VPIRMetadata(MD), VectorIntrinsicID(VectorIntrinsicID),
1962 MayReadFromMemory(CI.mayReadFromMemory()),
1963 MayWriteToMemory(CI.mayWriteToMemory()),
1964 MayHaveSideEffects(CI.mayHaveSideEffects()) {
1965 setUnderlyingValue(&CI);
1966 }
1967
1969 ArrayRef<VPValue *> CallArguments, Type *Ty,
1970 const VPIRFlags &Flags = {},
1971 const VPIRMetadata &Metadata = {},
1973 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenIntrinsicSC,
1974 VectorIntrinsicID, CallArguments, Ty, Flags,
1975 Metadata, DL) {}
1976
1977 ~VPWidenIntrinsicRecipe() override = default;
1978
1980 if (Value *CI = getUnderlyingValue())
1981 return new VPWidenIntrinsicRecipe(*cast<CallInst>(CI), VectorIntrinsicID,
1982 operands(), getScalarType(), *this,
1983 *this, getDebugLoc());
1984 return new VPWidenIntrinsicRecipe(VectorIntrinsicID, operands(),
1985 getScalarType(), *this, *this,
1986 getDebugLoc());
1987 }
1988
1989 static inline bool classof(const VPRecipeBase *R) {
1990 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntrinsicSC ||
1991 R->getVPRecipeID() == VPRecipeBase::VPWidenMemIntrinsicSC;
1992 }
1993
1994 static inline bool classof(const VPUser *U) {
1995 auto *R = dyn_cast<VPRecipeBase>(U);
1996 return R && classof(R);
1997 }
1998
1999 static inline bool classof(const VPValue *V) {
2000 auto *R = V->getDefiningRecipe();
2001 return R && classof(R);
2002 }
2003
2004 static inline bool classof(const VPSingleDefRecipe *R) {
2005 return classof(static_cast<const VPRecipeBase *>(R));
2006 }
2007
2008 /// Produce a widened version of the vector intrinsic.
2009 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
2010
2011 /// Compute the cost of a vector intrinsic with \p ID and \p Operands.
2014 const VPRecipeWithIRFlags &R,
2015 ElementCount VF, VPCostContext &Ctx);
2016
2017 /// Return the cost of this vector intrinsic.
2019 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
2020
2021 /// Return the ID of the intrinsic.
2022 Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
2023
2024 /// Return the name of the intrinsic as a string.
2026
2027 /// Returns true if the intrinsic may read from memory.
2028 bool mayReadFromMemory() const { return MayReadFromMemory; }
2029
2030 /// Returns true if the intrinsic may write to memory.
2031 bool mayWriteToMemory() const { return MayWriteToMemory; }
2032
2033 /// Returns true if the intrinsic may have side-effects.
2034 bool mayHaveSideEffects() const { return MayHaveSideEffects; }
2035
2036 LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override;
2037
2038protected:
2039#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2040 /// Print the recipe.
2041 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
2042 VPSlotTracker &SlotTracker) const override;
2043#endif
2044};
2045
2046/// A recipe for widening vector memory intrinsics.
2048 /// Alignment information for this memory access.
2049 Align Alignment;
2050
2051public:
2052 // TODO: support StoreInst for strided store
2054 ArrayRef<VPValue *> CallArguments, Type *Ty,
2055 Align Alignment, const VPIRMetadata &MD = {},
2057 : VPWidenIntrinsicRecipe(VPRecipeBase::VPWidenMemIntrinsicSC,
2058 VectorIntrinsicID, CallArguments, Ty, {}, MD,
2059 DL),
2060 Alignment(Alignment) {
2061 assert(VectorIntrinsicID == Intrinsic::experimental_vp_strided_load &&
2062 "Unexpected intrinsic");
2063 }
2064
2065 ~VPWidenMemIntrinsicRecipe() override = default;
2066
2069 getScalarType(), Alignment, *this,
2070 getDebugLoc());
2071 }
2072
2073 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenMemIntrinsicSC)
2074
2075 /// Produce a widened version of the vector memory intrinsic.
2076 void execute(VPTransformState &State) override;
2077
2078 /// Helper function for computing the cost of vector memory intrinsic.
2080 bool IsMasked, Align Alignment,
2081 VPCostContext &Ctx);
2082
2083 /// Return the cost of this vector memory intrinsic.
2085 VPCostContext &Ctx) const override;
2086};
2087
2088/// A recipe for widening Call instructions using library calls.
2090 public VPIRMetadata {
2091 /// Variant stores a pointer to the chosen function. There is a 1:1 mapping
2092 /// between a given VF and the chosen vectorized variant, so there will be a
2093 /// different VPlan for each VF with a valid variant.
2094 Function *Variant;
2095
2096public:
2098 ArrayRef<VPValue *> CallArguments,
2099 const VPIRFlags &Flags = {},
2100 const VPIRMetadata &Metadata = {}, DebugLoc DL = {})
2101 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCallSC, CallArguments,
2102 toScalarizedTy(Variant->getReturnType()), Flags,
2103 DL),
2104 VPIRMetadata(Metadata), Variant(Variant) {
2105 setUnderlyingValue(UV);
2106 assert(
2107 isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
2108 "last operand must be the called function");
2109 assert(cast<Function>(CallArguments.back()->getLiveInIRValue())
2110 ->getReturnType() == getScalarType() &&
2111 "Scalar type must match return type of called scalar function");
2112 }
2113
2114 ~VPWidenCallRecipe() override = default;
2115
2117 return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
2118 *this, *this, getDebugLoc());
2119 }
2120
2121 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCallSC)
2122
2123 /// Produce a widened version of the call instruction.
2124 void execute(VPTransformState &State) override;
2125
2126 /// Return the cost of this VPWidenCallRecipe.
2127 InstructionCost computeCost(ElementCount VF,
2128 VPCostContext &Ctx) const override;
2129
2130 /// Return the cost of widening a call using the vector function \p Variant.
2131 static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx);
2132
2136
2139
2140 /// Returns true if the recipe only uses the first lane of operand \p Op.
2141 bool usesFirstLaneOnly(const VPValue *Op) const override;
2142
2143protected:
2144#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2145 /// Print the recipe.
2146 void printRecipe(raw_ostream &O, const Twine &Indent,
2147 VPSlotTracker &SlotTracker) const override;
2148#endif
2149};
2150
2151/// A recipe representing a sequence of load -> update -> store as part of
2152/// a histogram operation. This means there may be aliasing between vector
2153/// lanes, which is handled by the llvm.experimental.vector.histogram family
2154/// of intrinsics. The only update operations currently supported are
2155/// 'add' and 'sub' where the other term is loop-invariant.
2157 /// Opcode of the update operation, currently either add or sub.
2158 unsigned Opcode;
2159
2160public:
2161 VPHistogramRecipe(unsigned Opcode, ArrayRef<VPValue *> Operands,
2162 const VPIRMetadata &Metadata = {},
2164 : VPRecipeBase(VPRecipeBase::VPHistogramSC, Operands, DL),
2165 VPIRMetadata(Metadata), Opcode(Opcode) {}
2166
2167 ~VPHistogramRecipe() override = default;
2168
2170 return new VPHistogramRecipe(Opcode, operands(), *this, getDebugLoc());
2171 }
2172
2173 VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC);
2174
2175 /// Produce a vectorized histogram operation.
2176 void execute(VPTransformState &State) override;
2177
2178 /// Return the cost of this VPHistogramRecipe.
2180 VPCostContext &Ctx) const override;
2181
2182 unsigned getOpcode() const { return Opcode; }
2183
2184 /// Return the mask operand if one was provided, or a null pointer if all
2185 /// lanes should be executed unconditionally.
2186 VPValue *getMask() const {
2187 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2188 }
2189
2190protected:
2191#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2192 /// Print the recipe
2193 void printRecipe(raw_ostream &O, const Twine &Indent,
2194 VPSlotTracker &SlotTracker) const override;
2195#endif
2196};
2197
2198/// A recipe for handling GEP instructions.
2200 Type *SourceElementTy;
2201
2202 bool isPointerLoopInvariant() const {
2203 return getOperand(0)->isDefinedOutsideLoopRegions();
2204 }
2205
2206 bool isIndexLoopInvariant(unsigned I) const {
2207 return getOperand(I + 1)->isDefinedOutsideLoopRegions();
2208 }
2209
2210public:
2211 VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef<VPValue *> Operands,
2212 const VPIRFlags &Flags = {},
2214 GetElementPtrInst *UV = nullptr)
2215 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenGEPSC, Operands,
2216 Operands[0]->getScalarType(), Flags, DL),
2217 SourceElementTy(SourceElementTy) {
2218 if (UV) {
2219 setUnderlyingValue(UV);
2222 assert(Metadata.empty() && "unexpected metadata on GEP");
2223 }
2224 }
2225
2226 ~VPWidenGEPRecipe() override = default;
2227
2233
2234 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenGEPSC)
2235
2236 /// This recipe generates a GEP instruction.
2237 unsigned getOpcode() const { return Instruction::GetElementPtr; }
2238
2239 /// Generate the gep nodes.
2240 void execute(VPTransformState &State) override;
2241
2242 Type *getSourceElementType() const { return SourceElementTy; }
2243
2244 /// Return the cost of this VPWidenGEPRecipe.
2246 VPCostContext &Ctx) const override {
2247 // TODO: Compute accurate cost after retiring the legacy cost model.
2248 return 0;
2249 }
2250
2251 /// Returns true if the recipe only uses the first lane of operand \p Op.
2252 bool usesFirstLaneOnly(const VPValue *Op) const override;
2253
2254protected:
2255#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2256 /// Print the recipe.
2257 void printRecipe(raw_ostream &O, const Twine &Indent,
2258 VPSlotTracker &SlotTracker) const override;
2259#endif
2260};
2261
2262/// A recipe to compute a pointer to the last element of each part of a widened
2263/// memory access for widened memory accesses of SourceElementTy. Used for
2264/// VPWidenMemoryRecipes or VPInterleaveRecipes that are reversed. An extra
2265/// Offset operand is added by convertToConcreteRecipes when UF = 1, and by the
2266/// unroller otherwise.
2268 Type *SourceElementTy;
2269
2270 /// The constant stride of the pointer computed by this recipe, expressed in
2271 /// units of SourceElementTy.
2272 int64_t Stride;
2273
2274public:
/// Create a vector-end-pointer recipe with operands {\p Ptr, \p VF}. The
/// result type is the scalar type of \p Ptr, and \p GEPFlags and \p DL are
/// forwarded to VPRecipeWithIRFlags. \p Stride is expressed in units of
/// \p SourceElementTy and must be negative (asserted).
2275 VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy,
2276 int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
2277 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorEndPointerSC, {Ptr, VF},
2278 Ptr->getScalarType(), GEPFlags, DL),
2279 SourceElementTy(SourceElementTy), Stride(Stride) {
2280 assert(Stride < 0 && "Stride must be negative");
2281 }
2282
2283 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorEndPointerSC)
2284
2285 Type *getSourceElementType() const { return SourceElementTy; }
2286 int64_t getStride() const { return Stride; }
2287 VPValue *getPointer() const { return getOperand(0); }
2288 VPValue *getVFValue() const { return getOperand(1); }
2290 return getNumOperands() == 3 ? getOperand(2) : nullptr;
2291 }
2292
2293 /// Adds the offset operand to the recipe.
2294 /// Offset = Stride * (VF - 1) + Part * Stride * VF.
2295 void materializeOffset(unsigned Part = 0);
2296
2297 /// Append \p Offset as the offset operand. The offset is an integer index
2298 /// expressed in units of SourceElementTy.
2300 assert(Offset->getScalarType()->isIntegerTy() &&
2301 "offset must be an integer index");
2303 }
2304
2305 void execute(VPTransformState &State) override;
2306
2307 bool usesFirstLaneOnly(const VPValue *Op) const override {
2309 "Op must be an operand of the recipe");
2310 return true;
2311 }
2312
2313 /// Return the cost of this VPVectorEndPointerRecipe.
2315 VPCostContext &Ctx) const override {
2316 // TODO: Compute accurate cost after retiring the legacy cost model.
2317 return 0;
2318 }
2319
2320 /// Returns true if the recipe only uses the first part of operand \p Op.
2321 bool usesFirstPartOnly(const VPValue *Op) const override {
2323 "Op must be an operand of the recipe");
2324 assert(getNumOperands() <= 2 && "must have at most two operands");
2325 return true;
2326 }
2327
2329 auto *VEPR = new VPVectorEndPointerRecipe(
2332 if (auto *Offset = getOffset())
2333 VEPR->addOffset(Offset);
2334 return VEPR;
2335 }
2336
2337protected:
2338#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2339 /// Print the recipe.
2340 void printRecipe(raw_ostream &O, const Twine &Indent,
2341 VPSlotTracker &SlotTracker) const override;
2342#endif
2343};
2344
2345/// A recipe to compute the pointers for widened memory accesses of \p
2346/// SourceElementTy, with the \p Stride expressed in units of \p
2347/// SourceElementTy. Unrolling adds an extra \p VFxPart operand for unrolled
2348/// parts > 0 and it produces `GEP SourceElementTy Ptr, VFxPart * Stride`.
2350 Type *SourceElementTy;
2351
2352public:
/// Create a vector-pointer recipe with operands {\p Ptr, \p Stride}. The
/// result type is the scalar type of \p Ptr, and \p GEPFlags and \p DL are
/// forwarded to VPRecipeWithIRFlags. \p SourceElementTy is stored and
/// returned by getSourceElementType().
2353 VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride,
2354 GEPNoWrapFlags GEPFlags, DebugLoc DL)
2355 : VPRecipeWithIRFlags(VPRecipeBase::VPVectorPointerSC,
2356 ArrayRef<VPValue *>({Ptr, Stride}),
2357 Ptr->getScalarType(), GEPFlags, DL),
2358 SourceElementTy(SourceElementTy) {}
2359
2360 VP_CLASSOF_IMPL(VPRecipeBase::VPVectorPointerSC)
2361
2362 VPValue *getStride() const { return getOperand(1); }
2363
2365 return getNumOperands() > 2 ? getOperand(2) : nullptr;
2366 }
2367
2368 /// Add the per-part offset (VFxPart) used for unrolled parts > 0.
2369 void addPerPartOffset(VPValue *VFxPart) {
2370 assert(VFxPart->getScalarType()->isIntegerTy() &&
2371 "per-part offset must be an integer index");
2372 VPUser::addOperand(VFxPart);
2373 }
2374
2375 void execute(VPTransformState &State) override;
2376
2377 Type *getSourceElementType() const { return SourceElementTy; }
2378
2379 bool usesFirstLaneOnly(const VPValue *Op) const override {
2381 "Op must be an operand of the recipe");
2382 return true;
2383 }
2384
2385 /// Returns true if the recipe only uses the first part of operand \p Op.
2386 bool usesFirstPartOnly(const VPValue *Op) const override {
2388 "Op must be an operand of the recipe");
2389 assert(getNumOperands() <= 2 && "must have at most two operands");
2390 return true;
2391 }
2392
2394 auto *Clone =
2395 new VPVectorPointerRecipe(getOperand(0), SourceElementTy, getStride(),
2397 if (auto *VFxPart = getVFxPart())
2398 Clone->addPerPartOffset(VFxPart);
2399 return Clone;
2400 }
2401
2402 /// Return the cost of this VPVectorPointerRecipe.
2404 VPCostContext &Ctx) const override {
2405 // TODO: Compute accurate cost after retiring the legacy cost model.
2406 return 0;
2407 }
2408
2409protected:
2410#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2411 /// Print the recipe.
2412 void printRecipe(raw_ostream &O, const Twine &Indent,
2413 VPSlotTracker &SlotTracker) const override;
2414#endif
2415};
2416
2417/// A pure virtual base class for all recipes modeling header phis, including
2418/// phis for first order recurrences, pointer inductions and reductions. The
2419/// start value is the first operand of the recipe and the incoming value from
2420/// the backedge is the second operand.
2421///
2422/// Inductions are modeled using the following sub-classes:
2423/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
2424/// floating point inductions with arbitrary start and step values. Produces
2425/// a vector PHI per-part.
2426/// * VPWidenPointerInductionRecipe: Generate vector and scalar values for a
2427/// pointer induction. Produces either a vector PHI per-part or scalar values
2428/// per-lane based on the canonical induction.
2429/// * VPFirstOrderRecurrencePHIRecipe
2430/// * VPReductionPHIRecipe
2431/// * VPActiveLaneMaskPHIRecipe
2432/// * VPEVLBasedIVPHIRecipe
2433///
2434/// Note that the canonical IV is modeled as a VPRegionValue associated with
2435/// its loop region.
2437 public VPPhiAccessors {
2438protected:
2439 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2440 VPValue *Start, DebugLoc DL = DebugLoc::getUnknown())
2441 : VPHeaderPHIRecipe(VPRecipeID, UnderlyingInstr, Start,
2442 Start->getScalarType(), DL) {}
2443
2444 VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr,
2445 VPValue *Start, Type *ResultTy, DebugLoc DL)
2446 : VPSingleDefRecipe(VPRecipeID, Start, ResultTy, UnderlyingInstr, DL) {}
2447
2448 const VPRecipeBase *getAsRecipe() const override { return this; }
2449
2450public:
2451 ~VPHeaderPHIRecipe() override = default;
2452
2453 /// Method to support type inquiry through isa, cast, and dyn_cast.
2454 static inline bool classof(const VPRecipeBase *R) {
2455 return R->getVPRecipeID() >= VPRecipeBase::VPFirstHeaderPHISC &&
2456 R->getVPRecipeID() <= VPRecipeBase::VPLastHeaderPHISC;
2457 }
2458 static inline bool classof(const VPValue *V) {
2459 return isa<VPHeaderPHIRecipe>(V->getDefiningRecipe());
2460 }
2461 static inline bool classof(const VPSingleDefRecipe *R) {
2462 return isa<VPHeaderPHIRecipe>(static_cast<const VPRecipeBase *>(R));
2463 }
2464
2465 /// Generate the phi nodes.
2466 void execute(VPTransformState &State) override = 0;
2467
2468 /// Return the cost of this header phi recipe.
2470 VPCostContext &Ctx) const override;
2471
2472 /// Returns the start value of the phi, if one is set.
2474 return getNumOperands() == 0 ? nullptr : getOperand(0);
2475 }
2477 return getNumOperands() == 0 ? nullptr : getOperand(0);
2478 }
2479
2480 /// Update the start value of the recipe.
2482
2483 /// Returns the incoming value from the loop backedge.
2485 return getOperand(1);
2486 }
2487
2488 /// Update the incoming value from the loop backedge.
2490
2491 /// Add \p V as the incoming value from the loop backedge.
2493 assert(getNumOperands() == 1 &&
2494 "backedge value must be appended right after construction");
2495 assert(V->getScalarType() == getScalarType() &&
2496 "backedge value must have the same type as the start value");
2498 }
2499
2500 /// Returns the backedge value as a recipe. The backedge value is guaranteed
2501 /// to be a recipe.
2503 return *getBackedgeValue()->getDefiningRecipe();
2504 }
2505
2506protected:
2507#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2508 /// Print the recipe.
2509 void printRecipe(raw_ostream &O, const Twine &Indent,
2510 VPSlotTracker &SlotTracker) const override = 0;
2511#endif
2512};
2513
2514/// Base class for widened induction (VPWidenIntOrFpInductionRecipe and
2515/// VPWidenPointerInductionRecipe), providing shared functionality, including
2516/// retrieving the step value, induction descriptor and original phi node.
2518 InductionDescriptor IndDesc;
2519
2520public:
2521 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2522 VPValue *Step, const InductionDescriptor &IndDesc,
2523 DebugLoc DL)
2524 : VPWidenInductionRecipe(Kind, IV, Start, Step, IndDesc,
2525 Start->getScalarType(), DL) {}
2526
/// Create a widened induction recipe of kind \p Kind with an explicit result
/// type \p ResultTy. \p Start is passed to VPHeaderPHIRecipe as the start
/// value, and \p Step is then appended as the next operand (read back by
/// getStepValue() as operand 1). \p IndDesc is copied into the recipe.
2527 VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start,
2528 VPValue *Step, const InductionDescriptor &IndDesc,
2529 Type *ResultTy, DebugLoc DL)
2530 : VPHeaderPHIRecipe(Kind, IV, Start, ResultTy, DL), IndDesc(IndDesc) {
2531 addOperand(Step);
2532 }
2533
2534 /// After unrolling, append the splat-VF step (`VF * step`) and the value of
2535 /// the induction at the last unrolled part.
2536 void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart) {
2537 assert(LastPart->getScalarType() == getScalarType() &&
2538 "last-part value must match the induction recipe's scalar type");
2540 ? SplatVFStep->getScalarType()->isIntegerTy()
2541 : SplatVFStep->getScalarType() == getScalarType()) &&
2542 "splat-step must match the induction type for non-pointer "
2543 "inductions, or be an integer index for pointer inductions");
2544 VPUser::addOperand(SplatVFStep);
2545 VPUser::addOperand(LastPart);
2546 }
2547
2548 static inline bool classof(const VPRecipeBase *R) {
2549 return R->getVPRecipeID() == VPRecipeBase::VPWidenIntOrFpInductionSC ||
2550 R->getVPRecipeID() == VPRecipeBase::VPWidenPointerInductionSC;
2551 }
2552
2553 static inline bool classof(const VPValue *V) {
2554 auto *R = V->getDefiningRecipe();
2555 return R && classof(R);
2556 }
2557
2558 static inline bool classof(const VPSingleDefRecipe *R) {
2559 return classof(static_cast<const VPRecipeBase *>(R));
2560 }
2561
2562 void execute(VPTransformState &State) override = 0;
2563
2564 /// Returns the start value of the induction.
2566
2567 /// Returns the step value of the induction.
2569 const VPValue *getStepValue() const { return getOperand(1); }
2570
2571 /// Update the step value of the recipe.
2572 void setStepValue(VPValue *V) { setOperand(1, V); }
2573
2575 const VPValue *getVFValue() const { return getOperand(2); }
2576
2577 /// Returns the number of incoming values, also number of incoming blocks.
2578 /// Note that at the moment, VPWidenPointerInductionRecipe only has a single
2579 /// incoming value, its start value.
2580 unsigned getNumIncoming() const override { return 1; }
2581
2582 /// Returns the underlying PHINode if one exists, or null otherwise.
2586
2587 /// Returns the induction descriptor for the recipe.
2588 const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
2589
2590 /// Returns the SCEV predicates associated with this induction.
2592 return IndDesc.getNoWrapPredicates();
2593 }
2594
2596 // TODO: All operands of base recipe must exist and be at same index in
2597 // derived recipe.
2599 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2600 }
2601
2603 // TODO: All operands of base recipe must exist and be at same index in
2604 // derived recipe.
2606 "VPWidenIntOrFpInductionRecipe generates its own backedge value");
2607 }
2608
2609 /// Returns true if the recipe only uses the first lane of operand \p Op.
2610 bool usesFirstLaneOnly(const VPValue *Op) const override {
2612 "Op must be an operand of the recipe");
2613 // The recipe creates its own wide start value, so it only requests the
2614 // first lane of the operand.
2615 // TODO: Remove once creating the start value is modeled separately.
2616 return Op == getStartValue() || Op == getStepValue();
2617 }
2618};
2619
2620/// A recipe for handling phi nodes of integer and floating-point inductions,
2621/// producing their vector values. This is an abstract recipe and must be
2622/// converted to concrete recipes before executing.
2624 public VPIRFlags {
2625 TruncInst *Trunc;
2626
2627 // If this recipe is unrolled it will have 2 additional operands.
2628 bool isUnrolled() const { return getNumOperands() == 5; }
2629
2630public:
2632 VPValue *VF, const InductionDescriptor &IndDesc,
2633 const VPIRFlags &Flags, DebugLoc DL)
2634 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2635 Start, Step, IndDesc, DL),
2636 VPIRFlags(Flags), Trunc(nullptr) {
2637 addOperand(VF);
2638 }
2639
2641 VPValue *VF, const InductionDescriptor &IndDesc,
2642 TruncInst *Trunc, const VPIRFlags &Flags,
2643 DebugLoc DL)
2644 : VPWidenInductionRecipe(VPRecipeBase::VPWidenIntOrFpInductionSC, IV,
2645 Start, Step, IndDesc,
2646 Trunc ? Trunc->getType() : Start->getType(), DL),
2647 VPIRFlags(Flags), Trunc(Trunc) {
2648 addOperand(VF);
2650 if (Trunc)
2652 assert(Metadata.empty() && "unexpected metadata on Trunc");
2653 }
2654
2656
2662
2663 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenIntOrFpInductionSC)
2664
2665 void execute(VPTransformState &State) override {
2666 llvm_unreachable("cannot execute this recipe, should be expanded via "
2667 "expandVPWidenIntOrFpInductionRecipe");
2668 }
2669
2670 /// Returns the start value of the induction.
2672
2673 /// If the recipe has been unrolled, return the VPValue for the induction
2674 /// increment, otherwise return null.
2676 return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
2677 }
2678
2679 /// Returns the number of incoming values, also number of incoming blocks.
2680 /// Note that at the moment, VPWidenIntOrFpInductionRecipes only have a single
2681 /// incoming value, its start value.
2682 unsigned getNumIncoming() const override { return 1; }
2683
2684 /// Returns the first defined value as TruncInst, if it is one or nullptr
2685 /// otherwise.
2686 TruncInst *getTruncInst() { return Trunc; }
2687 const TruncInst *getTruncInst() const { return Trunc; }
2688
2689 /// Returns true if the induction is canonical, i.e. starting at 0 and
2690 /// incremented by UF * VF (= the original IV is incremented by 1) and has the
2691 /// same type as the canonical induction.
2692 bool isCanonical() const;
2693
2694 /// Returns the VPValue representing the value of this induction at
2695 /// the last unrolled part, if it exists. Returns itself if unrolling did not
2696 /// take place.
2698 return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
2699 }
2700
2701protected:
2702#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2703 /// Print the recipe.
2704 void printRecipe(raw_ostream &O, const Twine &Indent,
2705 VPSlotTracker &SlotTracker) const override;
2706#endif
2707};
2708
2710public:
2711 /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p
2712 /// Start and the number of elements unrolled \p NumUnrolledElems, typically
2713 /// VF*UF.
2715 VPValue *NumUnrolledElems,
2716 const InductionDescriptor &IndDesc, DebugLoc DL)
2717 : VPWidenInductionRecipe(VPRecipeBase::VPWidenPointerInductionSC, Phi,
2718 Start, Step, IndDesc, DL) {
2719 addOperand(NumUnrolledElems);
2720 }
2721
2723
2729
2730 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPointerInductionSC)
2731
2732 /// Generate vector values for the pointer induction.
2733 void execute(VPTransformState &State) override {
2734 llvm_unreachable("cannot execute this recipe, should be expanded via "
2735 "expandVPWidenPointerInduction");
2736 };
2737
2738 /// Returns true if only scalar values will be generated.
2739 bool onlyScalarsGenerated(bool IsScalable);
2740
2741protected:
2742#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2743 /// Print the recipe.
2744 void printRecipe(raw_ostream &O, const Twine &Indent,
2745 VPSlotTracker &SlotTracker) const override;
2746#endif
2747};
2748
2749/// A recipe for widened phis. Incoming values are operands of the recipe and
2750/// their operand index corresponds to the incoming predecessor block. If the
2751/// recipe is placed in an entry block to a (non-replicate) region, it must have
2752/// exactly 2 incoming values, the first from the predecessor of the region and
2753/// the second from the exiting block of the region.
2755 public VPPhiAccessors {
2756 /// Name to use for the generated IR instruction for the widened phi.
2757 std::string Name;
2758
2759public:
2760 /// Create a new VPWidenPHIRecipe with incoming values \p IncomingValues,
2761 /// debug location \p DL and \p Name.
2763 DebugLoc DL = DebugLoc::getUnknown(), const Twine &Name = "")
2764 : VPSingleDefRecipe(VPRecipeBase::VPWidenPHISC, IncomingValues,
2765 IncomingValues[0]->getScalarType(),
2766 /*UV=*/nullptr, DL),
2767 Name(Name.str()) {
2768 assert(all_of(IncomingValues,
2769 [this](VPValue *VPV) {
2770 return VPV->getScalarType() == getScalarType();
2771 }) &&
2772 "all incoming values must have the same type");
2773 }
2774
2776 return new VPWidenPHIRecipe(operands(), getDebugLoc(), Name);
2777 }
2778
2779 ~VPWidenPHIRecipe() override = default;
2780
2781 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenPHISC)
2782
2783 /// Generate the phi/select nodes.
2784 void execute(VPTransformState &State) override;
2785
2786 /// Return the cost of this VPWidenPHIRecipe.
2788 VPCostContext &Ctx) const override;
2789
2790protected:
2791#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2792 /// Print the recipe.
2793 void printRecipe(raw_ostream &O, const Twine &Indent,
2794 VPSlotTracker &SlotTracker) const override;
2795#endif
2796
2797 const VPRecipeBase *getAsRecipe() const override { return this; }
2798};
2799
2800/// A recipe for handling first-order recurrence phis. The start value is the
2801/// first operand of the recipe and the incoming value from the backedge is the
2802/// second operand.
2805 VPValue &BackedgeValue)
2806 : VPHeaderPHIRecipe(VPRecipeBase::VPFirstOrderRecurrencePHISC, Phi,
2807 &Start) {
2808 addOperand(&BackedgeValue);
2809 }
2810
2811 VP_CLASSOF_IMPL(VPRecipeBase::VPFirstOrderRecurrencePHISC)
2812
2817
2818 void execute(VPTransformState &State) override;
2819
2820 /// Return the cost of this first-order recurrence phi recipe.
2822 VPCostContext &Ctx) const override;
2823
2824 /// Returns true if the recipe only uses the first lane of operand \p Op.
2825 bool usesFirstLaneOnly(const VPValue *Op) const override {
2827 "Op must be an operand of the recipe");
2828 return Op == getStartValue();
2829 }
2830
2831protected:
2832#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2833 /// Print the recipe.
2834 void printRecipe(raw_ostream &O, const Twine &Indent,
2835 VPSlotTracker &SlotTracker) const override;
2836#endif
2837};
2838
2839/// Possible variants of a reduction.
2840
2841/// This reduction is ordered and in-loop.
2842struct RdxOrdered {};
2843/// This reduction is in-loop.
2844struct RdxInLoop {};
2845/// This reduction is unordered with the partial result scaled down by some
2846/// factor.
2849};
2850using ReductionStyle = std::variant<RdxOrdered, RdxInLoop, RdxUnordered>;
2851
2852inline ReductionStyle getReductionStyle(bool InLoop, bool Ordered,
2853 unsigned ScaleFactor) {
2854 assert((!Ordered || InLoop) && "Ordered implies in-loop");
2855 if (Ordered)
2856 return RdxOrdered{};
2857 if (InLoop)
2858 return RdxInLoop{};
2859 return RdxUnordered{/*VFScaleFactor=*/ScaleFactor};
2860}
2861
2862/// A recipe for handling reduction phis. The start value is the first operand
2863/// of the recipe and the incoming value from the backedge is the second
2864/// operand.
2866 /// The recurrence kind of the reduction.
2867 const RecurKind Kind;
2868
2869 ReductionStyle Style;
2870
2871 /// The phi is part of a multi-use reduction (e.g., used in FindIV
2872 /// patterns for argmin/argmax).
2873 /// TODO: Also support cases where the phi itself has a single use, but its
2874 /// compare has multiple uses.
2875 bool HasUsesOutsideReductionChain;
2876
2877public:
2878 /// Create a new VPReductionPHIRecipe for the reduction \p Phi.
2880 VPValue &BackedgeValue, ReductionStyle Style,
2881 const VPIRFlags &Flags,
2882 bool HasUsesOutsideReductionChain = false)
2883 : VPHeaderPHIRecipe(VPRecipeBase::VPReductionPHISC, Phi, &Start),
2884 VPIRFlags(Flags), Kind(Kind), Style(Style),
2885 HasUsesOutsideReductionChain(HasUsesOutsideReductionChain) {
2886 addOperand(&BackedgeValue);
2887 }
2888
2889 ~VPReductionPHIRecipe() override = default;
2890
2892 VPValue *BackedgeValue) {
2893 return new VPReductionPHIRecipe(
2895 *Start, *BackedgeValue, Style, *this, HasUsesOutsideReductionChain);
2896 }
2897
2901
2902 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionPHISC)
2903
2904 /// Generate the phi/select nodes.
2905 void execute(VPTransformState &State) override;
2906
2907 /// Get the factor that the VF of this recipe's output should be scaled by, or
2908 /// 1 if it isn't scaled.
2909 unsigned getVFScaleFactor() const {
2910 auto *Partial = std::get_if<RdxUnordered>(&Style);
2911 return Partial ? Partial->VFScaleFactor : 1;
2912 }
2913
2914 /// Set the VFScaleFactor for this reduction phi. Can only be set to a factor
2915 /// > 1.
2916 void setVFScaleFactor(unsigned ScaleFactor) {
2917 assert(ScaleFactor > 1 && "must set to scale factor > 1");
2918 Style = RdxUnordered{ScaleFactor};
2919 }
2920
2921 /// Returns the number of incoming values, also number of incoming blocks.
2922 /// A reduction phi always has two incoming values: the start value and the
2923 /// value coming from the backedge.
2924 unsigned getNumIncoming() const override { return 2; }
2925
2926 /// Returns the recurrence kind of the reduction.
2927 RecurKind getRecurrenceKind() const { return Kind; }
2928
2929 /// Returns true, if the phi is part of an ordered reduction.
2930 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); }
2931
2932 /// Returns true if the phi is part of an in-loop reduction.
2933 bool isInLoop() const {
2934 return std::holds_alternative<RdxInLoop>(Style) ||
2935 std::holds_alternative<RdxOrdered>(Style);
2936 }
2937
2938 /// Returns true if the reduction outputs a vector with a scaled down VF.
2939 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
2940
2941 /// Returns true, if the phi is part of a multi-use reduction.
2943 return HasUsesOutsideReductionChain;
2944 }
2945
2946 /// Returns true if the recipe only uses the first lane of operand \p Op.
2947 bool usesFirstLaneOnly(const VPValue *Op) const override {
2949 "Op must be an operand of the recipe");
2950 return isOrdered() || isInLoop();
2951 }
2952
2953protected:
2954#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2955 /// Print the recipe.
2956 void printRecipe(raw_ostream &O, const Twine &Indent,
2957 VPSlotTracker &SlotTracker) const override;
2958#endif
2959};
2960
2961/// A recipe for vectorizing a phi-node as a sequence of mask-based select
2962/// instructions.
2964public:
2965 /// The blend operation is a User of the incoming values and of their
2966 /// respective masks, ordered [I0, M0, I1, M1, I2, M2, ...]. Note that M0 can
2967 /// be omitted (implied by passing an odd number of operands) in which case
2968 /// all other incoming values are merged into it.
2970 const VPIRFlags &Flags, DebugLoc DL)
2971 : VPRecipeWithIRFlags(VPRecipeBase::VPBlendSC, Operands,
2972 Operands[0]->getScalarType(), Flags, DL) {
2973 assert(Operands.size() >= 2 && "Expected at least two operands!");
2975 [this](unsigned I) {
2976 return getIncomingValue(I)->getScalarType() ==
2977 getScalarType();
2978 }) &&
2979 "all incoming values must have the same type");
2981 [this](unsigned I) {
2982 return getMask(I)->getScalarType()->isIntegerTy(1);
2983 }) &&
2984 "masks must be a bool");
2985 setUnderlyingValue(Phi);
2986 }
2987
2989
2992 NewOperands, *this, getDebugLoc());
2993 }
2994
2995 VP_CLASSOF_IMPL(VPRecipeBase::VPBlendSC)
2996
2997 /// A normalized blend is one that has an odd number of operands, whereby the
2998 /// first operand does not have an associated mask.
2999 bool isNormalized() const { return getNumOperands() % 2; }
3000
3001 /// Return the number of incoming values, taking into account when normalized
3002 /// the first incoming value will have no mask.
3003 unsigned getNumIncomingValues() const {
3004 return (getNumOperands() + isNormalized()) / 2;
3005 }
3006
3007 /// Return incoming value number \p Idx.
3008 VPValue *getIncomingValue(unsigned Idx) const {
3009 return Idx == 0 ? getOperand(0) : getOperand(Idx * 2 - isNormalized());
3010 }
3011
3012 /// Return mask number \p Idx.
3013 VPValue *getMask(unsigned Idx) const {
3014 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3015 return Idx == 0 ? getOperand(1) : getOperand(Idx * 2 + !isNormalized());
3016 }
3017
3018 /// Set mask number \p Idx to \p V.
3019 void setMask(unsigned Idx, VPValue *V) {
3020 assert((Idx > 0 || !isNormalized()) && "First index has no mask!");
3021 assert(V->getScalarType()->isIntegerTy(1) && "Mask must be an i1 (vector)");
3022 Idx == 0 ? setOperand(1, V) : setOperand(Idx * 2 + !isNormalized(), V);
3023 }
3024
3025 void execute(VPTransformState &State) override {
3026 llvm_unreachable("VPBlendRecipe should be expanded by simplifyBlends");
3027 }
3028
3029 /// Return the cost of this VPBlendRecipe.
3030 InstructionCost computeCost(ElementCount VF,
3031 VPCostContext &Ctx) const override;
3032
3033 /// Returns true if the recipe only uses the first lane of operand \p Op.
3034 bool usesFirstLaneOnly(const VPValue *Op) const override;
3035
3036protected:
3037#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3038 /// Print the recipe.
3039 void printRecipe(raw_ostream &O, const Twine &Indent,
3040 VPSlotTracker &SlotTracker) const override;
3041#endif
3042};
3043
3044/// A common base class for interleaved memory operations.
3045/// An Interleaved memory operation is a memory access method that combines
3046/// multiple strided loads/stores into a single wide load/store with shuffles.
3047/// The first operand is the start address. The optional operands are, in order,
3048/// the stored values and the mask.
3050 public VPIRMetadata {
3052
3053 /// Indicates if the interleave group is in a conditional block and requires a
3054 /// mask.
3055 bool HasMask = false;
3056
3057 /// Indicates if gaps between members of the group need to be masked out or if
3058 /// unused gaps can be loaded speculatively.
3059 bool NeedsMaskForGaps = false;
3060
3061protected:
3062 VPInterleaveBase(const unsigned char SC,
3064 ArrayRef<VPValue *> Operands,
3065 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3066 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3067 : VPRecipeBase(SC, Operands, DL), VPIRMetadata(MD), IG(IG),
3068 NeedsMaskForGaps(NeedsMaskForGaps) {
3069 // TODO: extend the masked interleaved-group support to reversed access.
3070 assert((!Mask || !IG->isReverse()) &&
3071 "Reversed masked interleave-group not supported.");
3072 if (StoredValues.empty()) {
3073 for (Instruction *Inst : IG->members()) {
3074 assert(!Inst->getType()->isVoidTy() && "must have result");
3075 new VPMultiDefValue(this, Inst, Inst->getType());
3076 }
3077 } else {
3078 for (auto *SV : StoredValues)
3079 addOperand(SV);
3080 }
3081 if (Mask) {
3082 HasMask = true;
3083 addOperand(Mask);
3084 }
3085 }
3086
3087public:
3088 VPInterleaveBase *clone() override = 0;
3089
3090 static inline bool classof(const VPRecipeBase *R) {
3091 return R->getVPRecipeID() == VPRecipeBase::VPInterleaveSC ||
3092 R->getVPRecipeID() == VPRecipeBase::VPInterleaveEVLSC;
3093 }
3094
3095 static inline bool classof(const VPUser *U) {
3096 auto *R = dyn_cast<VPRecipeBase>(U);
3097 return R && classof(R);
3098 }
3099
3100 /// Return the address accessed by this recipe.
3101 VPValue *getAddr() const {
3102 return getOperand(0); // Address is the 1st, mandatory operand.
3103 }
3104
3105 /// Return the mask used by this recipe. Note that a full mask is represented
3106 /// by a nullptr.
3107 VPValue *getMask() const {
3108 // Mask is optional and the last operand.
3109 return HasMask ? getOperand(getNumOperands() - 1) : nullptr;
3110 }
3111
3112 /// Return true if the access needs a mask because of the gaps.
3113 bool needsMaskForGaps() const { return NeedsMaskForGaps; }
3114
3116
3117 Instruction *getInsertPos() const { return IG->getInsertPos(); }
3118
3119 void execute(VPTransformState &State) override {
3120 llvm_unreachable("VPInterleaveBase should not be instantiated.");
3121 }
3122
3123 /// Return the cost of this recipe.
3124 InstructionCost computeCost(ElementCount VF,
3125 VPCostContext &Ctx) const override;
3126
3127 /// Returns true if the recipe only uses the first lane of operand \p Op.
3128 bool usesFirstLaneOnly(const VPValue *Op) const override = 0;
3129
3130 /// Returns the number of stored operands of this interleave group. Returns 0
3131 /// for load interleave groups.
3132 virtual unsigned getNumStoreOperands() const = 0;
3133
3134 /// Return the VPValues stored by this interleave group. If it is a load
3135 /// interleave group, return an empty ArrayRef.
3137 return {op_end() - (getNumStoreOperands() + (HasMask ? 1 : 0)),
3139 }
3140};
3141
3142/// VPInterleaveRecipe is a recipe for transforming an interleave group of load
3143/// or stores into one wide load/store and shuffles. The first operand of a
3144/// VPInterleave recipe is the address, followed by the stored values, followed
3145/// by an optional mask.
3147public:
3149 ArrayRef<VPValue *> StoredValues, VPValue *Mask,
3150 bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
3151 : VPInterleaveBase(VPRecipeBase::VPInterleaveSC, IG, Addr, StoredValues,
3152 Mask, NeedsMaskForGaps, MD, DL) {}
3153
3154 ~VPInterleaveRecipe() override = default;
3155
3159 needsMaskForGaps(), *this, getDebugLoc());
3160 }
3161
3162 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveSC)
3163
3164 /// Generate the wide load or store, and shuffles.
3165 void execute(VPTransformState &State) override;
3166
3167 bool usesFirstLaneOnly(const VPValue *Op) const override {
3169 "Op must be an operand of the recipe");
3170 return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op);
3171 }
3172
3173 unsigned getNumStoreOperands() const override {
3174 return getNumOperands() - (getMask() ? 2 : 1);
3175 }
3176
3177protected:
3178#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3179 /// Print the recipe.
3180 void printRecipe(raw_ostream &O, const Twine &Indent,
3181 VPSlotTracker &SlotTracker) const override;
3182#endif
3183};
3184
3185/// A recipe for interleaved memory operations with vector-predication
3186/// intrinsics. The first operand is the address, the second operand is the
3187/// explicit vector length. Stored values and mask are optional operands.
3189public:
3191 : VPInterleaveBase(VPRecipeBase::VPInterleaveEVLSC,
3192 R.getInterleaveGroup(), {R.getAddr(), &EVL},
3193 R.getStoredValues(), Mask, R.needsMaskForGaps(), R,
3194 R.getDebugLoc()) {
3195 assert(!getInterleaveGroup()->isReverse() &&
3196 "Reversed interleave-group with tail folding is not supported.");
3197 assert(!needsMaskForGaps() && "Interleaved access with gap mask is not "
3198 "supported for scalable vector.");
3199 }
3200
3201 ~VPInterleaveEVLRecipe() override = default;
3202
3204 llvm_unreachable("cloning not implemented yet");
3205 }
3206
3207 VP_CLASSOF_IMPL(VPRecipeBase::VPInterleaveEVLSC)
3208
3209 /// The VPValue of the explicit vector length.
3210 VPValue *getEVL() const { return getOperand(1); }
3211
3212 /// Generate the wide load or store, and shuffles.
3213 void execute(VPTransformState &State) override;
3214
3215 /// The recipe only uses the first lane of the address, and EVL operand.
3216 bool usesFirstLaneOnly(const VPValue *Op) const override {
3218 "Op must be an operand of the recipe");
3219 return (Op == getAddr() && !llvm::is_contained(getStoredValues(), Op)) ||
3220 Op == getEVL();
3221 }
3222
3223 unsigned getNumStoreOperands() const override {
3224 return getNumOperands() - (getMask() ? 3 : 2);
3225 }
3226
3227protected:
3228#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3229 /// Print the recipe.
3230 void printRecipe(raw_ostream &O, const Twine &Indent,
3231 VPSlotTracker &SlotTracker) const override;
3232#endif
3233};
3234
3235/// A recipe to represent inloop, ordered or partial reduction operations. It
3236/// performs a reduction on a vector operand into a scalar (vector in the case
3237/// of a partial reduction) value, and adds the result to a chain. The Operands
3238/// are {ChainOp, VecOp, [Condition]}.
3240
3241 /// The recurrence kind for the reduction in question.
3242 RecurKind RdxKind;
3243 /// Whether the reduction is conditional.
3244 bool IsConditional = false;
3245 ReductionStyle Style;
3246
3247protected:
3248 VPReductionRecipe(const unsigned char SC, RecurKind RdxKind,
3250 ArrayRef<VPValue *> Operands, VPValue *CondOp,
3251 ReductionStyle Style, DebugLoc DL)
3252 : VPRecipeWithIRFlags(SC, Operands, Operands[0]->getScalarType(), FMFs,
3253 DL),
3254 RdxKind(RdxKind), Style(Style) {
3255 assert(all_of(Operands,
3256 [this](VPValue *VPV) {
3257 return VPV->getScalarType() == getScalarType() ||
3258 (isa<VPInstruction>(VPV) &&
3259 cast<VPInstruction>(VPV)->getOpcode() ==
3261 }) &&
3262 "all incoming values must have the same type");
3263 if (CondOp) {
3264 assert(CondOp->getScalarType()->isIntegerTy(1) &&
3265 "CondOp must be a bool");
3266 IsConditional = true;
3267 addOperand(CondOp);
3268 }
3270 }
3271
3272public:
3274 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3276 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, I,
3277 {ChainOp, VecOp}, CondOp, Style, DL) {}
3278
3280 VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp,
3282 : VPReductionRecipe(VPRecipeBase::VPReductionSC, RdxKind, FMFs, nullptr,
3283 {ChainOp, VecOp}, CondOp, Style, DL) {}
3284
3285 ~VPReductionRecipe() override = default;
3286
3288 return new VPReductionRecipe(RdxKind, getFastMathFlags(),
3290 getCondOp(), Style, getDebugLoc());
3291 }
3292
3293 static inline bool classof(const VPRecipeBase *R) {
3294 return R->getVPRecipeID() == VPRecipeBase::VPReductionSC ||
3295 R->getVPRecipeID() == VPRecipeBase::VPReductionEVLSC;
3296 }
3297
3298 static inline bool classof(const VPUser *U) {
3299 auto *R = dyn_cast<VPRecipeBase>(U);
3300 return R && classof(R);
3301 }
3302
3303 static inline bool classof(const VPValue *VPV) {
3304 const VPRecipeBase *R = VPV->getDefiningRecipe();
3305 return R && classof(R);
3306 }
3307
3308 static inline bool classof(const VPSingleDefRecipe *R) {
3309 return classof(static_cast<const VPRecipeBase *>(R));
3310 }
3311
3312 /// Generate the reduction in the loop.
3313 void execute(VPTransformState &State) override;
3314
3315 /// Return the cost of VPReductionRecipe.
3316 InstructionCost computeCost(ElementCount VF,
3317 VPCostContext &Ctx) const override;
3318
3319 /// Return the recurrence kind for the in-loop reduction.
3320 RecurKind getRecurrenceKind() const { return RdxKind; }
3321 /// Return true if the in-loop reduction is ordered.
3322 bool isOrdered() const { return std::holds_alternative<RdxOrdered>(Style); };
3323 /// Return true if the in-loop reduction is conditional.
3324 bool isConditional() const { return IsConditional; };
3325 /// Returns true if the reduction outputs a vector with a scaled down VF.
3326 bool isPartialReduction() const { return getVFScaleFactor() > 1; }
3327 /// Returns true if the reduction is in-loop.
3328 bool isInLoop() const {
3329 return std::holds_alternative<RdxInLoop>(Style) ||
3330 std::holds_alternative<RdxOrdered>(Style);
3331 }
3332 /// The VPValue of the scalar Chain being accumulated.
3333 VPValue *getChainOp() const { return getOperand(0); }
3334 /// The VPValue of the vector value to be reduced.
3335 VPValue *getVecOp() const { return getOperand(1); }
3336 /// The VPValue of the condition for the block.
3338 return isConditional() ? getOperand(getNumOperands() - 1) : nullptr;
3339 }
3340 /// Get the factor that the VF of this recipe's output should be scaled by, or
3341 /// 1 if it isn't scaled.
3342 unsigned getVFScaleFactor() const {
3343 auto *Partial = std::get_if<RdxUnordered>(&Style);
3344 return Partial ? Partial->VFScaleFactor : 1;
3345 }
3346
3347protected:
3348#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3349 /// Print the recipe.
3350 void printRecipe(raw_ostream &O, const Twine &Indent,
3351 VPSlotTracker &SlotTracker) const override;
3352#endif
3353};
3354
3355/// A recipe to represent inloop reduction operations with vector-predication
3356/// intrinsics, performing a reduction on a vector operand with the explicit
3357/// vector length (EVL) into a scalar value, and adding the result to a chain.
3358/// The Operands are {ChainOp, VecOp, EVL, [Condition]}.
3360public:
3363 : VPReductionRecipe(VPRecipeBase::VPReductionEVLSC, R.getRecurrenceKind(),
3364 R.getFastMathFlags(),
3366 {R.getChainOp(), R.getVecOp(), &EVL}, CondOp,
3367 getReductionStyle(/*InLoop=*/true, R.isOrdered(), 1),
3368 DL) {}
3369
3370 ~VPReductionEVLRecipe() override = default;
3371
3373 llvm_unreachable("cloning not implemented yet");
3374 }
3375
3376 VP_CLASSOF_IMPL(VPRecipeBase::VPReductionEVLSC)
3377
3378 /// Generate the reduction in the loop
3379 void execute(VPTransformState &State) override;
3380
3381 /// The VPValue of the explicit vector length.
3382 VPValue *getEVL() const { return getOperand(2); }
3383
3384 /// Returns true if the recipe only uses the first lane of operand \p Op.
3385 bool usesFirstLaneOnly(const VPValue *Op) const override {
3387 "Op must be an operand of the recipe");
3388 return Op == getEVL();
3389 }
3390
3391protected:
3392#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3393 /// Print the recipe.
3394 void printRecipe(raw_ostream &O, const Twine &Indent,
3395 VPSlotTracker &SlotTracker) const override;
3396#endif
3397};
3398
3399/// VPReplicateRecipe replicates a given instruction producing multiple scalar
3400/// copies of the original scalar type, one per lane, instead of producing a
3401/// single copy of widened type for all lanes. If the instruction is known to be
3402/// a single scalar, only one copy will be generated.
3404 public VPIRMetadata {
3405 /// Indicator if only a single replica per lane is needed.
3406 bool IsSingleScalar;
3407
3408 /// Indicator if the replicas are also predicated.
3409 bool IsPredicated;
3410
3411public:
3413 bool IsSingleScalar, VPValue *Mask = nullptr,
3414 const VPIRFlags &Flags = {}, VPIRMetadata Metadata = {},
3415 DebugLoc DL = DebugLoc::getUnknown())
3416 : VPRecipeWithIRFlags(VPRecipeBase::VPReplicateSC, Operands,
3417 computeScalarType(I, Operands), Flags, DL),
3418 VPIRMetadata(Metadata), IsSingleScalar(IsSingleScalar),
3419 IsPredicated(Mask) {
3420 setUnderlyingValue(I);
3421 if (Mask)
3422 addOperand(Mask);
3423 }
3424
3425 ~VPReplicateRecipe() override = default;
3426
3427 /// Compute the scalar result type for a VPReplicateRecipe wrapping \p I with
3428 /// \p Operands (excluding any predicate mask).
3429 static Type *computeScalarType(const Instruction *I,
3430 ArrayRef<VPValue *> Operands);
3431
3433
3435 auto *Copy = new VPReplicateRecipe(
3436 getUnderlyingInstr(), NewOperands, IsSingleScalar,
3437 isPredicated() ? getMask() : nullptr, *this, *this, getDebugLoc());
3438 Copy->transferFlags(*this);
3439 return Copy;
3440 }
3441
3442 VP_CLASSOF_IMPL(VPRecipeBase::VPReplicateSC)
3443
3444 /// Generate replicas of the desired Ingredient. Replicas will be generated
3445 /// for all parts and lanes unless a specific part and lane are specified in
3446 /// the \p State.
3447 void execute(VPTransformState &State) override;
3448
3449 /// Return the cost of this VPReplicateRecipe.
3450 InstructionCost computeCost(ElementCount VF,
3451 VPCostContext &Ctx) const override;
3452
3453 /// Return the cost of scalarizing a call to \p CalledFn with argument
3454 /// operands \p ArgOps for a given \p VF.
3455 static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy,
3457 bool IsSingleScalar, ElementCount VF,
3458 VPCostContext &Ctx);
3459
3460 bool isSingleScalar() const { return IsSingleScalar; }
3461
3462 bool isPredicated() const { return IsPredicated; }
3463
3464 /// Returns true if the recipe only uses the first lane of operand \p Op.
3465 bool usesFirstLaneOnly(const VPValue *Op) const override {
3467 "Op must be an operand of the recipe");
3468 return isSingleScalar();
3469 }
3470
3471 /// Returns true if the recipe uses scalars of operand \p Op.
3472 bool usesScalars(const VPValue *Op) const override {
3474 "Op must be an operand of the recipe");
3475 return true;
3476 }
3477
3478 /// Return the mask of a predicated VPReplicateRecipe.
3480 assert(isPredicated() && "Trying to get the mask of a unpredicated recipe");
3481 return getOperand(getNumOperands() - 1);
3482 }
3483
3484 unsigned getOpcode() const { return getUnderlyingInstr()->getOpcode(); }
3485
3486protected:
3487#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3488 /// Print the recipe.
3489 void printRecipe(raw_ostream &O, const Twine &Indent,
3490 VPSlotTracker &SlotTracker) const override;
3491#endif
3492};
3493
3494/// A recipe for generating conditional branches on the bits of a mask.
3496public:
3498 : VPRecipeBase(VPRecipeBase::VPBranchOnMaskSC, {BlockInMask}, DL) {}
3499
3502 }
3503
3504 VP_CLASSOF_IMPL(VPRecipeBase::VPBranchOnMaskSC)
3505
3506 /// Generate the extraction of the appropriate bit from the block mask and the
3507 /// conditional branch.
3508 void execute(VPTransformState &State) override;
3509
3510 /// Return the cost of this VPBranchOnMaskRecipe.
3511 InstructionCost computeCost(ElementCount VF,
3512 VPCostContext &Ctx) const override;
3513
3514#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3515 /// Print the recipe.
3516 void printRecipe(raw_ostream &O, const Twine &Indent,
3517 VPSlotTracker &SlotTracker) const override {
3518 O << Indent << "BRANCH-ON-MASK ";
3520 }
3521#endif
3522
3523 /// Returns true if the recipe uses scalars of operand \p Op.
3524 bool usesScalars(const VPValue *Op) const override {
3526 "Op must be an operand of the recipe");
3527 return true;
3528 }
3529};
3530
3531/// A recipe to combine multiple recipes into a single 'expression' recipe,
3532/// which should be considered a single entity for cost-modeling and transforms.
3533/// The recipe needs to be 'decomposed', i.e. replaced by its individual
3534/// expression recipes, before execute. The individual expression recipes are
3535/// completely disconnected from the def-use graph of other recipes not part of
3536/// the expression. Def-use edges between pairs of expression recipes remain
3537/// intact, whereas every edge between an expression recipe and a recipe outside
3538/// the expression is elevated to connect the non-expression recipe with the
3539/// VPExpressionRecipe itself.
3540class VPExpressionRecipe : public VPSingleDefRecipe {
3541 /// Recipes included in this VPExpressionRecipe. This could contain
3542 /// duplicates.
3543 SmallVector<VPSingleDefRecipe *> ExpressionRecipes;
3544
3545 /// Temporary VPValues used for external operands of the expression, i.e.
3546 /// operands not defined by recipes in the expression.
3547 SmallVector<VPValue *> LiveInPlaceholders;
3548
3549 enum class ExpressionTypes {
3550 /// Represents an inloop extended reduction operation, performing a
3551 /// reduction on an extended vector operand into a scalar value, and adding
3552 /// the result to a chain.
3553 ExtendedReduction,
3554 /// Represents an inloop extended reduction operation, which is negated,
3555 /// then reduced before adding the result to a chain.
3556 NegatedExtendedReduction,
3557 /// Represent an inloop multiply-accumulate reduction, multiplying the
3558 /// extended vector operands, performing a reduction.add on the result, and
3559 /// adding the scalar result to a chain.
3560 ExtMulAccReduction,
3561 /// Represent an inloop multiply-accumulate reduction, multiplying the
3562 /// vector operands, performing a reduction.add on the result, and adding
3563 /// the scalar result to a chain.
3564 MulAccReduction,
3565 /// Represent an inloop multiply-accumulate reduction, multiplying the
3566 /// extended vector operands, negating the multiplication, performing a
3567 /// reduction.add on the result, and adding the scalar result to a chain.
3568 ExtNegatedMulAccReduction,
3569 };
3570
3571 /// Type of the expression.
3572 ExpressionTypes ExpressionType;
3573
3574 /// Construct a new VPExpressionRecipe by internalizing recipes in \p
3575 /// ExpressionRecipes. External operands (i.e. not defined by another recipe
3576 /// in the expression) are replaced by temporary VPValues and the original
3577 /// operands are transferred to the VPExpressionRecipe itself. Clone recipes
3578 /// as needed (excluding last) to ensure they are only used by other recipes
3579 /// in the expression.
3580 VPExpressionRecipe(ExpressionTypes ExpressionType,
3581 ArrayRef<VPSingleDefRecipe *> ExpressionRecipes);
3582
3583public:
3585 : VPExpressionRecipe(ExpressionTypes::ExtendedReduction, {Ext, Red}) {}
3587 VPReductionRecipe *Red)
3588 : VPExpressionRecipe(ExpressionTypes::NegatedExtendedReduction,
3589 {Ext, Neg, Red}) {
3590 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3591 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3592 "Expected an add reduction");
3593 if (Neg->getOpcode() == Instruction::Sub) {
3594 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(1));
3595 assert(SubConst && SubConst->isZero() && "Expected a negating sub");
3596 } else
3597 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3598 }
3600 : VPExpressionRecipe(ExpressionTypes::MulAccReduction, {Mul, Red}) {}
3603 : VPExpressionRecipe(ExpressionTypes::ExtMulAccReduction,
3604 {Ext0, Ext1, Mul, Red}) {}
3607 VPReductionRecipe *Red)
3608 : VPExpressionRecipe(ExpressionTypes::ExtNegatedMulAccReduction,
3609 {Ext0, Ext1, Mul, Neg, Red}) {
3610 assert((Mul->getOpcode() == Instruction::Mul ||
3611 Mul->getOpcode() == Instruction::FMul) &&
3612 "Expected a mul");
3613 assert((Red->getRecurrenceKind() == RecurKind::Add ||
3614 Red->getRecurrenceKind() == RecurKind::FAdd) &&
3615 "Expected an add reduction");
3616 assert(getNumOperands() >= 3 && "Expected at least three operands");
3617 if (Neg->getOpcode() == Instruction::Sub) {
3618 [[maybe_unused]] auto *SubConst = dyn_cast<VPConstantInt>(getOperand(2));
3619 assert(SubConst && SubConst->isZero() &&
3620 Neg->getOpcode() == Instruction::Sub && "Expected a negating sub");
3621 } else
3622 assert(Neg->getOpcode() == Instruction::FNeg && "Unexpected opcode");
3623 }
3624
3626 SmallPtrSet<VPSingleDefRecipe *, 4> ExpressionRecipesSeen;
3627 for (auto *R : reverse(ExpressionRecipes)) {
3628 if (ExpressionRecipesSeen.insert(R).second)
3629 delete R;
3630 }
3631 for (VPValue *T : LiveInPlaceholders)
3632 delete T;
3633 }
3634
3635 VP_CLASSOF_IMPL(VPRecipeBase::VPExpressionSC)
3636
3637 VPExpressionRecipe *clone() override {
3638 assert(!ExpressionRecipes.empty() && "empty expressions should be removed");
3639 SmallVector<VPSingleDefRecipe *> NewExpressiondRecipes;
3640 for (auto *R : ExpressionRecipes)
3641 NewExpressiondRecipes.push_back(R->clone());
3642 for (auto *New : NewExpressiondRecipes) {
3643 for (const auto &[Idx, Old] : enumerate(ExpressionRecipes))
3644 New->replaceUsesOfWith(Old, NewExpressiondRecipes[Idx]);
3645 // Update placeholder operands in the cloned recipe to use the external
3646 // operands, to be internalized when the cloned expression is constructed.
3647 for (const auto &[Placeholder, OutsideOp] :
3648 zip(LiveInPlaceholders, operands()))
3649 New->replaceUsesOfWith(Placeholder, OutsideOp);
3650 }
3651 return new VPExpressionRecipe(ExpressionType, NewExpressiondRecipes);
3652 }
3653
3654 /// Return the VPValue to use to infer the result type of the recipe.
3656 unsigned OpIdx =
3657 cast<VPReductionRecipe>(ExpressionRecipes.back())->isConditional() ? 2
3658 : 1;
3659 return getOperand(getNumOperands() - OpIdx);
3660 }
3661
3662 /// Insert the recipes of the expression back into the VPlan, directly before
3663 /// the current recipe. Leaves the expression recipe empty, which must be
3664 /// removed before codegen.
3665 void decompose();
3666
3667 unsigned getVFScaleFactor() const {
3668 auto *PR = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back());
3669 return PR ? PR->getVFScaleFactor() : 1;
3670 }
3671
3672 /// Method for generating code, must not be called as this recipe is abstract.
3673 void execute(VPTransformState &State) override {
3674 llvm_unreachable("recipe must be removed before execute");
3675 }
3676
3678 VPCostContext &Ctx) const override;
3679
3680 /// Returns true if this expression contains recipes that may read from or
3681 /// write to memory.
3682 bool mayReadOrWriteMemory() const;
3683
3684 /// Returns true if this expression contains recipes that may have side
3685 /// effects.
3686 bool mayHaveSideEffects() const;
3687
3688 /// Returns true if this VPExpressionRecipe produces a single scalar.
3689 bool isVectorToScalar() const;
3690
3691protected:
3692#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3693 /// Print the recipe.
3694 void printRecipe(raw_ostream &O, const Twine &Indent,
3695 VPSlotTracker &SlotTracker) const override;
3696#endif
3697};
3698
3699/// VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when
3700/// control converges back from a Branch-on-Mask. The phi nodes are needed in
3701/// order to merge values that are set under such a branch and feed their uses.
3702/// The phi nodes can be scalar or vector depending on the users of the value.
3703/// This recipe works in concert with VPBranchOnMaskRecipe.
3705public:
3706 /// Construct a VPPredInstPHIRecipe given \p PredV whose value needs phi
3707 /// nodes after merging back from a Branch-on-Mask.
3709 : VPSingleDefRecipe(VPRecipeBase::VPPredInstPHISC, PredV,
3710 PredV->getScalarType(), /*UV=*/nullptr, DL) {}
3711 ~VPPredInstPHIRecipe() override = default;
3712
3714 return new VPPredInstPHIRecipe(getOperand(0), getDebugLoc());
3715 }
3716
3717 VP_CLASSOF_IMPL(VPRecipeBase::VPPredInstPHISC)
3718
3719 /// Generates phi nodes for live-outs (from a replicate region) as needed to
3720 /// retain SSA form.
3721 void execute(VPTransformState &State) override;
3722
3723 /// Return the cost of this VPPredInstPHIRecipe.
3725 VPCostContext &Ctx) const override {
3726 // TODO: Compute accurate cost after retiring the legacy cost model.
3727 return 0;
3728 }
3729
3730protected:
3731#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3732 /// Print the recipe.
3733 void printRecipe(raw_ostream &O, const Twine &Indent,
3734 VPSlotTracker &SlotTracker) const override;
3735#endif
3736};
3737
3738/// A common mixin class for widening memory operations. An optional mask can be
3739/// provided as the last operand.
3741protected:
3743
3744 /// Alignment information for this memory access.
3746
3747 /// Whether the accessed addresses are consecutive.
3749
3750 /// Whether the memory access is masked.
3751 bool IsMasked = false;
3752
3753 void setMask(VPValue *Mask) {
3754 assert(!IsMasked && "cannot re-set mask");
3755 if (!Mask)
3756 return;
3757 assert(Mask->getScalarType()->isIntegerTy(1) &&
3758 "Mask must be an i1 (vector)");
3759 getAsRecipe()->addOperand(Mask);
3760 IsMasked = true;
3761 }
3762
3767
3768public:
3769 virtual ~VPWidenMemoryRecipe() = default;
3770
3771 /// Return a VPRecipeBase* to the current object.
3773 virtual const VPRecipeBase *getAsRecipe() const = 0;
3774
3775 /// Return whether the loaded-from / stored-to addresses are consecutive.
3776 bool isConsecutive() const { return Consecutive; }
3777
3778 /// Return the address accessed by this recipe.
3779 VPValue *getAddr() const { return getAsRecipe()->getOperand(0); }
3780
3781 /// Returns true if the recipe is masked.
3782 bool isMasked() const { return IsMasked; }
3783
3784 /// Return the mask used by this recipe. Note that a full mask is represented
3785 /// by a nullptr.
3786 VPValue *getMask() const {
3787 // Mask is optional and therefore the last operand.
3788 const VPRecipeBase *R = getAsRecipe();
3789 return isMasked() ? R->getOperand(R->getNumOperands() - 1) : nullptr;
3790 }
3791
3792 /// Returns the alignment of the memory access.
3793 Align getAlign() const { return Alignment; }
3794
3795 /// Return the cost of this VPWidenMemoryRecipe.
3796 InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const;
3797
3799};
3800
3801/// A recipe for widening load operations, using the address to load from and an
3802/// optional mask.
3804 public VPWidenMemoryRecipe {
3806 bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
3807 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadSC, {Addr}, Load.getType(),
3808 &Load, DL),
3809 VPWidenMemoryRecipe(Load, Consecutive, Metadata) {
3810 setMask(Mask);
3811 }
3812
3815 getMask(), Consecutive, *this, getDebugLoc());
3816 }
3817
3818 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC);
3819
3820 /// Generate a wide load or gather.
3821 void execute(VPTransformState &State) override;
3822
3823 /// Return the cost of this VPWidenLoadRecipe.
3825 VPCostContext &Ctx) const override {
3826 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3827 }
3828
3829 /// Returns true if the recipe only uses the first lane of operand \p Op.
3830 bool usesFirstLaneOnly(const VPValue *Op) const override {
3832 "Op must be an operand of the recipe");
3833 // Widened, consecutive load operations only demand the first lane of
3834 // their address.
3835 return Op == getAddr() && isConsecutive();
3836 }
3837
3838protected:
3839 VPRecipeBase *getAsRecipe() override { return this; }
3840 const VPRecipeBase *getAsRecipe() const override { return this; }
3841
3842#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3843 /// Print the recipe.
3844 void printRecipe(raw_ostream &O, const Twine &Indent,
3845 VPSlotTracker &SlotTracker) const override;
3846#endif
3847};
3848
3849/// A recipe for widening load operations with vector-predication intrinsics,
3850/// using the address to load from, the explicit vector length and an optional
3851/// mask.
3853 public VPWidenMemoryRecipe {
3855 VPValue *Mask)
3856 : VPSingleDefRecipe(VPRecipeBase::VPWidenLoadEVLSC, {Addr, &EVL},
3857 L.getIngredient().getType(), &L.getIngredient(),
3858 L.getDebugLoc()),
3859 VPWidenMemoryRecipe(L.getIngredient(), L.isConsecutive(), L) {
3860 setMask(Mask);
3861 }
3862
3864 llvm_unreachable("cloning not supported");
3865 }
3866
3867 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadEVLSC)
3868
3869 /// Return the EVL operand.
3870 VPValue *getEVL() const { return getOperand(1); }
3871
3872 /// Generate the wide load or gather.
3873 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3874
3875 /// Return the cost of this VPWidenLoadEVLRecipe.
3877 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3878
3879 /// Returns true if the recipe only uses the first lane of operand \p Op.
3880 bool usesFirstLaneOnly(const VPValue *Op) const override {
3882 "Op must be an operand of the recipe");
3883 // Widened loads only demand the first lane of EVL and consecutive loads
3884 // only demand the first lane of their address.
3885 return Op == getEVL() || (Op == getAddr() && isConsecutive());
3886 }
3887
3888protected:
3889 VPRecipeBase *getAsRecipe() override { return this; }
3890 const VPRecipeBase *getAsRecipe() const override { return this; }
3891
3892#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3893 /// Print the recipe.
3894 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
3895 VPSlotTracker &SlotTracker) const override;
3896#endif
3897};
3898
3899/// A recipe for widening store operations, using the stored value, the address
3900/// to store to and an optional mask.
3902 public VPWidenMemoryRecipe {
3903 VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal,
3904 VPValue *Mask, bool Consecutive,
3905 const VPIRMetadata &Metadata, DebugLoc DL)
3906 : VPRecipeBase(VPRecipeBase::VPWidenStoreSC, {Addr, StoredVal}, DL),
3907 VPWidenMemoryRecipe(Store, Consecutive, Metadata) {
3908 setMask(Mask);
3909 }
3910
3914 *this, getDebugLoc());
3915 }
3916
3917 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC);
3918
3919 /// Return the value stored by this recipe.
3920 VPValue *getStoredValue() const { return getOperand(1); }
3921
3922 /// Generate a wide store or scatter.
3923 void execute(VPTransformState &State) override;
3924
3925 /// Return the cost of this VPWidenStoreRecipe.
3927 VPCostContext &Ctx) const override {
3928 return VPWidenMemoryRecipe::computeCost(VF, Ctx);
3929 }
3930
3931 /// Returns true if the recipe only uses the first lane of operand \p Op.
3932 bool usesFirstLaneOnly(const VPValue *Op) const override {
3934 "Op must be an operand of the recipe");
3935 // Widened, consecutive stores only demand the first lane of their address,
3936 // unless the same operand is also stored.
3937 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3938 }
3939
3940protected:
3941 VPRecipeBase *getAsRecipe() override { return this; }
3942 const VPRecipeBase *getAsRecipe() const override { return this; }
3943
3944#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3945 /// Print the recipe.
3946 void printRecipe(raw_ostream &O, const Twine &Indent,
3947 VPSlotTracker &SlotTracker) const override;
3948#endif
3949};
3950
3951/// A recipe for widening store operations with vector-predication intrinsics,
3952/// using the value to store, the address to store to, the explicit vector
3953/// length and an optional mask.
3955 public VPWidenMemoryRecipe {
3957 VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
3958 : VPRecipeBase(VPRecipeBase::VPWidenStoreEVLSC, {Addr, StoredVal, &EVL},
3959 S.getDebugLoc()),
3960 VPWidenMemoryRecipe(S.getIngredient(), S.isConsecutive(), S) {
3961 setMask(Mask);
3962 }
3963
3965 llvm_unreachable("cloning not supported");
3966 }
3967
3968 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreEVLSC)
3969
3970 /// Return the value stored by this recipe.
3971 VPValue *getStoredValue() const { return getOperand(1); }
3972
3973 /// Return the EVL operand.
3974 VPValue *getEVL() const { return getOperand(2); }
3975
3976 /// Generate the wide store or scatter.
3977 LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override;
3978
3979 /// Return the cost of this VPWidenStoreEVLRecipe.
3981 computeCost(ElementCount VF, VPCostContext &Ctx) const override;
3982
3983 /// Returns true if the recipe only uses the first lane of operand \p Op.
3984 bool usesFirstLaneOnly(const VPValue *Op) const override {
3986 "Op must be an operand of the recipe");
3987 if (Op == getEVL()) {
3988 assert(getStoredValue() != Op && "unexpected store of EVL");
3989 return true;
3990 }
3991 // Widened, consecutive memory operations only demand the first lane of
3992 // their address, unless the same operand is also stored. The latter can
3993 // happen with opaque pointers.
3994 return Op == getAddr() && isConsecutive() && Op != getStoredValue();
3995 }
3996
3997protected:
3998 VPRecipeBase *getAsRecipe() override { return this; }
3999 const VPRecipeBase *getAsRecipe() const override { return this; }
4000
4001#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4002 /// Print the recipe.
4003 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4004 VPSlotTracker &SlotTracker) const override;
4005#endif
4006};
4007
4008/// Recipe to expand a SCEV expression.
4010 const SCEV *Expr;
4011
4012public:
4013 VPExpandSCEVRecipe(const SCEV *Expr);
4014
4015 ~VPExpandSCEVRecipe() override = default;
4016
4017 VPExpandSCEVRecipe *clone() override { return new VPExpandSCEVRecipe(Expr); }
4018
4019 VP_CLASSOF_IMPL(VPRecipeBase::VPExpandSCEVSC)
4020
4021 void execute(VPTransformState &State) override {
4022 llvm_unreachable("SCEV expressions must be expanded before final execute");
4023 }
4024
4025 /// Return the cost of this VPExpandSCEVRecipe.
4027 VPCostContext &Ctx) const override {
4028 // TODO: Compute accurate cost after retiring the legacy cost model.
4029 return 0;
4030 }
4031
4032 const SCEV *getSCEV() const { return Expr; }
4033
4034protected:
4035#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4036 /// Print the recipe.
4037 void printRecipe(raw_ostream &O, const Twine &Indent,
4038 VPSlotTracker &SlotTracker) const override;
4039#endif
4040};
4041
4042/// A recipe for generating the active lane mask for the vector loop that is
4043/// used to predicate the vector operations.
4045public:
4047 : VPHeaderPHIRecipe(VPRecipeBase::VPActiveLaneMaskPHISC, nullptr,
4048 StartMask, DL) {}
4049
4050 ~VPActiveLaneMaskPHIRecipe() override = default;
4051
4054 if (getNumOperands() == 2)
4055 R->addBackedgeValue(getOperand(1));
4056 return R;
4057 }
4058
4059 VP_CLASSOF_IMPL(VPRecipeBase::VPActiveLaneMaskPHISC)
4060
4061 /// Generate the active lane mask phi of the vector loop.
4062 void execute(VPTransformState &State) override;
4063
4064protected:
4065#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4066 /// Print the recipe.
4067 void printRecipe(raw_ostream &O, const Twine &Indent,
4068 VPSlotTracker &SlotTracker) const override;
4069#endif
4070};
4071
4072/// A recipe for generating the phi node tracking the current scalar iteration
4073/// index. It starts at the start value of the canonical induction and gets
4074/// incremented by the number of scalar iterations processed by the vector loop
4075/// iteration. The increment does not have to be loop invariant.
4077public:
4079 : VPHeaderPHIRecipe(VPRecipeBase::VPCurrentIterationPHISC, nullptr,
4080 StartIV, DL) {}
4081
4082 ~VPCurrentIterationPHIRecipe() override = default;
4083
4085 llvm_unreachable("cloning not implemented yet");
4086 }
4087
4088 VP_CLASSOF_IMPL(VPRecipeBase::VPCurrentIterationPHISC)
4089
4090 void execute(VPTransformState &State) override {
4091 llvm_unreachable("cannot execute this recipe, should be replaced by a "
4092 "scalar phi recipe");
4093 }
4094
4095 /// Return the cost of this VPCurrentIterationPHIRecipe.
4097 VPCostContext &Ctx) const override {
4098 // For now, match the behavior of the legacy cost model.
4099 return 0;
4100 }
4101
4102 /// Returns true if the recipe only uses the first lane of operand \p Op.
4103 bool usesFirstLaneOnly(const VPValue *Op) const override {
4105 "Op must be an operand of the recipe");
4106 return true;
4107 }
4108
4109protected:
4110#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4111 /// Print the recipe.
4112 LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent,
4113 VPSlotTracker &SlotTracker) const override;
4114#endif
4115};
4116
4117/// A Recipe for widening the canonical induction variable of the vector loop.
4118/// First operand is the canonical IV recipe, a second step operand (VF * Part)
4119/// is added during unrolling.
4121public:
4123 const VPIRFlags::WrapFlagsTy &Flags = {false, false})
4124 : VPRecipeWithIRFlags(VPRecipeBase::VPWidenCanonicalIVSC, CanonicalIV,
4125 CanonicalIV->getType(), Flags) {}
4126
4127 ~VPWidenCanonicalIVRecipe() override = default;
4128
4130 auto *WideCanIV =
4132 if (VPValue *Step = getStepValue())
4133 WideCanIV->addPerPartStep(Step);
4134 return WideCanIV;
4135 }
4136
4137 VP_CLASSOF_IMPL(VPRecipeBase::VPWidenCanonicalIVSC)
4138
4139 void execute(VPTransformState &State) override {
4140 llvm_unreachable("Expected prior expansion of WidenCanonicalIV recipes");
4141 }
4142
4143 /// Return the cost of this VPWidenCanonicalIVRecipe.
4145 VPCostContext &Ctx) const override {
4146 // TODO: Compute accurate cost after retiring the legacy cost model.
4147 return 0;
4148 }
4149
4150 /// Return the canonical IV being widened.
4154
4156 return getNumOperands() == 2 ? getOperand(1) : nullptr;
4157 }
4158
4159 /// Add the per-part step (VF * Part) used for unrolled parts.
4161 assert(Step->getScalarType() == getScalarType() &&
4162 "per-part step must have the same type as the canonical IV");
4163 VPUser::addOperand(Step);
4164 }
4165
4166protected:
4167#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4168 /// Print the recipe.
4169 void printRecipe(raw_ostream &O, const Twine &Indent,
4170 VPSlotTracker &SlotTracker) const override;
4171#endif
4172};
4173
4174/// A recipe for converting the input value \p IV to the corresponding
4175/// value of an IV with different start and step values, using Start + IV *
4176/// Step.
4178 /// Kind of the induction.
4180 /// If not nullptr, the floating point induction binary operator. Must be set
4181 /// for floating point inductions.
4182 const FPMathOperator *FPBinOp;
4183
4184public:
4186 VPValue *CanonicalIV, VPValue *Step)
4188 IndDesc.getKind(),
4189 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
4190 Start, CanonicalIV, Step) {}
4191
4193 const FPMathOperator *FPBinOp, VPIRValue *Start,
4194 VPValue *IV, VPValue *Step)
4195 : VPSingleDefRecipe(VPRecipeBase::VPDerivedIVSC, {Start, IV, Step},
4196 Start->getScalarType(), nullptr),
4197 Kind(Kind), FPBinOp(FPBinOp) {}
4198
4199 ~VPDerivedIVRecipe() override = default;
4200
4202 return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1),
4203 getStepValue());
4204 }
4205
4206 VP_CLASSOF_IMPL(VPRecipeBase::VPDerivedIVSC)
4207
4208 void execute(VPTransformState &State) override {
4209 llvm_unreachable("Expected prior expansion of this recipe");
4210 }
4211
4212 /// Return the cost of this VPDerivedIVRecipe.
4214 VPCostContext &Ctx) const override;
4215
4217 VPValue *getIndex() const { return getOperand(1); }
4218 VPValue *getStepValue() const { return getOperand(2); }
4219 const FPMathOperator *getFPBinOp() const { return FPBinOp; }
4221
4222 /// Returns true if the recipe only uses the first lane of operand \p Op.
4223 bool usesFirstLaneOnly(const VPValue *Op) const override {
4225 "Op must be an operand of the recipe");
4226 return true;
4227 }
4228
4229protected:
4230#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4231 /// Print the recipe.
4232 void printRecipe(raw_ostream &O, const Twine &Indent,
4233 VPSlotTracker &SlotTracker) const override;
4234#endif
4235};
4236
4237/// A recipe for handling phi nodes of integer and floating-point inductions,
4238/// producing their scalar values. Before unrolling by UF the recipe represents
4239/// the VF*UF scalar values to be produced, or UF scalar values if only first
4240/// lane is used, and has 3 operands: IV, step and VF. Unrolling adds one extra
4241/// operand StartIndex to all unroll parts except part 0, as the recipe
4242/// represents the VF scalar values (this number of values is taken from
4243/// State.VF rather than from the VF operand) starting at IV + StartIndex.
4245 Instruction::BinaryOps InductionOpcode;
4246
4247public:
4250 DebugLoc DL)
4251 : VPRecipeWithIRFlags(VPRecipeBase::VPScalarIVStepsSC, {IV, Step, VF},
4252 IV->getScalarType(), FMFs, DL),
4253 InductionOpcode(Opcode) {}
4254
4256 VPValue *Step, VPValue *VF,
4259 IV, Step, VF, IndDesc.getInductionOpcode(),
4260 dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
4261 ? IndDesc.getInductionBinOp()->getFastMathFlags()
4262 : FastMathFlags(),
4263 DL) {}
4264
4265 ~VPScalarIVStepsRecipe() override = default;
4266
4268 auto *NewR = new VPScalarIVStepsRecipe(getOperand(0), getOperand(1),
4269 getOperand(2), InductionOpcode,
4271 if (VPValue *StartIndex = getStartIndex())
4272 NewR->setStartIndex(StartIndex);
4273 return NewR;
4274 }
4275
4276 VP_CLASSOF_IMPL(VPRecipeBase::VPScalarIVStepsSC)
4277
4278 /// Generate the scalarized versions of the phi node as needed by their users.
4279 void execute(VPTransformState &State) override;
4280
4281 /// Return the cost of this VPScalarIVStepsRecipe.
4283 VPCostContext &Ctx) const override {
4284 // TODO: Compute accurate cost after retiring the legacy cost model.
4285 return 0;
4286 }
4287
4288 VPValue *getStepValue() const { return getOperand(1); }
4289
4290 /// Return the number of scalars to produce per unroll part, used to compute
4291 /// StartIndex during unrolling.
4292 VPValue *getVFValue() const { return getOperand(2); }
4293
4294 /// Return the StartIndex, or null if known to be zero, valid only after
4295 /// unrolling.
4297 return getNumOperands() == 4 ? getOperand(3) : nullptr;
4298 }
4299
4300 /// Set or add the StartIndex operand.
4301 void setStartIndex(VPValue *StartIndex) {
     // With 4 operands a StartIndex is already present as operand 3 and is
     // replaced in place; otherwise \p StartIndex is appended as a new operand.
4302 if (getNumOperands() == 4)
4303 setOperand(3, StartIndex);
4304 else
4305 addOperand(StartIndex);
4306 }
4307
4308 /// Returns true if the recipe only uses the first lane of operand \p Op.
4309 bool usesFirstLaneOnly(const VPValue *Op) const override {
4311 "Op must be an operand of the recipe");
4312 return true;
4313 }
4314
4315 Instruction::BinaryOps getInductionOpcode() const { return InductionOpcode; }
4316
4317protected:
4318#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4319 /// Print the recipe.
4320 void printRecipe(raw_ostream &O, const Twine &Indent,
4321 VPSlotTracker &SlotTracker) const override;
4322#endif
4323};
4324
4325/// CastInfo helper for casting from VPRecipeBase to a mixin class that is not
4326/// part of the VPRecipeBase class hierarchy (e.g. VPPhiAccessors,
4327/// VPIRMetadata).
4328namespace vpdetail {
4329template <typename VPMixin, typename... RecipeTys>
4331 : public DefaultDoCastIfPossible<VPMixin *, VPRecipeBase *,
4332 CastInfoMixinImpl<VPMixin, RecipeTys...>> {
4333 static_assert((std::is_base_of_v<VPMixin, RecipeTys> && ...),
4334 "Each type in RecipeTys must derive from VPMixin");
4335
4336 /// Used by isa.
4337 static bool isPossible(VPRecipeBase *R) { return isa<RecipeTys...>(R); }
4338
4339 /// Used by cast.
4340 static VPMixin *doCast(VPRecipeBase *R) {
4341 VPMixin *Out = nullptr;
     // Try a dyn_cast to each type in RecipeTys in turn. The || fold
     // short-circuits at the first cast that yields a non-null pointer, which is
     // left in Out converted to VPMixin *.
4342 ((Out = dyn_cast<RecipeTys>(R)) || ...);
     // \p R must be one of the recipe types listed in RecipeTys.
4343 assert(Out && "Illegal recipe for cast");
4344 return Out;
4345 }
4346 static VPMixin *castFailed() { return nullptr; }
4347};
4348} // namespace vpdetail
4349
4350/// Support casting from VPRecipeBase -> VPPhiAccessors.
4351template <>
4355
4356template <>
4361template <>
4363 : public ForwardToPointerCast<VPPhiAccessors, VPRecipeBase *,
4364 CastInfo<VPPhiAccessors, VPRecipeBase *>> {};
4365
4366/// Support casting from VPRecipeBase / VPUser -> VPWidenMemoryRecipe.
4367template <>
4372template <>
4377
4378/// Support casting from VPRecipeBase -> VPIRMetadata.
4379template <>
4385
4386template <>
4391template <>
4393 : public ForwardToPointerCast<VPIRMetadata, VPRecipeBase *,
4394 CastInfo<VPIRMetadata, VPRecipeBase *>> {};
4395
4396/// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
4397/// holds a sequence of zero or more VPRecipe's each representing a sequence of
4398/// output IR instructions. All PHI-like recipes must come before any non-PHI recipes.
4399class LLVM_ABI_FOR_TEST VPBasicBlock : public VPBlockBase {
4400 friend class VPlan;
4401
4402 /// Use VPlan::createVPBasicBlock to create VPBasicBlocks.
     /// If \p Recipe is non-null, it is appended to the newly created block.
4403 VPBasicBlock(const Twine &Name = "", VPRecipeBase *Recipe = nullptr)
4404 : VPBlockBase(VPBasicBlockSC, Name.str()) {
4405 if (Recipe)
4406 appendRecipe(Recipe);
4407 }
4408
4409public:
4411
4412protected:
4413 /// The VPRecipes held in the order of output instructions to generate.
4415
     /// Constructor for derived block kinds, which pass their own block ID
     /// \p BlockSC on to VPBlockBase.
4416 VPBasicBlock(const unsigned char BlockSC, const Twine &Name = "")
4417 : VPBlockBase(BlockSC, Name.str()) {}
4418
4419public:
     /// Empty the recipe list, removing recipes one at a time from the back.
4420 ~VPBasicBlock() override {
4421 while (!Recipes.empty())
4422 Recipes.pop_back();
4423 }
4424
4425 /// Instruction iterators...
4430
4431 //===--------------------------------------------------------------------===//
4432 /// Recipe iterator methods
4433 ///
4434 inline iterator begin() { return Recipes.begin(); }
4435 inline const_iterator begin() const { return Recipes.begin(); }
4436 inline iterator end() { return Recipes.end(); }
4437 inline const_iterator end() const { return Recipes.end(); }
4438
4439 inline reverse_iterator rbegin() { return Recipes.rbegin(); }
4440 inline const_reverse_iterator rbegin() const { return Recipes.rbegin(); }
4441 inline reverse_iterator rend() { return Recipes.rend(); }
4442 inline const_reverse_iterator rend() const { return Recipes.rend(); }
4443
4444 inline size_t size() const { return Recipes.size(); }
4445 inline bool empty() const { return Recipes.empty(); }
4446 inline const VPRecipeBase &front() const { return Recipes.front(); }
4447 inline VPRecipeBase &front() { return Recipes.front(); }
4448 inline const VPRecipeBase &back() const { return Recipes.back(); }
4449 inline VPRecipeBase &back() { return Recipes.back(); }
4450
4451 /// Returns a reference to the list of recipes.
4453
4454 /// Returns a pointer to a member of the recipe list.
4455 static RecipeListTy VPBasicBlock::*getSublistAccess(VPRecipeBase *) {
4456 return &VPBasicBlock::Recipes;
4457 }
4458
4459 /// Method to support type inquiry through isa, cast, and dyn_cast.
4460 static inline bool classof(const VPBlockBase *V) {
     // Blocks with the VPIRBasicBlockSC ID are accepted as VPBasicBlocks too.
4461 return V->getVPBlockID() == VPBlockBase::VPBasicBlockSC ||
4462 V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4463 }
4464
     /// Insert \p Recipe, which must not already have a parent block, at
     /// position \p InsertPt and make this block its parent.
4465 void insert(VPRecipeBase *Recipe, iterator InsertPt) {
4466 assert(Recipe && "No recipe to append.");
4467 assert(!Recipe->Parent && "Recipe already in VPlan");
4468 Recipe->Parent = this;
4469 Recipes.insert(InsertPt, Recipe);
4470 }
4471
4472 /// Augment the existing recipes of a VPBasicBlock with an additional
4473 /// \p Recipe as the last recipe.
4474 void appendRecipe(VPRecipeBase *Recipe) { insert(Recipe, end()); }
4475
4476 /// The method which generates the output IR instructions that correspond to
4477 /// this VPBasicBlock, thereby "executing" the VPlan.
4478 void execute(VPTransformState *State) override;
4479
4480 /// Return the cost of this VPBasicBlock.
4481 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4482
4483 /// Return the position of the first non-phi node recipe in the block.
4484 iterator getFirstNonPhi();
4485
4486 /// Returns an iterator range over the PHI-like recipes in the block.
4490
4491 /// Split current block at \p SplitAt by inserting a new block between the
4492 /// current block and its successors and moving all recipes starting at
4493 /// SplitAt to the new block. Returns the new block.
4494 VPBasicBlock *splitAt(iterator SplitAt);
4495
4496 VPRegionBlock *getEnclosingLoopRegion();
4497 const VPRegionBlock *getEnclosingLoopRegion() const;
4498
4499#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4500 /// Print this VPBasicBlock to \p O, prefixing all lines with \p Indent. \p
4501 /// SlotTracker is used to print unnamed VPValue's using consecutive numbers.
4502 ///
4503 /// Note that the numbering is applied to the whole VPlan, so printing
4504 /// individual blocks is consistent with the whole VPlan printing.
4505 void print(raw_ostream &O, const Twine &Indent,
4506 VPSlotTracker &SlotTracker) const override;
4507 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4508#endif
4509
4510 /// If the block has multiple successors, return the branch recipe terminating
4511 /// the block. If there are no or only a single successor, return nullptr.
4512 VPRecipeBase *getTerminator();
4513 const VPRecipeBase *getTerminator() const;
4514
4515 /// Returns true if the block is exiting its parent region.
4516 bool isExiting() const;
4517
4518 /// Clone the current block and its recipes, without updating the operands of
4519 /// the cloned recipes.
4520 VPBasicBlock *clone() override;
4521
4522 /// Returns the predecessor block at index \p Idx with the predecessors as per
4523 /// the corresponding plain CFG. If the block is an entry block to a region,
4524 /// the first predecessor is the single predecessor of a region, and the
4525 /// second predecessor is the exiting block of the region.
4526 const VPBasicBlock *getCFGPredecessor(unsigned Idx) const;
4527
4528protected:
4529 /// Execute the recipes in the IR basic block \p BB.
4530 void executeRecipes(VPTransformState *State, BasicBlock *BB);
4531
4532 /// Connect the VPBB's predecessors in the VPlan CFG to the IR basic block
4533 /// generated for this VPBB.
4534 void connectToPredecessors(VPTransformState &State);
4535
4536private:
4537 /// Create an IR BasicBlock to hold the output instructions generated by this
4538 /// VPBasicBlock, and return it. Update the CFGState accordingly.
4539 BasicBlock *createEmptyBasicBlock(VPTransformState &State);
4540};
4541
4542inline const VPBasicBlock *
4544 return getAsRecipe()->getParent()->getCFGPredecessor(Idx);
4545}
4546
4547/// A special type of VPBasicBlock that wraps an existing IR basic block.
4548/// Recipes of the block get added before the first non-phi instruction in the
4549/// wrapped block.
4550/// Note: At the moment, VPIRBasicBlock can only be used to wrap VPlan's
4551/// preheader block.
4552class VPIRBasicBlock : public VPBasicBlock {
4553 friend class VPlan;
4554
     /// The IR basic block wrapped by this VPIRBasicBlock.
4555 BasicBlock *IRBB;
4556
4557 /// Use VPlan::createVPIRBasicBlock to create VPIRBasicBlocks.
     /// The block is named "ir-bb<NAME>", where NAME is the name of \p IRBB.
4558 VPIRBasicBlock(BasicBlock *IRBB)
4559 : VPBasicBlock(VPIRBasicBlockSC,
4560 (Twine("ir-bb<") + IRBB->getName() + Twine(">")).str()),
4561 IRBB(IRBB) {}
4562
4563public:
4564 ~VPIRBasicBlock() override = default;
4565
     /// Method to support type inquiry through isa, cast, and dyn_cast.
4566 static inline bool classof(const VPBlockBase *V) {
4567 return V->getVPBlockID() == VPBlockBase::VPIRBasicBlockSC;
4568 }
4569
4570 /// The method which generates the output IR instructions that correspond to
4571 /// this VPIRBasicBlock, thereby "executing" the VPlan.
4572 void execute(VPTransformState *State) override;
4573
4574 VPIRBasicBlock *clone() override;
4575
     /// Return the wrapped IR basic block.
4576 BasicBlock *getIRBasicBlock() const { return IRBB; }
4577};
4578
4579/// Track information about the canonical IV value of a region.
4580/// TODO: Have it also track the canonical IV increment, subject of NUW flag.
4582 /// VPRegionValue for the canonical IV, whose allocation is managed by
4583 /// VPCanonicalIVInfo.
4584 std::unique_ptr<VPRegionValue> CanIV;
4585
4586 /// Whether NUW (no unsigned wrap) is set for the canonical IV increment.
4587 bool HasNUW = true;
4588
4589public:
4591 : CanIV(std::make_unique<VPRegionValue>(Ty, DL, Region)) {}
4592
4593 VPRegionValue *getRegionValue() { return CanIV.get(); }
4594 const VPRegionValue *getRegionValue() const { return CanIV.get(); }
4595
4596 bool hasNUW() const { return HasNUW; }
4597
4598 void clearNUW() { HasNUW = false; }
4599};
4600
4601/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
4602/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
4603/// A VPRegionBlock may indicate that its contents are to be replicated several
4604/// times. This is designed to support predicated scalarization, in which a
4605/// scalar if-then code structure needs to be generated VF * UF times. Having
4606/// this replication indicator helps to keep a single model for multiple
4607/// candidate VF's. The actual replication takes place only once the desired VF
4608/// and UF have been determined.
4609class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
4610 friend class VPlan;
4611
4612 /// Hold the Single Entry of the SESE region modelled by the VPRegionBlock.
4613 VPBlockBase *Entry;
4614
4615 /// Hold the Single Exiting block of the SESE region modelled by the
4616 /// VPRegionBlock.
4617 VPBlockBase *Exiting;
4618
4619 /// Holds the Canonical IV of the loop region along with additional
4620 /// information. If CanIVInfo is nullptr, the region is a replicating region.
4621 /// Loop regions retain their canonical IVs until they are dissolved, even if
4622 /// the canonical IV has no users.
4623 std::unique_ptr<VPCanonicalIVInfo> CanIVInfo;
4624
4625 /// Use VPlan::createLoopRegion() and VPlan::createReplicateRegion() to create
4626 /// VPRegionBlocks.
     /// This overload leaves CanIVInfo unset, so the resulting region is a
     /// replicating region. When \p Entry is provided, \p Exiting must be
     /// provided too, and both get this region as their parent.
4627 VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
4628 const std::string &Name = "")
4629 : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
4630 if (Entry) {
4631 assert(!Entry->hasPredecessors() && "Entry block has predecessors.");
4632 assert(Exiting && "Must also pass Exiting if Entry is passed.");
4633 assert(!Exiting->hasSuccessors() && "Exit block has successors.");
4634 Entry->setParent(this);
4635 Exiting->setParent(this);
4636 }
4637 }
4638
     /// Create a loop region: in addition to the setup done by the constructor
     /// above, allocate the canonical IV info from \p CanIVTy and \p DL.
4639 VPRegionBlock(Type *CanIVTy, DebugLoc DL, VPBlockBase *Entry,
4640 VPBlockBase *Exiting, const std::string &Name = "")
4641 : VPRegionBlock(Entry, Exiting, Name) {
4642 CanIVInfo = std::make_unique<VPCanonicalIVInfo>(CanIVTy, DL, this);
4643 }
4644
4645public:
4646 ~VPRegionBlock() override = default;
4647
4648 /// Method to support type inquiry through isa, cast, and dyn_cast.
4649 static inline bool classof(const VPBlockBase *V) {
4650 return V->getVPBlockID() == VPBlockBase::VPRegionBlockSC;
4651 }
4652
4653 const VPBlockBase *getEntry() const { return Entry; }
4654 VPBlockBase *getEntry() { return Entry; }
4655
4656 /// Set \p EntryBlock as the entry VPBlockBase of this VPRegionBlock. \p
4657 /// EntryBlock must have no predecessors.
4658 void setEntry(VPBlockBase *EntryBlock) {
4659 assert(!EntryBlock->hasPredecessors() &&
4660 "Entry block cannot have predecessors.");
4661 Entry = EntryBlock;
4662 EntryBlock->setParent(this);
4663 }
4664
4665 const VPBlockBase *getExiting() const { return Exiting; }
4666 VPBlockBase *getExiting() { return Exiting; }
4667
4668 /// Set \p ExitingBlock as the exiting VPBlockBase of this VPRegionBlock. \p
4669 /// ExitingBlock must have no successors.
4670 void setExiting(VPBlockBase *ExitingBlock) {
4671 assert(!ExitingBlock->hasSuccessors() &&
4672 "Exit block cannot have successors.");
4673 Exiting = ExitingBlock;
4674 ExitingBlock->setParent(this);
4675 }
4676
4677 /// Returns the pre-header VPBasicBlock of the loop region.
4679 assert(!isReplicator() && "should only get pre-header of loop regions");
4680 return getSinglePredecessor()->getExitingBasicBlock();
4681 }
4682
4683 /// An indicator whether this region is to generate multiple replicated
4684 /// instances of output IR corresponding to its VPBlockBases.
4685 bool isReplicator() const { return !CanIVInfo; }
4686
4687 /// The method which generates the output IR instructions that correspond to
4688 /// this VPRegionBlock, thereby "executing" the VPlan.
4689 void execute(VPTransformState *State) override;
4690
4691 /// Return the cost of this region.
4692 InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override;
4693
4694#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
4695 /// Print this VPRegionBlock to \p O (recursively), prefixing all lines with
4696 /// \p Indent. \p SlotTracker is used to print unnamed VPValue's using
4697 /// consecutive numbers.
4698 ///
4699 /// Note that the numbering is applied to the whole VPlan, so printing
4700 /// individual regions is consistent with the whole VPlan printing.
4701 void print(raw_ostream &O, const Twine &Indent,
4702 VPSlotTracker &SlotTracker) const override;
4703 using VPBlockBase::print; // Get the print(raw_stream &O) version.
4704#endif
4705
4706 /// Clone all blocks in the single-entry single-exit region of the block and
4707 /// their recipes without updating the operands of the cloned recipes.
4708 VPRegionBlock *clone() override;
4709
4710 /// Remove the current region from its VPlan, connecting its predecessor to
4711 /// its entry, and its exiting block to its successor.
4712 void dissolveToCFGLoop();
4713
4714 /// Get the canonical IV increment instruction if it exists. Otherwise, create
4715 /// a new increment before the terminator and return it. The canonical IV
4716 /// increment is subject to DCE if unused, unlike the canonical IV itself.
4717 VPInstruction *getOrCreateCanonicalIVIncrement();
4718
4719 /// Return the canonical induction variable of the region, null for
4720 /// replicating regions.
4722 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4723 }
4725 return CanIVInfo ? CanIVInfo->getRegionValue() : nullptr;
4726 }
4727
4728 /// Return the type of the canonical IV for loop regions.
     /// CanIVInfo is dereferenced without a null check, so this must not be
     /// called on replicating regions.
4730 return CanIVInfo->getRegionValue()->getType();
4731 }
4732
4733 /// Indicates if NUW is set for the canonical IV increment, for loop regions.
     /// CanIVInfo is dereferenced without a null check, so this must not be
     /// called on replicating regions.
4734 bool hasCanonicalIVNUW() const { return CanIVInfo->hasNUW(); }
4735
4736 /// Unsets NUW for the canonical IV increment \p Increment, for loop regions.
     /// Poison-generating flags are dropped from \p Increment and the NUW bit
     /// tracked in CanIVInfo is cleared.
4738 assert(Increment && "Must provide increment to clear");
4739 Increment->dropPoisonGeneratingFlags();
4740 CanIVInfo->clearNUW();
4741 }
4742};
4743
4745 return getParent()->getParent();
4746}
4747
4749 return getParent()->getParent();
4750}
4751
4752/// VPlan models a candidate for vectorization, encoding various decisions taken
4753/// to produce efficient output IR, including which branches, basic-blocks and
4754/// output IR instructions to generate, and their cost. VPlan holds a
4755/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
4756/// VPBasicBlock.
4757class VPlan {
4758 friend class VPlanPrinter;
4759 friend class VPSlotTracker;
4760
4761 /// VPBasicBlock corresponding to the original preheader. Used to place
4762 /// VPExpandSCEV recipes for expressions used during skeleton creation and the
4763 /// rest of VPlan execution.
4764 /// When this VPlan is used for the epilogue vector loop, the entry will be
4765 /// replaced by a new entry block created during skeleton creation.
4766 VPBasicBlock *Entry;
4767
4768 /// VPIRBasicBlock wrapping the header of the original scalar loop.
4769 VPIRBasicBlock *ScalarHeader;
4770
4771 /// Immutable list of VPIRBasicBlocks wrapping the exit blocks of the original
4772 /// scalar loop. Note that some exit blocks may be unreachable at the moment,
4773 /// e.g. if the scalar epilogue always executes.
4775
4776 /// Holds the VFs applicable to this VPlan.
4778
4779 /// Holds the UFs applicable to this VPlan. If empty, the VPlan is valid for
4780 /// any UF.
4782
4783 /// Holds the name of the VPlan, for printing.
4784 std::string Name;
4785
4786 /// Represents the trip count of the original loop, for folding
4787 /// the tail.
4788 VPValue *TripCount = nullptr;
4789
4790 /// Represents the backedge taken count of the original loop, for folding
4791 /// the tail. It equals TripCount - 1.
4792 VPSymbolicValue *BackedgeTakenCount = nullptr;
4793
4794 /// Represents the vector trip count.
4795 VPSymbolicValue VectorTripCount;
4796
4797 /// Represents the vectorization factor of the loop.
4798 VPSymbolicValue VF;
4799
4800 /// Represents the unroll factor of the loop.
4801 VPSymbolicValue UF;
4802
4803 /// Represents the loop-invariant VF * UF of the vector loop region.
4804 VPSymbolicValue VFxUF;
4805
4806 /// Contains all the external definitions created for this VPlan, as a mapping
4807 /// from IR Values to VPIRValues.
4809
4810 /// Blocks allocated and owned by the VPlan. They will be deleted once the
4811 /// VPlan is destroyed.
4812 SmallVector<VPBlockBase *> CreatedBlocks;
4813
4814 /// Construct a VPlan with \p Entry to the plan and with \p ScalarHeader
4815 /// wrapping the original header of the scalar loop. The vector loop will have
4816 /// index type \p IdxTy.
4817 VPlan(VPBasicBlock *Entry, VPIRBasicBlock *ScalarHeader, Type *IdxTy)
4818 : Entry(Entry), ScalarHeader(ScalarHeader), VectorTripCount(IdxTy),
4819 VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4820 Entry->setPlan(this);
4821 assert(ScalarHeader->getNumSuccessors() == 0 &&
4822 "scalar header must be a leaf node");
4823 }
4824
4825public:
4826 /// Construct a VPlan for \p L. This will create VPIRBasicBlocks wrapping the
4827 /// original preheader and scalar header of \p L, to be used as entry and
4828 /// scalar header blocks of the new VPlan. The vector loop will have index
4829 /// type \p IdxTy.
4830 VPlan(Loop *L, Type *IdxTy);
4831
4832 /// Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock
4833 /// wrapping \p ScalarHeaderBB and vector loop index of type \p IdxTy.
4834 VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
4835 : VectorTripCount(IdxTy), VF(IdxTy), UF(IdxTy), VFxUF(IdxTy) {
4836 setEntry(createVPBasicBlock("preheader"));
4837 ScalarHeader = createVPIRBasicBlock(ScalarHeaderBB);
4838 }
4839
4841
4843 Entry = VPBB;
4844 VPBB->setPlan(this);
4845 }
4846
4847 /// Generate the IR code for this VPlan.
4848 void execute(VPTransformState *State);
4849
4850 /// Return the cost of this plan.
4852
4853 VPBasicBlock *getEntry() { return Entry; }
4854 const VPBasicBlock *getEntry() const { return Entry; }
4855
4856 /// Returns the preheader of the vector loop region, if one exists, or null
4857 /// otherwise.
4859 const VPRegionBlock *VectorRegion = getVectorLoopRegion();
4860 return VectorRegion
4861 ? cast<VPBasicBlock>(VectorRegion->getSinglePredecessor())
4862 : nullptr;
4863 }
4864
4865 /// Returns the VPRegionBlock of the vector loop.
4868
4869 /// Returns true if this VPlan is for an outer loop, i.e., its vector
4870 /// loop region contains a nested loop region.
4871 LLVM_ABI_FOR_TEST bool isOuterLoop() const;
4872
4873 /// Returns the 'middle' block of the plan, that is the block that selects
4874 /// whether to execute the scalar tail loop or the exit block from the loop
4875 /// latch. If there is an early exit from the vector loop, the middle block
4876 /// conceptually has the early exit block as third successor, split across 2
4877 /// VPBBs. In that case, the second VPBB selects whether to execute the scalar
4878 /// tail loop or the exit block. If the scalar tail loop or exit block are
4879 /// known to always execute, the middle block may branch directly to that
4880 /// block. This function cannot be called once the vector loop region has been
4881 /// removed.
4883 VPRegionBlock *LoopRegion = getVectorLoopRegion();
4884 assert(
4885 LoopRegion &&
4886 "cannot call the function after vector loop region has been removed");
4887 // The middle block is always the last successor of the region.
4888 return cast<VPBasicBlock>(LoopRegion->getSuccessors().back());
4889 }
4890
4892 return const_cast<VPlan *>(this)->getMiddleBlock();
4893 }
4894
4895 /// Return the VPBasicBlock for the preheader of the scalar loop.
4898 getScalarHeader()->getSinglePredecessor());
4899 }
4900
4901 /// Return the VPIRBasicBlock wrapping the header of the scalar loop.
4902 VPIRBasicBlock *getScalarHeader() const { return ScalarHeader; }
4903
4904 /// Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of
4905 /// the original scalar loop.
4906 ArrayRef<VPIRBasicBlock *> getExitBlocks() const { return ExitBlocks; }
4907
4908 /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an
4909 /// exit block.
4911
4912 /// Returns true if \p VPBB is an exit block.
4913 bool isExitBlock(VPBlockBase *VPBB);
4914
4915 /// The trip count of the original loop.
4917 assert(TripCount && "trip count needs to be set before accessing it");
4918 return TripCount;
4919 }
4920
4921 /// Set the trip count assuming it is currently null; if it is not - use
4922 /// resetTripCount().
4923 void setTripCount(VPValue *NewTripCount) {
4924 assert(!TripCount && NewTripCount && "TripCount should not be set yet.");
4925 TripCount = NewTripCount;
4926 }
4927
4928 /// Resets the trip count for the VPlan. The caller must make sure all uses of
4929 /// the original trip count have been replaced.
4930 void resetTripCount(VPValue *NewTripCount) {
4931 assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 &&
4932 "TripCount must be set when resetting");
4933 TripCount = NewTripCount;
4934 }
4935
4936 /// The backedge taken count of the original loop.
4938 // BTC shares the canonical IV type with VectorTripCount.
4939 if (!BackedgeTakenCount)
4940 BackedgeTakenCount = new VPSymbolicValue(VectorTripCount.getType());
4941 return BackedgeTakenCount;
4942 }
4943 VPValue *getBackedgeTakenCount() const { return BackedgeTakenCount; }
4944
4945 /// The vector trip count.
4946 VPSymbolicValue &getVectorTripCount() { return VectorTripCount; }
4947
4948 /// Returns the VF of the vector loop region.
4949 VPSymbolicValue &getVF() { return VF; };
4950 const VPSymbolicValue &getVF() const { return VF; };
4951
4952 /// Returns the UF of the vector loop region.
4953 VPSymbolicValue &getUF() { return UF; };
4954
4955 /// Returns VF * UF of the vector loop region.
4956 VPSymbolicValue &getVFxUF() { return VFxUF; }
4957
4960 }
4961
4962 const DataLayout &getDataLayout() const {
4964 }
4965
4966 void addVF(ElementCount VF) { VFs.insert(VF); }
4967
4969 assert(hasVF(VF) && "Cannot set VF not already in plan");
4970 VFs.clear();
4971 VFs.insert(VF);
4972 }
4973
4974 /// Remove \p VF from the plan.
4976 assert(hasVF(VF) && "tried to remove VF not present in plan");
4977 VFs.remove(VF);
4978 }
4979
4980 bool hasVF(ElementCount VF) const { return VFs.count(VF); }
4981 bool hasScalableVF() const {
4982 return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); });
4983 }
4984
4985 /// Returns an iterator range over all VFs of the plan.
4988 return VFs;
4989 }
4990
4991 /// Returns the single VF of the plan, asserting that the plan has exactly
4992 /// one VF.
4994 assert(VFs.size() == 1 && "expected plan with single VF");
4995 return VFs[0];
4996 }
4997
4998 bool hasScalarVFOnly() const {
4999 bool HasScalarVFOnly = VFs.size() == 1 && VFs[0].isScalar();
5000 assert(HasScalarVFOnly == hasVF(ElementCount::getFixed(1)) &&
5001 "Plan with scalar VF should only have a single VF");
5002 return HasScalarVFOnly;
5003 }
5004
5005 bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); }
5006
5007 /// Returns the concrete UF of the plan, after unrolling.
5008 unsigned getConcreteUF() const {
5009 assert(UFs.size() == 1 && "Expected a single UF");
5010 return UFs[0];
5011 }
5012
5013 void setUF(unsigned UF) {
5014 assert(hasUF(UF) && "Cannot set the UF not already in plan");
5015 UFs.clear();
5016 UFs.insert(UF);
5017 }
5018
5019 /// Returns true if the VPlan already has been unrolled, i.e. it has a single
5020 /// concrete UF.
5021 bool isUnrolled() const { return UFs.size() == 1; }
5022
5023 /// Return a string with the name of the plan and the applicable VFs and UFs.
5024 std::string getName() const;
5025
5026 void setName(const Twine &newName) { Name = newName.str(); }
5027
5028 /// Gets the live-in VPIRValue for \p V or adds a new live-in (if none exists
5029 /// yet) for \p V.
5031 assert(V && "Trying to get or add the VPIRValue of a null Value");
5032 auto [It, Inserted] = LiveIns.try_emplace(V);
5033 if (Inserted) {
5034 if (auto *CI = dyn_cast<ConstantInt>(V))
5035 It->second = new VPConstantInt(CI);
5036 else
5037 It->second = new VPIRValue(V);
5038 }
5039
5040 assert(isa<VPIRValue>(It->second) &&
5041 "Only VPIRValues should be in mapping");
5042 return It->second;
5043 }
5045 assert(V && "Trying to get or add the VPIRValue of a null VPIRValue");
5046 return getOrAddLiveIn(V->getValue());
5047 }
5048
5049 /// Return a VPIRValue wrapping i1 true.
5050 VPIRValue *getTrue() { return getConstantInt(1, 1); }
5051
5052 /// Return a VPIRValue wrapping i1 false.
5053 VPIRValue *getFalse() { return getConstantInt(1, 0); }
5054
5055 /// Return a VPIRValue wrapping the null value of type \p Ty.
5056 VPIRValue *getZero(Type *Ty) { return getConstantInt(Ty, 0); }
5057
5058 /// Return a VPIRValue wrapping the AllOnes value of type \p Ty.
5060 return getConstantInt(APInt::getAllOnes(Ty->getIntegerBitWidth()));
5061 }
5062
5063 /// Return a VPIRValue wrapping a ConstantInt with the given type and value.
5064 VPIRValue *getConstantInt(Type *Ty, uint64_t Val, bool IsSigned = false) {
5065 return getOrAddLiveIn(ConstantInt::get(Ty, Val, IsSigned));
5066 }
5067
5068 /// Return a VPIRValue wrapping a ConstantInt with the given bitwidth and
5069 /// value.
5071 bool IsSigned = false) {
5072 return getConstantInt(APInt(BitWidth, Val, IsSigned));
5073 }
5074
5075 /// Return a VPIRValue wrapping a ConstantInt with the given APInt value.
5077 return getOrAddLiveIn(ConstantInt::get(getContext(), Val));
5078 }
5079
5080 /// Return the live-in VPIRValue for \p V, if there is one or nullptr
5081 /// otherwise.
5082 VPIRValue *getLiveIn(Value *V) const { return LiveIns.lookup(V); }
5083
5084 /// Return the list of live-in VPValues available in the VPlan.
5085 auto getLiveIns() const { return LiveIns.values(); }
5086
5087#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5088 /// Print the live-ins of this VPlan to \p O.
5089 void printLiveIns(raw_ostream &O) const;
5090
5091 /// Print this VPlan to \p O.
5092 LLVM_ABI_FOR_TEST void print(raw_ostream &O) const;
5093
5094 /// Print this VPlan in DOT format to \p O.
5095 LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const;
5096
5097 /// Dump the plan to stderr (for debugging).
5098 LLVM_DUMP_METHOD void dump() const;
5099#endif
5100
5101 /// Clone the current VPlan, update all VPValues of the new VPlan and cloned
5102 /// recipes to refer to the clones, and return it.
5104
5105 /// Create a new VPBasicBlock with \p Name and containing \p Recipe if
5106 /// present. The returned block is owned by the VPlan and deleted once the
5107 /// VPlan is destroyed.
5109 VPRecipeBase *Recipe = nullptr) {
5110 auto *VPB = new VPBasicBlock(Name, Recipe);
5111 CreatedBlocks.push_back(VPB);
5112 return VPB;
5113 }
5114
5115 /// Create a new loop region with a canonical IV using \p CanIVTy and
5116 /// \p DL. Use \p Name as the region's name and set entry and exiting blocks
5117 /// to \p Entry and \p Exiting respectively, if provided. The returned block
5118 /// is owned by the VPlan and deleted once the VPlan is destroyed.
5120 const std::string &Name = "",
5121 VPBlockBase *Entry = nullptr,
5122 VPBlockBase *Exiting = nullptr) {
5123 auto *VPB = new VPRegionBlock(CanIVTy, DL, Entry, Exiting, Name);
5124 CreatedBlocks.push_back(VPB);
5125 return VPB;
5126 }
5127
5128 /// Create a new replicate region with \p Entry, \p Exiting and \p Name. The
5129 /// returned block is owned by the VPlan and deleted once the VPlan is
5130 /// destroyed.
5132 const std::string &Name = "") {
5133 auto *VPB = new VPRegionBlock(Entry, Exiting, Name);
5134 CreatedBlocks.push_back(VPB);
5135 return VPB;
5136 }
5137
5138 /// Create a VPIRBasicBlock wrapping \p IRBB, but do not create
5139 /// VPIRInstructions wrapping the instructions in \p IRBB. The returned
5140 /// block is owned by the VPlan and deleted once the VPlan is destroyed.
5142
5143 /// Create a VPIRBasicBlock from \p IRBB containing VPIRInstructions for all
5144 /// instructions in \p IRBB, except its terminator which is managed by the
5145 /// successors of the block in VPlan. The returned block is owned by the VPlan
5146 /// and deleted once the VPlan is destroyed.
5148
5149 /// Returns true if the VPlan is based on a loop with an early exit. That is
5150 /// the case if the VPlan has either more than one exit block or a single exit
5151 /// block with multiple predecessors (one for the exit via the latch and one
5152 /// via the other early exit).
5153 bool hasEarlyExit() const {
5154 return count_if(ExitBlocks,
5155 [](VPIRBasicBlock *EB) { return EB->hasPredecessors(); }) >
5156 1 ||
5157 (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
5158 }
5159
5160 /// Returns true if the scalar tail may execute after the vector loop, i.e.
5161 /// if the middle block is a predecessor of the scalar preheader. Note that
5162 /// this relies on unneeded branches to the scalar tail loop being removed.
5163 bool hasScalarTail() const {
5164 auto *ScalarPH = getScalarPreheader();
5165 return ScalarPH &&
5166 is_contained(ScalarPH->getPredecessors(), getMiddleBlock());
5167 }
5168
5169 /// The type of the canonical induction variable of the vector loop.
5170 Type *getIndexType() const { return VF.getType(); }
5171};
5172
5173#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
5174inline raw_ostream &operator<<(raw_ostream &OS, const VPlan &Plan) {
5175 Plan.print(OS);
5176 return OS;
5177}
5178#endif
5179
5180} // end namespace llvm
5181
5182#endif // LLVM_TRANSFORMS_VECTORIZE_VPLAN_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file implements methods to test, set and extract typed bits from packed unsigned integers.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
#define LLVM_ABI_FOR_TEST
Definition Compiler.h:218
#define LLVM_PACKED_START
Definition Compiler.h:554
dxil translate DXIL Translate Metadata
Hexagon Common GEP
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
#define T
MachineInstr unsigned OpIdx
#define P(N)
static StringRef getName(Value *V)
static bool mayHaveSideEffects(MachineInstr &MI)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static const BasicSubtargetSubTypeKV * find(StringRef S, ArrayRef< BasicSubtargetSubTypeKV > A)
Find KV in array using binary search.
This file contains the declarations of the entities induced by Vectorization Plans,...
#define VP_CLASSOF_IMPL(VPRecipeID)
Definition VPlan.h:585
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM Basic Block Representation.
Definition BasicBlock.h:62
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
static DebugLoc getUnknown()
Definition DebugLoc.h:151
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:155
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags fromRaw(unsigned Flags)
unsigned getRaw() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
InnerLoopVectorizer vectorizes loops which contain only one basic block to a specified vectorization ...
bool isCast() const
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LoopVectorizationCostModel - estimates the expected speedups due to vectorization.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Metadata node.
Definition Metadata.h:1075
Root of the metadata hierarchy.
Definition Metadata.h:64
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an assumption made using SCEV expressions which can be checked at run-time.
This class represents an analyzed expression in the program.
This class provides computation of slot numbers for LLVM Assembly writing.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
This class represents a truncation of integer types.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
VPActiveLaneMaskPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4052
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPActiveLaneMaskPHIRecipe(VPValue *StartMask, DebugLoc DL)
Definition VPlan.h:4046
~VPActiveLaneMaskPHIRecipe() override=default
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4399
RecipeListTy::const_iterator const_iterator
Definition VPlan.h:4427
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
Definition VPlan.h:4474
RecipeListTy::const_reverse_iterator const_reverse_iterator
Definition VPlan.h:4429
RecipeListTy::iterator iterator
Instruction iterators...
Definition VPlan.h:4426
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4452
iplist< VPRecipeBase > RecipeListTy
Definition VPlan.h:4410
VPBasicBlock(const unsigned char BlockSC, const Twine &Name="")
Definition VPlan.h:4416
iterator end()
Definition VPlan.h:4436
iterator begin()
Recipe iterator methods.
Definition VPlan.h:4434
RecipeListTy::reverse_iterator reverse_iterator
Definition VPlan.h:4428
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
Definition VPlan.h:4487
const VPBasicBlock * getCFGPredecessor(unsigned Idx) const
Returns the predecessor block at index Idx with the predecessors as per the corresponding plain CFG.
Definition VPlan.cpp:763
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
Definition VPlan.cpp:266
~VPBasicBlock() override
Definition VPlan.h:4420
const_reverse_iterator rbegin() const
Definition VPlan.h:4440
reverse_iterator rend()
Definition VPlan.h:4441
RecipeListTy Recipes
The VPRecipes held in the order of output instructions to generate.
Definition VPlan.h:4414
VPRecipeBase & back()
Definition VPlan.h:4449
const VPRecipeBase & front() const
Definition VPlan.h:4446
const_iterator begin() const
Definition VPlan.h:4435
VPRecipeBase & front()
Definition VPlan.h:4447
const VPRecipeBase & back() const
Definition VPlan.h:4448
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4465
bool empty() const
Definition VPlan.h:4445
const_iterator end() const
Definition VPlan.h:4437
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4460
static RecipeListTy VPBasicBlock::* getSublistAccess(VPRecipeBase *)
Returns a pointer to a member of the recipe list.
Definition VPlan.h:4455
reverse_iterator rbegin()
Definition VPlan.h:4439
friend class VPlan
Definition VPlan.h:4400
size_t size() const
Definition VPlan.h:4444
const_reverse_iterator rend() const
Definition VPlan.h:4442
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:3008
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
Definition VPlan.h:3013
VPBlendRecipe(PHINode *Phi, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL)
The blend operation is a User of the incoming values and of their respective masks,...
Definition VPlan.h:2969
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
Definition VPlan.h:3003
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3025
VPBlendRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:2990
VPBlendRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2988
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
Definition VPlan.h:3019
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
Definition VPlan.h:2999
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
Definition VPlan.h:315
VPRegionBlock * getParent()
Definition VPlan.h:186
VPBlocksTy & getPredecessors()
Definition VPlan.h:223
iterator_range< VPBlockBase ** > predecessors()
Definition VPlan.h:220
LLVM_DUMP_METHOD void dump() const
Dump this VPBlockBase to dbgs().
Definition VPlan.h:385
void setName(const Twine &newName)
Definition VPlan.h:179
size_t getNumSuccessors() const
Definition VPlan.h:237
iterator_range< VPBlockBase ** > successors()
Definition VPlan.h:219
virtual void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Print plain-text dump of this VPBlockBase to O, prefixing all lines with Indent.
bool hasPredecessors() const
Returns true if this block has any predecessors.
Definition VPlan.h:217
void swapSuccessors()
Swap successors of the block. The block must have exactly 2 successors.
Definition VPlan.h:337
void printSuccessors(raw_ostream &O, const Twine &Indent) const
Print the successors of this block to O, prefixing all lines with Indent.
Definition VPlan.cpp:661
SmallVectorImpl< VPBlockBase * > VPBlocksTy
Definition VPlan.h:173
virtual ~VPBlockBase()=default
const VPBlocksTy & getHierarchicalPredecessors()
Definition VPlan.h:273
unsigned getIndexForSuccessor(const VPBlockBase *Succ) const
Returns the index for Succ in the blocks successor list.
Definition VPlan.h:350
size_t getNumPredecessors() const
Definition VPlan.h:238
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
Definition VPlan.h:306
VPBlockBase * getEnclosingBlockWithPredecessors()
Definition VPlan.cpp:258
unsigned getIndexForPredecessor(const VPBlockBase *Pred) const
Returns the index for Pred in the blocks predecessors list.
Definition VPlan.h:343
bool hasSuccessors() const
Returns true if this block has any successors.
Definition VPlan.h:215
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222
virtual VPBlockBase * clone()=0
Clone the current block and its recipes without updating the operands of the cloned recipes,...
enum { VPRegionBlockSC, VPBasicBlockSC, VPIRBasicBlockSC } VPBlockTy
An enumeration for keeping track of the concrete subclass of VPBlockBase that are actually instantiat...
Definition VPlan.h:171
virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx)=0
Return the cost of the block.
void setPlan(VPlan *ParentPlan)
Sets the pointer of the plan containing the block.
Definition VPlan.cpp:230
const VPRegionBlock * getParent() const
Definition VPlan.h:187
const std::string & getName() const
Definition VPlan.h:177
void clearSuccessors()
Remove all the successors of this block.
Definition VPlan.h:325
VPBlockBase * getSingleHierarchicalSuccessor()
Definition VPlan.h:263
void setTwoSuccessors(VPBlockBase *IfTrue, VPBlockBase *IfFalse)
Set two given VPBlockBases IfTrue and IfFalse to be the two successors of this VPBlockBase.
Definition VPlan.h:297
VPBlockBase * getSinglePredecessor() const
Definition VPlan.h:233
virtual void execute(VPTransformState *State)=0
The method which generates the output IR that corresponds to this VPBlockBase, thereby "executing" the...
const VPBlocksTy & getHierarchicalSuccessors()
Definition VPlan.h:257
void clearPredecessors()
Remove all the predecessors of this block.
Definition VPlan.h:322
friend class VPBlockUtils
Definition VPlan.h:95
unsigned getVPBlockID() const
Definition VPlan.h:184
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364
void swapPredecessors()
Swap predecessors of the block.
Definition VPlan.h:329
VPBlockBase(const unsigned char SC, const std::string &N)
Definition VPlan.h:163
VPBlocksTy & getSuccessors()
Definition VPlan.h:212
VPBlockBase * getEnclosingBlockWithSuccessors()
An Enclosing Block of a block B is any block containing B, including B itself.
Definition VPlan.cpp:250
void setOneSuccessor(VPBlockBase *Successor)
Set a given VPBlockBase Successor as the single successor of this VPBlockBase.
Definition VPlan.h:286
void setParent(VPRegionBlock *P)
Definition VPlan.h:197
VPBlockBase * getSingleHierarchicalPredecessor()
Definition VPlan.h:279
VPBlockBase * getSingleSuccessor() const
Definition VPlan.h:227
const VPBlocksTy & getSuccessors() const
Definition VPlan.h:211
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlan.h:3516
VPBranchOnMaskRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3500
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3524
VPBranchOnMaskRecipe(VPValue *BlockInMask, DebugLoc DL)
Definition VPlan.h:3497
VPlan-based builder utility analogous to IRBuilder.
VPRegionValue * getRegionValue()
Definition VPlan.h:4593
VPCanonicalIVInfo(Type *Ty, DebugLoc DL, VPRegionBlock *Region)
Definition VPlan.h:4590
const VPRegionValue * getRegionValue() const
Definition VPlan.h:4594
bool hasNUW() const
Definition VPlan.h:4596
VPCurrentIterationPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4084
VPCurrentIterationPHIRecipe(VPValue *StartIV, DebugLoc DL)
Definition VPlan.h:4078
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPCurrentIterationPHIRecipe.
Definition VPlan.h:4096
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:4090
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4103
~VPCurrentIterationPHIRecipe() override=default
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4220
VPValue * getIndex() const
Definition VPlan.h:4217
const FPMathOperator * getFPBinOp() const
Definition VPlan.h:4219
VPIRValue * getStartValue() const
Definition VPlan.h:4216
VPValue * getStepValue() const
Definition VPlan.h:4218
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4208
VPDerivedIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4201
VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, const FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *IV, VPValue *Step)
Definition VPlan.h:4192
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPDerivedIVRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4223
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPIRValue *Start, VPValue *CanonicalIV, VPValue *Step)
Definition VPlan.h:4185
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4021
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPExpandSCEVRecipe.
Definition VPlan.h:4026
VPExpandSCEVRecipe(const SCEV *Expr)
const SCEV * getSCEV() const
Definition VPlan.h:4032
VPExpandSCEVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4017
~VPExpandSCEVRecipe() override=default
void execute(VPTransformState &State) override
Method for generating code, must not be called as this recipe is abstract.
Definition VPlan.h:3673
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
VPValue * getOperandOfResultType() const
Return the VPValue to use to infer the result type of the recipe.
Definition VPlan.h:3655
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3586
VPExpressionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3637
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
~VPExpressionRecipe() override
Definition VPlan.h:3625
VPExpressionRecipe(VPWidenCastRecipe *Ext, VPReductionRecipe *Red)
Definition VPlan.h:3584
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3601
VPExpressionRecipe(VPWidenCastRecipe *Ext0, VPWidenCastRecipe *Ext1, VPWidenRecipe *Mul, VPWidenRecipe *Neg, VPReductionRecipe *Red)
Definition VPlan.h:3605
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
unsigned getVFScaleFactor() const
Definition VPlan.h:3667
VPExpressionRecipe(VPWidenRecipe *Mul, VPReductionRecipe *Red)
Definition VPlan.h:3599
A pure virtual base class for all recipes modeling header phis, including phis for first order recurr...
Definition VPlan.h:2437
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2439
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2448
void addBackedgeValue(VPValue *V)
Add V as the incoming value from the loop backedge.
Definition VPlan.h:2492
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2461
static bool classof(const VPValue *V)
Definition VPlan.h:2458
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override=0
Print the recipe.
virtual VPValue * getBackedgeValue()
Returns the incoming value from the loop backedge.
Definition VPlan.h:2484
VPHeaderPHIRecipe(unsigned char VPRecipeID, Instruction *UnderlyingInstr, VPValue *Start, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2444
void setBackedgeValue(VPValue *V)
Update the incoming value from the loop backedge.
Definition VPlan.h:2489
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2473
void setStartValue(VPValue *V)
Update the start value of the recipe.
Definition VPlan.h:2481
static bool classof(const VPRecipeBase *R)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:2454
VPValue * getStartValue() const
Definition VPlan.h:2476
void execute(VPTransformState &State) override=0
Generate the phi nodes.
virtual VPRecipeBase & getBackedgeRecipe()
Returns the backedge value as a recipe.
Definition VPlan.h:2502
~VPHeaderPHIRecipe() override=default
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
Definition VPlan.h:2156
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
VPHistogramRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2169
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed uncond...
Definition VPlan.h:2186
unsigned getOpcode() const
Definition VPlan.h:2182
VP_CLASSOF_IMPL(VPRecipeBase::VPHistogramSC)
~VPHistogramRecipe() override=default
VPHistogramRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2161
A special type of VPBasicBlock that wraps an existing IR basic block.
Definition VPlan.h:4552
void execute(VPTransformState *State) override
The method which generates the output IR instructions that correspond to this VPBasicBlock,...
Definition VPlan.cpp:473
BasicBlock * getIRBasicBlock() const
Definition VPlan.h:4576
static bool classof(const VPBlockBase *V)
Definition VPlan.h:4566
~VPIRBasicBlock() override=default
friend class VPlan
Definition VPlan.h:4553
VPIRBasicBlock * clone() override
Clone the current block and its recipes, without updating the operands of the cloned recipes.
Definition VPlan.cpp:498
Class to record and manage LLVM IR flags.
Definition VPlan.h:694
FastMathFlagsTy FMFs
Definition VPlan.h:782
ReductionFlagsTy ReductionFlags
Definition VPlan.h:784
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
bool hasNoWrapFlags() const
Definition VPlan.h:1037
VPIRFlags(RecurKind Kind, bool IsOrdered, bool IsInLoop, FastMathFlags FMFs)
Definition VPlan.h:875
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
VPIRFlags(DisjointFlagsTy DisjointFlags)
Definition VPlan.h:855
VPIRFlags(WrapFlagsTy WrapFlags)
Definition VPlan.h:841
WrapFlagsTy WrapFlags
Definition VPlan.h:776
void printFlags(raw_ostream &O) const
VPIRFlags(CmpInst::Predicate Pred, FastMathFlags FMFs)
Definition VPlan.h:834
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:999
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
bool isReductionOrdered() const
Definition VPlan.h:1063
TruncFlagsTy TruncFlags
Definition VPlan.h:777
CmpInst::Predicate getPredicate() const
Definition VPlan.h:971
WrapFlagsTy getNoWrapFlags() const
Definition VPlan.h:1047
uint8_t AllFlags[2]
Definition VPlan.h:785
bool hasNonNegFlag() const
Returns true if the recipe has non-negative flag.
Definition VPlan.h:1007
void transferFlags(VPIRFlags &Other)
Definition VPlan.h:880
ExactFlagsTy ExactFlags
Definition VPlan.h:779
bool hasNoSignedWrap() const
Definition VPlan.h:1026
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
bool isDisjoint() const
Definition VPlan.h:1051
VPIRFlags(TruncFlagsTy TruncFlags)
Definition VPlan.h:846
VPIRFlags(FastMathFlags FMFs)
Definition VPlan.h:851
VPIRFlags(NonNegFlagsTy NonNegFlags)
Definition VPlan.h:860
VPIRFlags(CmpInst::Predicate Pred)
Definition VPlan.h:829
uint8_t GEPFlagsStorage
Definition VPlan.h:780
VPIRFlags(ExactFlagsTy ExactFlags)
Definition VPlan.h:865
bool isNonNeg() const
Definition VPlan.h:1009
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:989
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:994
DisjointFlagsTy DisjointFlags
Definition VPlan.h:778
void setPredicate(CmpInst::Predicate Pred)
Definition VPlan.h:979
bool hasNoUnsignedWrap() const
Definition VPlan.h:1015
FCmpFlagsTy FCmpFlags
Definition VPlan.h:783
NonNegFlagsTy NonNegFlags
Definition VPlan.h:781
bool isReductionInLoop() const
Definition VPlan.h:1069
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
Definition VPlan.h:891
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:928
VPIRFlags(GEPNoWrapFlags GEPFlags)
Definition VPlan.h:870
uint8_t CmpPredStorage
Definition VPlan.h:775
RecurKind getRecurKind() const
Definition VPlan.h:1057
VPIRFlags(Instruction &I)
Definition VPlan.h:791
Instruction & getInstruction() const
Definition VPlan.h:1745
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first part of operand Op.
Definition VPlan.h:1753
~VPIRInstruction() override=default
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPIRInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1732
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:1759
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
bool usesScalars(const VPValue *Op) const override
Returns true if the VPUser uses scalars of operand Op.
Definition VPlan.h:1747
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1720
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Helper to manage IR metadata for recipes.
Definition VPlan.h:1170
VPIRMetadata & operator=(const VPIRMetadata &Other)=default
MDNode * getMetadata(unsigned Kind) const
Get metadata of kind Kind. Returns nullptr if not found.
Definition VPlan.h:1206
VPIRMetadata(Instruction &I)
Adds metadata that can be preserved from the original instruction I.
Definition VPlan.h:1178
VPIRMetadata(const VPIRMetadata &Other)=default
Copy constructor for cloning.
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
VPIRMetadata()=default
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
void applyMetadata(Instruction &I) const
Add all metadata to I.
void setMetadata(unsigned Kind, MDNode *Node)
Set metadata with kind Kind to Node.
Definition VPlan.h:1190
VPInstructionWithType(unsigned Opcode, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Definition VPlan.h:1540
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlan.h:1582
static bool classof(const VPUser *R)
Definition VPlan.h:1567
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1547
Type * getResultType() const
Definition VPlan.h:1588
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1571
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the instruction.
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1225
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
Definition VPlan.h:1472
iterator_range< operand_iterator > operandsWithoutMask()
Returns an iterator range over the operands excluding the mask operand if present.
Definition VPlan.h:1494
VPInstruction * clone() override
Clone the current recipe.
Definition VPlan.h:1403
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1327
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1318
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1331
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1343
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1321
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1268
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1314
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1263
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1260
@ VScale
Returns the value for vscale.
Definition VPlan.h:1347
@ CanonicalIVIncrementForPart
Definition VPlan.h:1244
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1271
bool hasResult() const
Definition VPlan.h:1437
iterator_range< const_operand_iterator > operandsWithoutMask() const
Definition VPlan.h:1497
void addMask(VPValue *Mask)
Add mask Mask to an unmasked VPInstruction, if it needs masking.
Definition VPlan.h:1477
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1519
unsigned getOpcode() const
Definition VPlan.h:1416
void setName(StringRef NewName)
Set the symbolic name for the VPInstruction.
Definition VPlan.h:1522
VPValue * getMask() const
Returns the mask for the VPInstruction.
Definition VPlan.h:1488
VPInstruction * cloneWithOperands(ArrayRef< VPValue * > NewOperands, Type *ResultTy=nullptr)
Definition VPlan.h:1407
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1462
A common base class for interleaved memory operations.
Definition VPlan.h:3050
virtual unsigned getNumStoreOperands() const =0
Returns the number of stored operands of this interleave group.
bool usesFirstLaneOnly(const VPValue *Op) const override=0
Returns true if the recipe only uses the first lane of operand Op.
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3113
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:3119
static bool classof(const VPUser *U)
Definition VPlan.h:3095
VPInterleaveBase(const unsigned char SC, const InterleaveGroup< Instruction > *IG, ArrayRef< VPValue * > Operands, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3062
Instruction * getInsertPos() const
Definition VPlan.h:3117
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3090
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3115
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3107
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3136
VPInterleaveBase * clone() override=0
Clone the current recipe.
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3101
bool usesFirstLaneOnly(const VPValue *Op) const override
The recipe only uses the first lane of the address, and EVL operand.
Definition VPlan.h:3216
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3210
~VPInterleaveEVLRecipe() override=default
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3223
VPInterleaveEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3203
VPInterleaveEVLRecipe(VPInterleaveRecipe &R, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3190
VPInterleaveRecipe is a recipe for transforming an interleave group of loads or stores into one wide l...
Definition VPlan.h:3146
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3173
~VPInterleaveRecipe() override=default
VPInterleaveRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3156
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3167
VPInterleaveRecipe(const InterleaveGroup< Instruction > *IG, VPValue *Addr, ArrayRef< VPValue * > StoredValues, VPValue *Mask, bool NeedsMaskForGaps, const VPIRMetadata &MD, DebugLoc DL)
Definition VPlan.h:3148
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
A VPRecipeValue defined by a multi-def recipe, stores a pointer to it.
Definition VPlanValue.h:364
Helper type to provide functions to access incoming values and blocks for phi-like recipes.
Definition VPlan.h:1600
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
VPUser::const_operand_range incoming_values() const
Returns an iterator range over the incoming values.
Definition VPlan.h:1629
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
Definition VPlan.h:1658
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1624
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4543
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1649
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1609
virtual ~VPPhiAccessors()=default
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the recipe.
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
iterator_range< mapped_iterator< detail::index_iterator, std::function< const VPBasicBlock *(size_t)> > > const_incoming_blocks_range
Definition VPlan.h:1634
const_incoming_blocks_range incoming_blocks() const
Returns an iterator range over the incoming blocks.
Definition VPlan.h:1638
~VPPredInstPHIRecipe() override=default
VPPredInstPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3713
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPPredInstPHIRecipe.
Definition VPlan.h:3724
VPPredInstPHIRecipe(VPValue *PredV, DebugLoc DL)
Construct a VPPredInstPHIRecipe given PredInst whose value needs a phi node after merging back from ...
Definition VPlan.h:3708
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
bool mayReadOrWriteMemory() const
Returns true if the recipe may read from or write to memory.
Definition VPlan.h:550
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...
VPRegionBlock * getRegion()
Definition VPlan.h:4744
enum { VPBranchOnMaskSC, VPDerivedIVSC, VPExpandSCEVSC, VPExpressionSC, VPIRInstructionSC, VPInstructionSC, VPInterleaveEVLSC, VPInterleaveSC, VPReductionEVLSC, VPReductionSC, VPReplicateSC, VPScalarIVStepsSC, VPVectorPointerSC, VPVectorEndPointerSC, VPWidenCallSC, VPWidenCanonicalIVSC, VPWidenCastSC, VPWidenGEPSC, VPWidenIntrinsicSC, VPWidenMemIntrinsicSC, VPWidenLoadEVLSC, VPWidenLoadSC, VPWidenStoreEVLSC, VPWidenStoreSC, VPWidenSC, VPBlendSC, VPHistogramSC, VPWidenPHISC, VPPredInstPHISC, VPCurrentIterationPHISC, VPActiveLaneMaskPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, VPReductionPHISC, VPFirstPHISC=VPWidenPHISC, VPFirstHeaderPHISC=VPCurrentIterationPHISC, VPLastHeaderPHISC=VPReductionPHISC, VPLastPHISC=VPReductionPHISC, } VPRecipeTy
An enumeration for keeping track of the concrete subclass of VPRecipeBase that is actually instantiat...
Definition VPlan.h:420
void setDebugLoc(DebugLoc NewDL)
Set the recipe's debug location to NewDL.
Definition VPlan.h:558
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
~VPRecipeBase() override=default
VPBasicBlock * getParent()
Definition VPlan.h:477
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555
virtual void execute(VPTransformState &State)=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
static bool classof(const VPDef *D)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:526
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
virtual VPRecipeBase * clone()=0
Clone the current recipe.
friend class VPBlockUtils
Definition VPlan.h:404
const VPBasicBlock * getParent() const
Definition VPlan.h:478
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
static bool classof(const VPUser *U)
Definition VPlan.h:531
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
unsigned getVPRecipeID() const
Definition VPlan.h:523
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3382
VPReductionEVLRecipe(VPReductionRecipe &R, VPValue &EVL, VPValue *CondOp, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3361
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3385
VPReductionEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3372
~VPReductionEVLRecipe() override=default
bool isOrdered() const
Returns true if the phi is part of an ordered reduction.
Definition VPlan.h:2930
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
Definition VPlan.h:2916
VPReductionPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2898
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2909
~VPReductionPHIRecipe() override=default
bool hasUsesOutsideReductionChain() const
Returns true if the phi is part of a multi-use reduction.
Definition VPlan.h:2942
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2924
VPReductionPHIRecipe(PHINode *Phi, RecurKind Kind, VPValue &Start, VPValue &BackedgeValue, ReductionStyle Style, const VPIRFlags &Flags, bool HasUsesOutsideReductionChain=false)
Create a new VPReductionPHIRecipe for the reduction Phi.
Definition VPlan.h:2879
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2933
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2947
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPReductionPHIRecipe * cloneWithOperands(VPValue *Start, VPValue *BackedgeValue)
Definition VPlan.h:2891
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:2939
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
Definition VPlan.h:2927
A recipe to represent in-loop, ordered or partial reduction operations.
Definition VPlan.h:3239
VPReductionRecipe(const unsigned char SC, RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, ArrayRef< VPValue * > Operands, VPValue *CondOp, ReductionStyle Style, DebugLoc DL)
Definition VPlan.h:3248
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3324
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:3293
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:3308
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3335
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3337
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3320
VPReductionRecipe(RecurKind RdxKind, FastMathFlags FMFs, Instruction *I, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3273
bool isOrdered() const
Return true if the in-loop reduction is ordered.
Definition VPlan.h:3322
VPReductionRecipe(const RecurKind RdxKind, FastMathFlags FMFs, VPValue *ChainOp, VPValue *VecOp, VPValue *CondOp, ReductionStyle Style, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3279
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3326
~VPReductionRecipe() override=default
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3333
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3328
VPReductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3287
static bool classof(const VPUser *U)
Definition VPlan.h:3298
static bool classof(const VPValue *VPV)
Definition VPlan.h:3303
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:3342
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4609
const VPBlockBase * getEntry() const
Definition VPlan.h:4653
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4685
~VPRegionBlock() override=default
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4670
VPBlockBase * getExiting()
Definition VPlan.h:4666
const VPRegionValue * getCanonicalIV() const
Definition VPlan.h:4724
void setEntry(VPBlockBase *EntryBlock)
Set EntryBlock as the entry VPBlockBase of this VPRegionBlock.
Definition VPlan.h:4658
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
Definition VPlan.h:4729
bool hasCanonicalIVNUW() const
Indicates if NUW is set for the canonical IV increment, for loop regions.
Definition VPlan.h:4734
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
Definition VPlan.h:4737
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
Definition VPlan.h:4721
const VPBlockBase * getExiting() const
Definition VPlan.h:4665
VPBlockBase * getEntry()
Definition VPlan.h:4654
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
Definition VPlan.h:4678
friend class VPlan
Definition VPlan.h:4610
static bool classof(const VPBlockBase *V)
Method to support type inquiry through isa, cast, and dyn_cast.
Definition VPlan.h:4649
VPValues defined by a VPRegionBlock, like the canonical IV.
Definition VPlanValue.h:215
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
Definition VPlan.h:3404
bool isSingleScalar() const
Definition VPlan.h:3460
VPReplicateRecipe(Instruction *I, ArrayRef< VPValue * > Operands, bool IsSingleScalar, VPValue *Mask=nullptr, const VPIRFlags &Flags={}, VPIRMetadata Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:3412
~VPReplicateRecipe() override=default
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
VPReplicateRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:3434
bool usesScalars(const VPValue *Op) const override
Returns true if the recipe uses scalars of operand Op.
Definition VPlan.h:3472
bool isPredicated() const
Definition VPlan.h:3462
VPReplicateRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3432
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3465
unsigned getOpcode() const
Definition VPlan.h:3484
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Definition VPlan.h:3479
Instruction::BinaryOps getInductionOpcode() const
Definition VPlan.h:4315
VPValue * getStepValue() const
Definition VPlan.h:4288
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPScalarIVStepsRecipe.
Definition VPlan.h:4282
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV, VPValue *Step, VPValue *VF, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:4255
void setStartIndex(VPValue *StartIndex)
Set or add the StartIndex operand.
Definition VPlan.h:4301
VPScalarIVStepsRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4267
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4296
VPValue * getVFValue() const
Return the number of scalars to produce per unroll part, used to compute StartIndex during unrolling.
Definition VPlan.h:4292
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF, Instruction::BinaryOps Opcode, FastMathFlags FMFs, DebugLoc DL)
Definition VPlan.h:4248
~VPScalarIVStepsRecipe() override=default
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:4309
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:608
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Value *UV, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:614
static bool classof(const VPValue *V)
Definition VPlan.h:666
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:679
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:623
const Instruction * getUnderlyingInstr() const
Definition VPlan.h:682
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, Value *UV=nullptr, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:618
static bool classof(const VPUser *U)
Definition VPlan.h:671
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:610
LLVM_ABI_FOR_TEST VPSingleDefValue(VPSingleDefRecipe *Def, Value *UV=nullptr, Type *Ty=nullptr)
Construct a VPSingleDefValue. Must only be used by VPSingleDefRecipe.
Definition VPlan.cpp:169
This class can be used to assign names to VPValues.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1527
operand_range operands()
Definition VPlanValue.h:457
void setOperand(unsigned I, VPValue *New)
Definition VPlanValue.h:430
unsigned getNumOperands() const
Definition VPlanValue.h:424
friend class VPWidenMemoryRecipe
Grant access to addOperand for VPWidenMemoryRecipe.
Definition VPlanValue.h:388
operand_iterator op_end()
Definition VPlanValue.h:455
operand_iterator op_begin()
Definition VPlanValue.h:453
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:425
VPUser(ArrayRef< VPValue * > Operands)
Definition VPlanValue.h:405
iterator_range< const_operand_iterator > const_operand_range
Definition VPlanValue.h:451
iterator_range< operand_iterator > operand_range
Definition VPlanValue.h:450
void addOperand(VPValue *Operand)
Definition VPlanValue.h:410
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208
unsigned getNumUsers() const
Definition VPlanValue.h:115
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2307
VPValue * getVFValue() const
Definition VPlan.h:2288
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Type * getSourceElementType() const
Definition VPlan.h:2285
int64_t getStride() const
Definition VPlan.h:2286
VPVectorEndPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2328
VPValue * getOffset() const
Definition VPlan.h:2289
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2321
void addOffset(VPValue *Offset)
Append Offset as the offset operand.
Definition VPlan.h:2299
VPVectorEndPointerRecipe(VPValue *Ptr, VPValue *VF, Type *SourceElementTy, int64_t Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2275
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorEndPointerRecipe.
Definition VPlan.h:2314
VPValue * getPointer() const
Definition VPlan.h:2287
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
void addPerPartOffset(VPValue *VFxPart)
Add the per-part offset (VFxPart) used for unrolled parts > 0.
Definition VPlan.h:2369
VPValue * getStride() const
Definition VPlan.h:2362
Type * getSourceElementType() const
Definition VPlan.h:2377
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlan.h:2379
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlan.h:2386
VPVectorPointerRecipe(VPValue *Ptr, Type *SourceElementTy, VPValue *Stride, GEPNoWrapFlags GEPFlags, DebugLoc DL)
Definition VPlan.h:2353
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPVectorPointerRecipe.
Definition VPlan.h:2403
VPVectorPointerRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2393
VPValue * getVFxPart() const
Definition VPlan.h:2364
A recipe for widening Call instructions using library calls.
Definition VPlan.h:2090
VPWidenCallRecipe(Value *UV, Function *Variant, ArrayRef< VPValue * > CallArguments, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:2097
const_operand_range args() const
Definition VPlan.h:2138
VPWidenCallRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2116
operand_range args()
Definition VPlan.h:2137
Function * getCalledScalarFunction() const
Definition VPlan.h:2133
~VPWidenCallRecipe() override=default
VPWidenCanonicalIVRecipe(VPRegionValue *CanonicalIV, const VPIRFlags::WrapFlagsTy &Flags={false, false})
Definition VPlan.h:4122
~VPWidenCanonicalIVRecipe() override=default
VPValue * getStepValue() const
Definition VPlan.h:4155
void addPerPartStep(VPValue *Step)
Add the per-part step (VF * Part) used for unrolled parts.
Definition VPlan.h:4160
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCanonicalIVRecipe.
Definition VPlan.h:4144
VPRegionValue * getCanonicalIV() const
Return the canonical IV being widened.
Definition VPlan.h:4151
VPWidenCanonicalIVRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:4129
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlan.h:4139
VPWidenCastRecipe is a recipe to create vector cast instructions.
Definition VPlan.h:1871
Instruction::CastOps getOpcode() const
Definition VPlan.h:1907
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
~VPWidenCastRecipe() override=default
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, CastInst *CI=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1876
VPWidenCastRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1892
unsigned getOpcode() const
This recipe generates a GEP instruction.
Definition VPlan.h:2237
Type * getSourceElementType() const
Definition VPlan.h:2242
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenGEPRecipe.
Definition VPlan.h:2245
VPWidenGEPRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2228
~VPWidenGEPRecipe() override=default
VPWidenGEPRecipe(Type *SourceElementTy, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, DebugLoc DL=DebugLoc::getUnknown(), GetElementPtrInst *UV=nullptr)
Definition VPlan.h:2211
void execute(VPTransformState &State) override=0
Generate the phi nodes.
ArrayRef< const SCEVPredicate * > getNoWrapPredicates() const
Returns the SCEV predicates associated with this induction.
Definition VPlan.h:2591
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2610
static bool classof(const VPValue *V)
Definition VPlan.h:2553
void setStepValue(VPValue *V)
Update the step value of the recipe.
Definition VPlan.h:2572
VPValue * getBackedgeValue() override
Returns the incoming value from the loop backedge.
Definition VPlan.h:2595
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2565
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2580
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
Definition VPlan.h:2583
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, DebugLoc DL)
Definition VPlan.h:2521
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2568
VPWidenInductionRecipe(unsigned char Kind, PHINode *IV, VPValue *Start, VPValue *Step, const InductionDescriptor &IndDesc, Type *ResultTy, DebugLoc DL)
Definition VPlan.h:2527
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
Definition VPlan.h:2588
VPRecipeBase & getBackedgeRecipe() override
Returns the backedge value as a recipe.
Definition VPlan.h:2602
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:2548
const VPValue * getVFValue() const
Definition VPlan.h:2575
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2558
const VPValue * getStepValue() const
Definition VPlan.h:2569
void addUnrolledPartOperands(VPValue *SplatVFStep, VPValue *LastPart)
After unrolling, append the splat-VF step (VF * step) and the value of the induction at the last unro...
Definition VPlan.h:2536
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2671
const TruncInst * getTruncInst() const
Definition VPlan.h:2687
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlan.h:2665
~VPWidenIntOrFpInductionRecipe() override=default
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
Definition VPlan.h:2675
VPWidenIntOrFpInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2657
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2631
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2686
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPIRValue *Start, VPValue *Step, VPValue *VF, const InductionDescriptor &IndDesc, TruncInst *Trunc, const VPIRFlags &Flags, DebugLoc DL)
Definition VPlan.h:2640
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
Definition VPlan.h:2697
unsigned getNumIncoming() const override
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:2682
bool isCanonical() const
Returns true if the induction is canonical, i.e.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
A recipe for widening vector intrinsics.
Definition VPlan.h:1918
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1968
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2022
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
bool mayReadFromMemory() const
Returns true if the intrinsic may read from memory.
Definition VPlan.h:2028
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1954
bool mayHaveSideEffects() const
Returns true if the intrinsic may have side-effects.
Definition VPlan.h:2034
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:2004
static bool classof(const VPValue *V)
Definition VPlan.h:1999
VPWidenIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1979
VPWidenIntrinsicRecipe(const unsigned char SC, Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1932
bool mayWriteToMemory() const
Returns true if the intrinsic may write to memory.
Definition VPlan.h:2031
~VPWidenIntrinsicRecipe() override=default
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1989
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
static bool classof(const VPUser *U)
Definition VPlan.h:1994
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
~VPWidenMemIntrinsicRecipe() override=default
VPWidenMemIntrinsicRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2067
VPWidenMemIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID, ArrayRef< VPValue * > CallArguments, Type *Ty, Align Alignment, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:2053
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
A common mixin class for widening memory operations.
Definition VPlan.h:3740
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3751
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3776
virtual ~VPWidenMemoryRecipe()=default
Instruction & Ingredient
Definition VPlan.h:3742
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Instruction & getIngredient() const
Definition VPlan.h:3798
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3748
virtual const VPRecipeBase * getAsRecipe() const =0
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3786
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3745
VPWidenMemoryRecipe(Instruction &I, bool Consecutive, const VPIRMetadata &Metadata)
Definition VPlan.h:3763
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
bool isMasked() const
Returns true if the recipe is masked.
Definition VPlan.h:3782
void setMask(VPValue *Mask)
Definition VPlan.h:3753
Align getAlign() const
Returns the alignment of the memory access.
Definition VPlan.h:3793
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3779
A recipe for widened phis.
Definition VPlan.h:2755
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:2797
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
VPWidenPHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2775
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
~VPWidenPHIRecipe() override=default
VPWidenPHIRecipe(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new VPWidenPHIRecipe with incoming values IncomingValues, debug location DL and Name.
Definition VPlan.h:2762
void execute(VPTransformState &State) override
Generate the phi/select nodes.
VPWidenPointerInductionRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2724
~VPWidenPointerInductionRecipe() override=default
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
void execute(VPTransformState &State) override
Generate vector values for the pointer induction.
Definition VPlan.h:2733
VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, const InductionDescriptor &IndDesc, DebugLoc DL)
Create a new VPWidenPointerInductionRecipe for Phi with start value Start and the number of elements ...
Definition VPlan.h:2714
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
Definition VPlan.h:1810
VPWidenRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:1831
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:1860
VPWidenRecipe(Instruction &I, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1814
VPWidenRecipe(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &Metadata={}, DebugLoc DL={})
Definition VPlan.h:1821
~VPWidenRecipe() override=default
VPWidenRecipe * cloneWithOperands(ArrayRef< VPValue * > NewOperands)
Definition VPlan.h:1833
unsigned getOpcode() const
Definition VPlan.h:1850
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4757
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
Definition VPlan.h:5082
LLVM_ABI_FOR_TEST void printDOT(raw_ostream &O) const
Print this VPlan in DOT format to O.
Definition VPlan.cpp:1177
friend class VPSlotTracker
Definition VPlan.h:4759
std::string getName() const
Return a string with the name of the plan and the applicable VFs and UFs.
Definition VPlan.cpp:1153
bool hasVF(ElementCount VF) const
Definition VPlan.h:4980
ElementCount getSingleVF() const
Returns the single VF of the plan, asserting that the plan has exactly one VF.
Definition VPlan.h:4993
const DataLayout & getDataLayout() const
Definition VPlan.h:4962
LLVMContext & getContext() const
Definition VPlan.h:4958
VPBasicBlock * getEntry()
Definition VPlan.h:4853
Type * getIndexType() const
The type of the canonical induction variable of the vector loop.
Definition VPlan.h:5170
void setName(const Twine &newName)
Definition VPlan.h:5026
bool hasScalableVF() const
Definition VPlan.h:4981
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4916
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
Definition VPlan.h:4937
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
Definition VPlan.h:4987
VPIRBasicBlock * getExitBlock(BasicBlock *IRBB) const
Return the VPIRBasicBlock corresponding to IRBB.
Definition VPlan.cpp:902
LLVM_ABI_FOR_TEST ~VPlan()
Definition VPlan.cpp:885
VPIRValue * getOrAddLiveIn(VPIRValue *V)
Definition VPlan.h:5044
bool isExitBlock(VPBlockBase *VPBB)
Returns true if VPBB is an exit block.
Definition VPlan.cpp:910
const VPBasicBlock * getEntry() const
Definition VPlan.h:4854
friend class VPlanPrinter
Definition VPlan.h:4758
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
Definition VPlan.h:5053
VPIRValue * getConstantInt(const APInt &Val)
Return a VPIRValue wrapping a ConstantInt with the given APInt value.
Definition VPlan.h:5076
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
Definition VPlan.h:4956
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
Definition VPlan.h:5059
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
Definition VPlan.h:5131
VPIRBasicBlock * createEmptyVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock wrapping IRBB, but do not create VPIRInstructions wrapping the instructions i...
Definition VPlan.cpp:1312
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
Definition VPlan.h:5085
bool hasUF(unsigned UF) const
Definition VPlan.h:5005
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
Definition VPlan.h:4906
VPlan(BasicBlock *ScalarHeaderBB, Type *IdxTy)
Construct a VPlan with a new VPBasicBlock as entry, a VPIRBasicBlock wrapping ScalarHeaderBB and vect...
Definition VPlan.h:4834
VPSymbolicValue & getVectorTripCount()
The vector trip count.
Definition VPlan.h:4946
VPValue * getBackedgeTakenCount() const
Definition VPlan.h:4943
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
Definition VPlan.h:5030
VPRegionBlock * createLoopRegion(Type *CanIVTy, DebugLoc DL, const std::string &Name="", VPBlockBase *Entry=nullptr, VPBlockBase *Exiting=nullptr)
Create a new loop region with a canonical IV using CanIVTy and DL.
Definition VPlan.h:5119
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
Definition VPlan.h:5056
void setVF(ElementCount VF)
Definition VPlan.h:4968
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
Definition VPlan.h:5021
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
Definition VPlan.cpp:1068
bool hasEarlyExit() const
Returns true if the VPlan is based on a loop with an early exit.
Definition VPlan.h:5153
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this plan.
Definition VPlan.cpp:1050
LLVM_ABI_FOR_TEST bool isOuterLoop() const
Returns true if this VPlan is for an outer loop, i.e., its vector loop region contains a nested loop ...
Definition VPlan.cpp:1083
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
Definition VPlan.h:5008
VPIRValue * getConstantInt(unsigned BitWidth, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given bitwidth and value.
Definition VPlan.h:5070
const VPBasicBlock * getMiddleBlock() const
Definition VPlan.h:4891
void setTripCount(VPValue *NewTripCount)
Set the trip count assuming it is currently null; if it is not - use resetTripCount().
Definition VPlan.h:4923
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
Definition VPlan.h:4930
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
Definition VPlan.h:4882
void setEntry(VPBasicBlock *VPBB)
Definition VPlan.h:4842
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
Definition VPlan.h:5108
LLVM_ABI_FOR_TEST VPIRBasicBlock * createVPIRBasicBlock(BasicBlock *IRBB)
Create a VPIRBasicBlock from IRBB containing VPIRInstructions for all instructions in IRBB,...
Definition VPlan.cpp:1318
void removeVF(ElementCount VF)
Remove VF from the plan.
Definition VPlan.h:4975
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
Definition VPlan.h:5050
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
Definition VPlan.h:4858
LLVM_DUMP_METHOD void dump() const
Dump the plan to stderr (for debugging).
Definition VPlan.cpp:1183
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
Definition VPlan.h:4953
bool hasScalarVFOnly() const
Definition VPlan.h:4998
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
Definition VPlan.h:4896
void execute(VPTransformState *State)
Generate the IR code for this VPlan.
Definition VPlan.cpp:920
LLVM_ABI_FOR_TEST void print(raw_ostream &O) const
Print this VPlan to O.
Definition VPlan.cpp:1136
void addVF(ElementCount VF)
Definition VPlan.h:4966
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
Definition VPlan.h:4902
void printLiveIns(raw_ostream &O) const
Print the live-ins of this VPlan to O.
Definition VPlan.cpp:1092
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
Definition VPlan.h:4949
void setUF(unsigned UF)
Definition VPlan.h:5013
const VPSymbolicValue & getVF() const
Definition VPlan.h:4950
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
Definition VPlan.h:5163
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
Definition VPlan.cpp:1224
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5064
LLVM Value Representation.
Definition Value.h:75
Increasing range of size_t indices.
Definition STLExtras.h:2506
typename base_list_type::const_reverse_iterator const_reverse_iterator
Definition ilist.h:124
typename base_list_type::reverse_iterator reverse_iterator
Definition ilist.h:123
typename base_list_type::const_iterator const_iterator
Definition ilist.h:122
An intrusive list with ownership and callbacks specified/controlled by ilist_traits,...
Definition ilist.h:328
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
This file defines classes to implement an intrusive doubly linked list class (i.e.
This file defines the ilist_node class template, which is a convenient base class for creating classe...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
Definition DwarfDebug.h:190
CastInfo helper for casting from VPRecipeBase to a mixin class that is not part of the VPRecipeBase c...
Definition VPlan.h:4328
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
LLVM_PACKED_END
Definition VPlan.h:1112
auto cast_if_present(const Y &Val)
cast_if_present<X> - Functionally identical to cast, except that a null value is accepted.
Definition Casting.h:683
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
Definition STLExtras.h:840
ReductionStyle getReductionStyle(bool InLoop, bool Ordered, unsigned ScaleFactor)
Definition VPlan.h:2852
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
Type * toScalarizedTy(Type *Ty)
A helper for converting vectorized types to scalarized (non-vector) types.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto cast_or_null(const Y &Val)
Definition Casting.h:714
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
MemoryEffectsBase< IRMemLocation > MemoryEffects
Summary of how a function affects memory in the program.
Definition ModRef.h:356
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
UncountableExitStyle
Different methods of handling early exits.
Definition VPlan.h:79
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
Definition VPlan.h:89
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isPointerTy(const Type *T)
Definition SPIRVUtils.h:371
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
@ Other
Any other memory.
Definition ModRef.h:68
RecurKind
These are the kinds of recurrences that we support.
@ Mul
Product of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
std::variant< RdxOrdered, RdxInLoop, RdxUnordered > ReductionStyle
Definition VPlan.h:2850
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
std::unique_ptr< VPlan > VPlanPtr
Definition VPlan.h:74
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
Definition Bitfields.h:207
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
Definition Bitfields.h:223
This struct provides a method for customizing the way a cast is performed.
Definition Casting.h:476
Provides a cast trait that strips const from types to make it easier to implement a const-version of ...
Definition Casting.h:388
This cast trait just provides the default implementation of doCastIfPossible to make CastInfo special...
Definition Casting.h:309
Provides a cast trait that uses a defined pointer to pointer cast as a base for reference-to-referenc...
Definition Casting.h:423
This reduction is in-loop.
Definition VPlan.h:2844
Possible variants of a reduction.
Definition VPlan.h:2842
This reduction is unordered with the partial result scaled down by some factor.
Definition VPlan.h:2847
unsigned VFScaleFactor
Definition VPlan.h:2848
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342
An overlay on VPIRValue for VPValues that wrap a ConstantInt.
Definition VPlanValue.h:264
Struct to hold various analyses needed for cost computations.
void execute(VPTransformState &State) override
Generate the phi nodes.
VPFirstOrderRecurrencePHIRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:2813
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:2825
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPFirstOrderRecurrencePHIRecipe(PHINode *Phi, VPValue &Start, VPValue &BackedgeValue)
Definition VPlan.h:2804
DisjointFlagsTy(bool IsDisjoint)
Definition VPlan.h:726
NonNegFlagsTy(bool IsNonNeg)
Definition VPlan.h:731
TruncFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:721
WrapFlagsTy(bool HasNUW, bool HasNSW)
Definition VPlan.h:714
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1778
PHINode & getIRPhi()
Definition VPlan.h:1791
VPIRPhi(PHINode &PN)
Definition VPlan.h:1779
static bool classof(const VPRecipeBase *U)
Definition VPlan.h:1781
static bool classof(const VPUser *U)
Definition VPlan.h:1786
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1802
A VPValue representing a live-in from the input IR or a constant.
Definition VPlanValue.h:246
static bool classof(const VPUser *U)
Definition VPlan.h:1678
VPPhi * clone() override
Clone the current recipe.
Definition VPlan.h:1693
const VPRecipeBase * getAsRecipe() const override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:1708
static bool classof(const VPSingleDefRecipe *SDR)
Definition VPlan.h:1688
static bool classof(const VPValue *V)
Definition VPlan.h:1683
VPPhi(ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL, const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlan.h:1673
A pure-virtual common base class for recipes defining a single VPValue and using IR flags.
Definition VPlan.h:1116
static bool classof(const VPSingleDefRecipe *R)
Definition VPlan.h:1157
static bool classof(const VPRecipeBase *R)
Definition VPlan.h:1128
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1117
static bool classof(const VPValue *V)
Definition VPlan.h:1150
void execute(VPTransformState &State) override=0
The method which generates the output IR instructions that correspond to this VPRecipe,...
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1122
VPRecipeWithIRFlags * clone() override=0
Clone the current recipe.
static bool classof(const VPUser *U)
Definition VPlan.h:1145
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
Definition VPlan.h:3853
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3890
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
VPWidenLoadEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3863
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3889
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3870
VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Addr, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3854
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3880
A recipe for widening load operations, using the address to load from and an optional mask.
Definition VPlan.h:3804
VPWidenLoadRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3805
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3830
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3840
void execute(VPTransformState &State) override
Generate a wide load or gather.
VPWidenLoadRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3813
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenLoadSC)
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadRecipe.
Definition VPlan.h:3824
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3839
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
Definition VPlan.h:3955
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3971
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
VPWidenStoreEVLRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3964
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3999
VPWidenStoreEVLRecipe(VPWidenStoreRecipe &S, VPValue *Addr, VPValue *StoredVal, VPValue &EVL, VPValue *Mask)
Definition VPlan.h:3956
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3984
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3998
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3974
A recipe for widening store operations, using the stored value, the address to store to and an option...
Definition VPlan.h:3902
VPRecipeBase * getAsRecipe() override
Return a VPRecipeBase* to the current object.
Definition VPlan.h:3941
VPWidenStoreRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredVal, VPValue *Mask, bool Consecutive, const VPIRMetadata &Metadata, DebugLoc DL)
Definition VPlan.h:3903
VP_CLASSOF_IMPL(VPRecipeBase::VPWidenStoreSC)
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3920
VPWidenStoreRecipe * clone() override
Clone the current recipe.
Definition VPlan.h:3911
const VPRecipeBase * getAsRecipe() const override
Definition VPlan.h:3942
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreRecipe.
Definition VPlan.h:3926
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlan.h:3932
static VPMixin * castFailed()
Definition VPlan.h:4346
static bool isPossible(VPRecipeBase *R)
Used by isa.
Definition VPlan.h:4337
static VPMixin * doCast(VPRecipeBase *R)
Used by cast.
Definition VPlan.h:4340