21#define DEBUG_TYPE "vplan"
41 while (!Worklist.
empty()) {
44 auto *OpR =
Op->getDefiningRecipe();
45 if (!OpR || OpR->mayHaveSideEffects() || EphRecipes.
contains(OpR))
48 auto *UR = dyn_cast<VPRecipeBase>(U);
49 return !UR || !EphRecipes.contains(UR);
67 for (
auto &R : *
A->getParent()) {
77 if (ParentA == ParentB)
78 return LocalComesBefore(
A,
B);
86 unsigned OverrideMaxNumRegs)
const {
89 unsigned AvailableRegs = OverrideMaxNumRegs > 0
91 :
TTI.getNumberOfRegisters(RegClass);
92 if (MaxUsers > AvailableRegs) {
95 unsigned Spills = MaxUsers - AvailableRegs;
101 << Spills <<
" spills of "
102 <<
TTI.getRegisterClassName(RegClass) <<
"\n");
139 if (!VPBB->getParent())
145 for (
VPValue *U : R.operands()) {
148 EndPoint[U] = Idx2Recipe.
size();
168 EndPoint[WideIV] = Idx2Recipe.
size();
188 LLVM_DEBUG(
dbgs() <<
"LV(REG): Calculating max register usage:\n");
190 const auto &TTICapture =
TTI;
194 !TTICapture.isElementTypeLegalForScalableVector(Ty)))
202 OpenIntervals.
insert(CanIV);
208 for (
unsigned int Idx = 0, Sz = Idx2Recipe.
size(); Idx < Sz; ++Idx) {
212 VPValueList &
List = TransposeEnds[Idx];
218 if (
none_of(R->definedValues(),
219 [&Ends](
VPValue *Def) { return Ends.count(Def); }) &&
220 !R->mayHaveSideEffects())
232 for (
unsigned J = 0, E = VFs.
size(); J < E; ++J) {
240 for (
auto *VPV : OpenIntervals) {
251 if (VFs[J].isScalar() ||
258 TTI.getRegisterClassForType(
false, VPV->getScalarType());
265 unsigned ScaleFactor =
268 if (ScaleFactor > 1) {
269 VF = VFs[J].divideCoefficientBy(ScaleFactor);
271 <<
" to " << VF <<
" for " << *R <<
"\n";);
275 unsigned ClassID =
TTI.getRegisterClassForType(
true, ScalarTy);
276 RegUsage[ClassID] += GetRegUsage(ScalarTy, VF);
281 auto &Entry = MaxUsages[J][Pair.first];
282 Entry = std::max(Entry, Pair.second);
287 << OpenIntervals.
size() <<
'\n');
291 for (
VPValue *DefV : R->definedValues())
293 OpenIntervals.
insert(DefV);
301 for (
unsigned Idx = 0, End = VFs.
size(); Idx < End; ++Idx) {
307 for (
auto *In : LoopInvariants) {
314 TTI.getRegisterClassForType(VF.
isVector(), In->getScalarType());
315 Invariant[ClassID] += GetRegUsage(In->getScalarType(), VF);
319 dbgs() <<
"LV(REG): VF = " << VFs[Idx] <<
'\n';
320 dbgs() <<
"LV(REG): Found max usage: " << MaxUsages[Idx].
size()
322 for (
const auto &pair : MaxUsages[Idx]) {
323 dbgs() <<
"LV(REG): RegisterClass: "
324 <<
TTI.getRegisterClassName(pair.first) <<
", " << pair.second
327 dbgs() <<
"LV(REG): Found invariant usage: " << Invariant.
size()
329 for (
const auto &pair : Invariant) {
330 dbgs() <<
"LV(REG): RegisterClass: "
331 <<
TTI.getRegisterClassName(pair.first) <<
", " << pair.second
ReachingDefInfo InstSet & ToRemove
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
std::pair< uint64_t, uint64_t > Interval
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file contains the declarations of the Vectorization Plan base classes:
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
Implements a dense probed hash-table based set.
Core dominator tree base class.
bool properlyDominates(const DomTreeNodeBase< VPBlockBase > *A, const DomTreeNodeBase< VPBlockBase > *B) const
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getFixed(ScalarTy MinVal)
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
const VPBasicBlock * getEntryBasicBlock() const
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
A recipe for generating conditional branches on the bits of a mask.
A recipe for generating the phi node tracking the current scalar iteration index.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPValues defined by a VPRegionBlock, like the canonical IV.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
unsigned getNumUsers() const
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe to compute the pointers for widened memory accesses of SourceElementTy, with the Stride expr...
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPSymbolicValue & getVectorTripCount()
The vector trip count.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void Calculate(DomTreeT &DT)
bool match(Val *V, const Pattern &P)
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
unsigned getVFScaleFactor(VPRecipeBase *R)
Get the VF scaling factor applied to the recipe's output, if the recipe has one.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void collectEphemeralRecipesForVPlan(VPlan &Plan, DenseSet< VPRecipeBase * > &EphRecipes)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
A MapVector that performs no allocations if smaller than a certain size.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
SmallMapVector< unsigned, unsigned, 4 > LoopInvariantRegs
Holds the number of loop invariant values that are used in the loop.