57 if (!VPBB->getParent())
60 auto EndIter = Term ? Term->getIterator() : VPBB->end();
65 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
84 Ingredient.getDebugLoc());
87 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
88 nullptr ,
false ,
false , *VPI,
89 Ingredient.getDebugLoc());
92 Ingredient.getDebugLoc());
100 *VPI, CI->getDebugLoc());
103 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
107 *VPI, Ingredient.getDebugLoc());
111 "inductions must be created earlier");
120 "Only recpies with zero or one defined values expected");
121 Ingredient.eraseFromParent();
138 if (
A->getOpcode() != Instruction::Store ||
139 B->getOpcode() != Instruction::Store)
149 const APInt *Distance;
155 Type *TyA = TypeInfo.inferScalarType(
A->getOperand(0));
157 Type *TyB = TypeInfo.inferScalarType(
B->getOperand(0));
163 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
165 auto VFs =
B->getParent()->getPlan()->vectorFactors();
169 return Distance->
abs().
uge(
177 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
178 L(L), TypeInfo(TypeInfo) {}
185 return ExcludeRecipes.contains(&R) ||
186 (Store && isNoAliasViaDistance(Store, &GroupLeader));
199 std::optional<SinkStoreInfo> SinkInfo = {}) {
200 bool CheckReads = SinkInfo.has_value();
207 "Expected at most one successor in block chain");
210 if (SinkInfo && SinkInfo->shouldSkip(R))
214 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
243 if (R.mayHaveSideEffects() || R.mayReadFromMemory() || R.isPhi())
248 return RepR && RepR->getOpcode() == Instruction::Alloca;
257 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
273 if (!ScalarVFOnly && RepR->isSingleScalar())
276 WorkList.
insert({SinkTo, Candidate});
288 for (
auto &Recipe : *VPBB)
290 InsertIfValidSinkCandidate(VPBB,
Op);
294 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
297 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
302 auto UsersOutsideSinkTo =
304 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
306 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
307 return !U->usesFirstLaneOnly(SinkCandidate);
310 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
312 if (NeedsDuplicating) {
316 if (
auto *SinkCandidateRepR =
322 nullptr , *SinkCandidateRepR,
326 Clone = SinkCandidate->
clone();
336 InsertIfValidSinkCandidate(SinkTo,
Op);
346 if (!EntryBB || EntryBB->size() != 1 ||
356 if (EntryBB->getNumSuccessors() != 2)
361 if (!Succ0 || !Succ1)
364 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
366 if (Succ0->getSingleSuccessor() == Succ1)
368 if (Succ1->getSingleSuccessor() == Succ0)
385 if (!Region1->isReplicator())
387 auto *MiddleBasicBlock =
389 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
394 if (!Region2 || !Region2->isReplicator())
399 if (!Mask1 || Mask1 != Mask2)
402 assert(Mask1 && Mask2 &&
"both region must have conditions");
408 if (TransformedRegions.
contains(Region1))
415 if (!Then1 || !Then2)
435 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
441 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
442 Phi1ToMove.eraseFromParent();
445 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
459 TransformedRegions.
insert(Region1);
462 return !TransformedRegions.
empty();
469 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
470 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
471 auto *BlockInMask = PredRecipe->
getMask();
490 RecipeWithoutMask->getDebugLoc());
514 if (RepR->isPredicated())
533 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
547 if (!VPBB->getParent())
551 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
560 R.moveBefore(*PredVPBB, PredVPBB->
end());
562 auto *ParentRegion = VPBB->getParent();
563 if (ParentRegion && ParentRegion->getExiting() == VPBB)
564 ParentRegion->setExiting(PredVPBB);
565 for (
auto *Succ :
to_vector(VPBB->successors())) {
571 return !WorkList.
empty();
578 bool ShouldSimplify =
true;
579 while (ShouldSimplify) {
595 if (!
IV ||
IV->getTruncInst())
610 for (
auto *U : FindMyCast->
users()) {
612 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
613 FoundUserCast = UserCast;
617 FindMyCast = FoundUserCast;
642 if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
656 WidenOriginalIV->dropPoisonGeneratingFlags();
669 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
671 if (IsConditionalAssume)
674 if (R.mayHaveSideEffects())
678 return all_of(R.definedValues(),
679 [](
VPValue *V) { return V->getNumUsers() == 0; });
695 if (!PhiR || PhiR->getNumOperands() != 2)
697 VPUser *PhiUser = PhiR->getSingleUser();
701 if (PhiUser !=
Incoming->getDefiningRecipe() ||
704 PhiR->replaceAllUsesWith(PhiR->getOperand(0));
705 PhiR->eraseFromParent();
706 Incoming->getDefiningRecipe()->eraseFromParent();
721 Kind, FPBinOp, StartV, CanonicalIV, Step,
"offset.idx");
731 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
737 if (ResultTy != StepTy) {
744 Builder.setInsertPoint(VecPreheader);
745 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
747 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
753 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
758 Users.insert_range(V->users());
760 return Users.takeVector();
774 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
811 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
812 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
820 Def->operands(),
true,
822 Clone->insertAfter(Def);
823 Def->replaceAllUsesWith(Clone);
834 PtrIV->replaceAllUsesWith(PtrAdd);
841 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
842 return U->usesScalars(WideIV);
848 Plan,
ID.getKind(),
ID.getInductionOpcode(),
850 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
851 WideIV->getDebugLoc(), Builder);
854 if (!HasOnlyVectorVFs) {
856 "plans containing a scalar VF cannot also include scalable VFs");
857 WideIV->replaceAllUsesWith(Steps);
860 WideIV->replaceUsesWithIf(Steps,
861 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
863 return U.usesFirstLaneOnly(WideIV);
864 return U.usesScalars(WideIV);
880 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
885 if (!Def || Def->getNumOperands() != 2)
893 auto IsWideIVInc = [&]() {
894 auto &
ID = WideIV->getInductionDescriptor();
897 VPValue *IVStep = WideIV->getStepValue();
898 switch (
ID.getInductionOpcode()) {
899 case Instruction::Add:
901 case Instruction::FAdd:
903 case Instruction::FSub:
906 case Instruction::Sub: {
926 return IsWideIVInc() ? WideIV :
nullptr;
946 if (WideIntOrFp && WideIntOrFp->getTruncInst())
959 FirstActiveLane =
B.createScalarZExtOrTrunc(FirstActiveLane, CanonicalIVType,
960 FirstActiveLaneType,
DL);
961 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
968 EndValue =
B.createAdd(EndValue, One,
DL);
971 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
973 VPIRValue *Start = WideIV->getStartValue();
974 VPValue *Step = WideIV->getStepValue();
975 EndValue =
B.createDerivedIV(
977 Start, EndValue, Step);
992 if (WideIntOrFp && WideIntOrFp->getTruncInst())
999 if (!WideIntOrFp || !WideIntOrFp->isCanonical()) {
1002 Start, VectorTC, Step);
1031 assert(EndValue &&
"Must have computed the end value up front");
1047 auto *Zero = Plan.
getZero(StepTy);
1048 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1053 return B.createNaryOp(
1054 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1056 : Instruction::FAdd,
1057 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1069 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1078 WideIV, VectorPHBuilder, TypeInfo, ResumeTC))
1079 EndValues[WideIV] = EndValue;
1089 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1090 R.eraseFromParent();
1099 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1101 if (PredVPBB == MiddleVPBB)
1103 ExitIRI->getOperand(Idx),
1107 Plan, TypeInfo, PredVPBB, ExitIRI->getOperand(Idx), PSE);
1109 ExitIRI->setOperand(Idx, Escape);
1126 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1129 ExpR->replaceAllUsesWith(V->second);
1130 ExpR->eraseFromParent();
1139 while (!WorkList.
empty()) {
1141 if (!Seen.
insert(Cur).second)
1149 R->eraseFromParent();
1156static std::optional<std::pair<bool, unsigned>>
1159 std::optional<std::pair<bool, unsigned>>>(R)
1162 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1164 return std::make_pair(
true,
I->getVectorIntrinsicID());
1166 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe>([](
auto *
I) {
1170 return std::make_pair(
false,
1173 .
Default([](
auto *) {
return std::nullopt; });
1191 Value *V =
Op->getUnderlyingValue();
1197 auto FoldToIRValue = [&]() ->
Value * {
1199 if (OpcodeOrIID->first) {
1200 if (R.getNumOperands() != 2)
1202 unsigned ID = OpcodeOrIID->second;
1203 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1],
1206 unsigned Opcode = OpcodeOrIID->second;
1215 return Folder.FoldSelect(
Ops[0],
Ops[1],
1218 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1220 case Instruction::Select:
1221 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1222 case Instruction::ICmp:
1223 case Instruction::FCmp:
1226 case Instruction::GetElementPtr: {
1229 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1239 case Instruction::ExtractElement:
1246 if (
Value *V = FoldToIRValue())
1247 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1253 VPlan *Plan = Def->getParent()->getPlan();
1260 return Def->replaceAllUsesWith(V);
1266 PredPHI->replaceAllUsesWith(
Op);
1279 bool CanCreateNewRecipe =
1286 if (TruncTy == ATy) {
1287 Def->replaceAllUsesWith(
A);
1296 : Instruction::ZExt;
1299 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1301 Ext->setUnderlyingValue(UnderlyingExt);
1303 Def->replaceAllUsesWith(Ext);
1305 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1306 Def->replaceAllUsesWith(Trunc);
1314 for (
VPUser *U :
A->users()) {
1316 for (
VPValue *VPV : R->definedValues())
1330 Def->replaceAllUsesWith(
X);
1331 Def->eraseFromParent();
1337 return Def->replaceAllUsesWith(
1342 return Def->replaceAllUsesWith(
X);
1346 return Def->replaceAllUsesWith(
1351 return Def->replaceAllUsesWith(
1356 return Def->replaceAllUsesWith(
X);
1360 return Def->replaceAllUsesWith(Plan->
getFalse());
1364 return Def->replaceAllUsesWith(
X);
1367 if (CanCreateNewRecipe &&
1372 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1373 !Def->getOperand(1)->hasMoreThanOneUniqueUser()))
1374 return Def->replaceAllUsesWith(
1375 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1379 return Def->replaceAllUsesWith(Plan->
getFalse());
1382 return Def->replaceAllUsesWith(
X);
1386 if (CanCreateNewRecipe &&
1388 return Def->replaceAllUsesWith(Builder.createNot(
C));
1392 Def->setOperand(0,
C);
1393 Def->setOperand(1,
Y);
1394 Def->setOperand(2,
X);
1399 return Def->replaceAllUsesWith(
A);
1402 return Def->replaceAllUsesWith(
A);
1405 return Def->replaceAllUsesWith(
1411 return Def->replaceAllUsesWith(Builder.createNaryOp(
1413 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1418 const VPRegionBlock *ParentRegion = Def->getParent()->getParent();
1419 bool IsInReplicateRegion = ParentRegion && ParentRegion->
isReplicator();
1420 if (CanCreateNewRecipe && !IsInReplicateRegion &&
1422 return Def->replaceAllUsesWith(Builder.createNaryOp(
1424 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1429 return Def->replaceAllUsesWith(
A);
1444 R->setOperand(1,
Y);
1445 R->setOperand(2,
X);
1449 R->replaceAllUsesWith(Cmp);
1454 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1455 Cmp->setDebugLoc(Def->getDebugLoc());
1467 if (
Op->getNumUsers() > 1 ||
1471 }
else if (!UnpairedCmp) {
1472 UnpairedCmp =
Op->getDefiningRecipe();
1476 UnpairedCmp =
nullptr;
1483 if (NewOps.
size() < Def->getNumOperands()) {
1485 return Def->replaceAllUsesWith(NewAnyOf);
1492 if (CanCreateNewRecipe &&
1498 return Def->replaceAllUsesWith(NewCmp);
1506 return Def->replaceAllUsesWith(Def->getOperand(1));
1512 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1513 Def->replaceAllUsesWith(
X);
1523 Def->setOperand(1, Def->getOperand(0));
1524 Def->setOperand(0,
Y);
1529 if (Phi->getOperand(0) == Phi->getOperand(1))
1530 Phi->replaceAllUsesWith(Phi->getOperand(0));
1537 return Def->replaceAllUsesWith(Def->getOperand(0));
1543 Def->replaceAllUsesWith(
1544 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1548 return Def->replaceAllUsesWith(
A);
1554 Def->replaceAllUsesWith(
1555 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1562 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1567 Def->replaceAllUsesWith(
1577 "broadcast operand must be single-scalar");
1578 Def->setOperand(0,
C);
1583 if (Def->getNumOperands() == 1)
1584 Def->replaceAllUsesWith(Def->getOperand(0));
1589 if (Def->getNumOperands() == 1 &&
1591 return Def->replaceAllUsesWith(IRV);
1604 return Def->replaceAllUsesWith(
A);
1607 Def->replaceAllUsesWith(Builder.createNaryOp(
1608 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1616 if (Phi->getOperand(1) != Def &&
match(Phi->getOperand(0),
m_ZeroInt()) &&
1617 Phi->getSingleUser() == Def) {
1618 Phi->setOperand(0,
Y);
1619 Def->replaceAllUsesWith(Phi);
1634 Steps->replaceAllUsesWith(Steps->getOperand(0));
1642 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1644 return PhiR && PhiR->isInLoop();
1650 Def->replaceAllUsesWith(
A);
1659 [Def,
A](
VPUser *U) { return U->usesScalars(A) || Def == U; })) {
1660 return Def->replaceAllUsesWith(
A);
1664 return Def->replaceAllUsesWith(
A);
1691 while (!Worklist.
empty()) {
1700 R->replaceAllUsesWith(
1701 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1720 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1729 !WidenStoreR->isConsecutive()) {
1730 assert(!WidenStoreR->isReverse() &&
1731 "Not consecutive memory recipes shouldn't be reversed");
1732 VPValue *Mask = WidenStoreR->getMask();
1741 {WidenStoreR->getOperand(1)});
1746 &WidenStoreR->getIngredient(), {Extract, WidenStoreR->getAddr()},
1747 true ,
nullptr , {},
1749 ScalarStore->insertBefore(WidenStoreR);
1750 WidenStoreR->eraseFromParent();
1758 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1759 true ,
nullptr , *RepR ,
1760 *RepR , RepR->getDebugLoc());
1761 Clone->insertBefore(RepOrWidenR);
1763 VPValue *ExtractOp = Clone->getOperand(0);
1769 Clone->setOperand(0, ExtractOp);
1770 RepR->eraseFromParent();
1779 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1788 return !U->usesScalars(
Op);
1792 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1795 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1796 IntroducesBCastOf(Op)))
1800 auto *IRV = dyn_cast<VPIRValue>(Op);
1801 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1802 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1803 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1808 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1809 true ,
nullptr, *RepOrWidenR);
1810 Clone->insertBefore(RepOrWidenR);
1811 RepOrWidenR->replaceAllUsesWith(Clone);
1813 RepOrWidenR->eraseFromParent();
1849 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
1850 UniqueValues.
insert(Blend->getIncomingValue(0));
1851 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
1853 UniqueValues.
insert(Blend->getIncomingValue(
I));
1855 if (UniqueValues.
size() == 1) {
1856 Blend->replaceAllUsesWith(*UniqueValues.
begin());
1857 Blend->eraseFromParent();
1861 if (Blend->isNormalized())
1867 unsigned StartIndex = 0;
1868 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1873 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
1880 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
1882 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
1883 if (
I == StartIndex)
1885 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
1886 OperandsWithMask.
push_back(Blend->getMask(
I));
1891 OperandsWithMask, *Blend, Blend->getDebugLoc());
1892 NewBlend->insertBefore(&R);
1894 VPValue *DeadMask = Blend->getMask(StartIndex);
1896 Blend->eraseFromParent();
1901 if (NewBlend->getNumOperands() == 3 &&
1903 VPValue *Inc0 = NewBlend->getOperand(0);
1904 VPValue *Inc1 = NewBlend->getOperand(1);
1905 VPValue *OldMask = NewBlend->getOperand(2);
1906 NewBlend->setOperand(0, Inc1);
1907 NewBlend->setOperand(1, Inc0);
1908 NewBlend->setOperand(2, NewMask);
1935 APInt MaxVal = AlignedTC - 1;
1938 unsigned NewBitWidth =
1944 bool MadeChange =
false;
1953 if (!WideIV || !WideIV->isCanonical() ||
1954 WideIV->hasMoreThanOneUniqueUser() ||
1955 NewIVTy == WideIV->getScalarType())
1960 VPUser *SingleUser = WideIV->getSingleUser();
1968 auto *NewStart = Plan.
getZero(NewIVTy);
1969 WideIV->setStartValue(NewStart);
1971 WideIV->setStepValue(NewStep);
1978 Cmp->setOperand(1, NewBTC);
1992 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1994 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2007 const SCEV *VectorTripCount =
2012 "Trip count SCEV must be computable");
2033 auto *Term = &ExitingVPBB->
back();
2046 for (
unsigned Part = 0; Part < UF; ++Part) {
2052 Extracts[Part] = Ext;
2064 match(Phi->getBackedgeValue(),
2066 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2083 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2090 "Expected incoming values of Phi to be ActiveLaneMasks");
2095 EntryALM->setOperand(2, ALMMultiplier);
2096 LoopALM->setOperand(2, ALMMultiplier);
2100 ExtractFromALM(EntryALM, EntryExtracts);
2105 ExtractFromALM(LoopALM, LoopExtracts);
2107 Not->setOperand(0, LoopExtracts[0]);
2110 for (
unsigned Part = 0; Part < UF; ++Part) {
2111 Phis[Part]->setStartValue(EntryExtracts[Part]);
2112 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2125 auto *Term = &ExitingVPBB->
back();
2134 const SCEV *VectorTripCount =
2140 "Trip count SCEV must be computable");
2165 if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
2166 return R->isCanonical();
2167 return isa<VPCanonicalIVPHIRecipe, VPCurrentIterationPHIRecipe,
2168 VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
2174 R->getScalarType());
2176 HeaderR.eraseFromParent();
2180 HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
2181 HeaderR.eraseFromParent();
2191 B->setParent(
nullptr);
2200 if (Exits.
size() != 1) {
2202 "BranchOnTwoConds needs 2 remaining exits");
2204 Term->getOperand(0));
2213 Term->setOperand(1, Plan.
getTrue());
2218 {}, {}, Term->getDebugLoc());
2222 Term->eraseFromParent();
2258 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2268 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2269 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2298 auto TryToPushSinkCandidate = [&](
VPRecipeBase *SinkCandidate) {
2301 if (SinkCandidate == Previous)
2305 !Seen.
insert(SinkCandidate).second ||
2318 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
2321 "only recipes with a single defined value expected");
2336 if (SinkCandidate == FOR)
2339 SinkCandidate->moveAfter(Previous);
2340 Previous = SinkCandidate;
2364 [&VPDT, HoistPoint](
VPUser *U) {
2365 auto *R = cast<VPRecipeBase>(U);
2366 return HoistPoint == R ||
2367 VPDT.properlyDominates(HoistPoint, R);
2369 "HoistPoint must dominate all users of FOR");
2371 auto NeedsHoisting = [HoistPoint, &VPDT,
2373 VPRecipeBase *HoistCandidate = HoistCandidateV->getDefiningRecipe();
2374 if (!HoistCandidate)
2379 HoistCandidate->
getRegion() == EnclosingLoopRegion) &&
2380 "CFG in VPlan should still be flat, without replicate regions");
2382 if (!Visited.
insert(HoistCandidate).second)
2394 return HoistCandidate;
2403 for (
unsigned I = 0;
I != HoistCandidates.
size(); ++
I) {
2406 "only recipes with a single defined value expected");
2418 if (
auto *R = NeedsHoisting(
Op)) {
2421 if (R->getNumDefinedValues() != 1)
2435 HoistCandidate->moveBefore(*HoistPoint->
getParent(),
2454 VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
2457 while (
auto *PrevPhi =
2459 assert(PrevPhi->getParent() == FOR->getParent());
2461 Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
2480 {FOR, FOR->getBackedgeValue()});
2485 RecurSplice->setOperand(0, FOR);
2491 for (
VPUser *U : RecurSplice->users()) {
2500 VPValue *PenultimateIndex =
B.createSub(LastActiveLane, One);
2501 VPValue *PenultimateLastIter =
2503 {PenultimateIndex, FOR->getBackedgeValue()});
2508 VPValue *Sel =
B.createSelect(Cmp, LastPrevIter, PenultimateLastIter);
2521 RecurKind RK = PhiR->getRecurrenceKind();
2528 RecWithFlags->dropPoisonGeneratingFlags();
2534struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2536 return Def == getEmptyKey() || Def == getTombstoneKey();
2547 return GEP->getSourceElementType();
2550 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2551 [](
auto *
I) {
return I->getSourceElementType(); })
2552 .
Default([](
auto *) {
return nullptr; });
2556 static bool canHandle(
const VPSingleDefRecipe *Def) {
2565 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2566 C->second == Instruction::ExtractValue)))
2572 return !
Def->mayReadFromMemory();
2576 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2577 const VPlan *Plan =
Def->getParent()->getPlan();
2578 VPTypeAnalysis TypeInfo(*Plan);
2581 getGEPSourceElementType(Def), TypeInfo.inferScalarType(Def),
2584 if (RFlags->hasPredicate())
2590 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2593 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2595 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2597 !
equal(
L->operands(),
R->operands()))
2600 "must have valid opcode info for both recipes");
2602 if (LFlags->hasPredicate() &&
2603 LFlags->getPredicate() !=
2609 const VPRegionBlock *RegionL =
L->getRegion();
2610 const VPRegionBlock *RegionR =
R->getRegion();
2613 L->getParent() !=
R->getParent())
2615 const VPlan *Plan =
L->getParent()->getPlan();
2616 VPTypeAnalysis TypeInfo(*Plan);
2617 return TypeInfo.inferScalarType(L) == TypeInfo.inferScalarType(R);
2632 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2636 if (!VPDT.
dominates(V->getParent(), VPBB))
2641 Def->replaceAllUsesWith(V);
2660 "Expected vector prehader's successor to be the vector loop region");
2667 return !Op->isDefinedOutsideLoopRegions();
2670 R.moveBefore(*Preheader, Preheader->
end());
2697 if (Def->getNumUsers() == 0)
2706 auto *UserR = cast<VPRecipeBase>(U);
2707 VPBasicBlock *Parent = UserR->getParent();
2710 if (UserR->isPhi() || Parent->getEnclosingLoopRegion())
2713 if (SinkBB && SinkBB != Parent)
2727 "Defining block must dominate sink block");
2753 VPValue *ResultVPV = R.getVPSingleValue();
2755 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2756 if (!NewResSizeInBits)
2769 (void)OldResSizeInBits;
2777 VPW->dropPoisonGeneratingFlags();
2779 if (OldResSizeInBits != NewResSizeInBits &&
2783 Instruction::ZExt, ResultVPV, OldResTy,
nullptr,
2785 Ext->insertAfter(&R);
2787 Ext->setOperand(0, ResultVPV);
2788 assert(OldResSizeInBits > NewResSizeInBits &&
"Nothing to shrink?");
2791 "Only ICmps should not need extending the result.");
2801 for (
unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
2802 auto *
Op = R.getOperand(Idx);
2803 unsigned OpSizeInBits =
2805 if (OpSizeInBits == NewResSizeInBits)
2807 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2808 auto [ProcessedIter, IterIsEmpty] = ProcessedTruncs.
try_emplace(
Op);
2810 R.setOperand(Idx, ProcessedIter->second);
2818 Builder.setInsertPoint(&R);
2820 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2821 ProcessedIter->second = NewOp;
2822 R.setOperand(Idx, NewOp);
2837 assert(VPBB->getNumSuccessors() == 2 &&
2838 "Two successors expected for BranchOnCond");
2839 unsigned RemovedIdx;
2850 "There must be a single edge between VPBB and its successor");
2859 VPBB->back().eraseFromParent();
2913 VPValue *StartV = CanonicalIVPHI->getStartValue();
2915 auto *CanonicalIVIncrement =
2918 CanonicalIVIncrement->dropPoisonGeneratingFlags();
2919 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2930 auto *EntryIncrement = Builder.createOverflowingOp(
2932 DL,
"index.part.next");
2938 {EntryIncrement, TC, ALMMultiplier},
DL,
2939 "active.lane.mask.entry");
2945 LaneMaskPhi->insertAfter(CanonicalIVPHI);
2950 Builder.setInsertPoint(OriginalTerminator);
2951 auto *InLoopIncrement = Builder.createOverflowingOp(
2953 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2955 {InLoopIncrement, TC, ALMMultiplier},
DL,
2956 "active.lane.mask.next");
2961 auto *NotMask = Builder.createNot(ALM,
DL);
2968 bool UseActiveLaneMaskForControlFlow) {
2970 auto *FoundWidenCanonicalIVUser =
find_if(
2972 assert(FoundWidenCanonicalIVUser &&
2973 "Must have widened canonical IV when tail folding!");
2975 auto *WideCanonicalIV =
2978 if (UseActiveLaneMaskForControlFlow) {
2987 nullptr,
"active.lane.mask");
3003 template <
typename OpTy>
bool match(OpTy *V)
const {
3014template <
typename Op0_t,
typename Op1_t>
3033 VPValue *Addr, *Mask, *EndPtr;
3036 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
3038 EVLEndPtr->insertBefore(&CurRecipe);
3039 EVLEndPtr->setOperand(1, &EVL);
3043 if (
match(&CurRecipe,
3057 LoadR->insertBefore(&CurRecipe);
3059 Intrinsic::experimental_vp_reverse, {LoadR, Plan->
getTrue(), &EVL},
3068 StoredVal, EVL, Mask);
3070 if (
match(&CurRecipe,
3076 Intrinsic::experimental_vp_reverse,
3077 {ReversedVal, Plan->
getTrue(), &EVL},
3081 AdjustEndPtr(EndPtr), NewReverse, EVL,
3086 if (Rdx->isConditional() &&
3091 if (Interleave->getMask() &&
3096 if (
match(&CurRecipe,
3105 Intrinsic::vp_merge, {Mask,
LHS,
RHS, &EVL},
3124 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3129 HeaderMask = R.getVPSingleValue();
3141 NewR->insertBefore(R);
3142 for (
auto [Old, New] :
3143 zip_equal(R->definedValues(), NewR->definedValues()))
3144 Old->replaceAllUsesWith(New);
3151 R->eraseFromParent();
3168 "User of VF that we can't transform to EVL.");
3174 [&LoopRegion, &Plan](
VPUser *U) {
3176 m_c_Add(m_Specific(LoopRegion->getCanonicalIV()),
3177 m_Specific(&Plan.getVFxUF()))) ||
3178 isa<VPWidenPointerInductionRecipe>(U);
3180 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3181 "increment of the canonical induction.");
3197 MaxEVL = Builder.createScalarZExtOrTrunc(
3201 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3202 VPValue *PrevEVL = Builder.createScalarPhi(
3216 Intrinsic::experimental_vp_splice,
3217 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3221 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3238 VPValue *EVLMask = Builder.createICmp(
3299 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3307 VPValue *StartV = CanonicalIVPHI->getStartValue();
3310 auto *CurrentIteration =
3312 CurrentIteration->insertAfter(CanonicalIVPHI);
3313 VPBuilder Builder(Header, Header->getFirstNonPhi());
3316 VPPhi *AVLPhi = Builder.createScalarPhi(
3320 if (MaxSafeElements) {
3330 auto *CanonicalIVIncrement =
3332 Builder.setInsertPoint(CanonicalIVIncrement);
3336 OpVPEVL = Builder.createScalarZExtOrTrunc(
3337 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3339 auto *NextIter = Builder.createAdd(OpVPEVL, CurrentIteration,
3340 CanonicalIVIncrement->getDebugLoc(),
3341 "current.iteration.next",
3342 {CanonicalIVIncrement->hasNoUnsignedWrap(),
3343 CanonicalIVIncrement->hasNoSignedWrap()});
3344 CurrentIteration->addOperand(NextIter);
3348 "avl.next", {
true,
false});
3356 CanonicalIVPHI->replaceAllUsesWith(CurrentIteration);
3357 CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
3371 assert(!CurrentIteration &&
3372 "Found multiple CurrentIteration. Only one expected");
3373 CurrentIteration = PhiR;
3377 if (!CurrentIteration)
3388 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3394 VPValue *Backedge = CanonicalIV->getIncomingValue(1);
3397 "Unexpected canonical iv");
3403 CanonicalIV->eraseFromParent();
3423 if (!
match(EVLPhi->getBackedgeValue(),
3433 [[maybe_unused]]
bool FoundAVLNext =
3436 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3448 "Expected BranchOnCond with ICmp comparing CanIV increment with vector "
3453 LatchBr->setOperand(
3464 return R->getRegion() ||
3468 for (
const SCEV *Stride : StridesMap.
values()) {
3471 const APInt *StrideConst;
3494 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3501 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3504 if (NewSCEV != ScevExpr) {
3506 ExpSCEV->replaceAllUsesWith(NewExp);
3515 const std::function<
bool(
BasicBlock *)> &BlockNeedsPredication) {
3519 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3524 while (!Worklist.
empty()) {
3527 if (!Visited.
insert(CurRec).second)
3549 RecWithFlags->isDisjoint()) {
3552 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3553 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3554 RecWithFlags->replaceAllUsesWith(New);
3555 RecWithFlags->eraseFromParent();
3558 RecWithFlags->dropPoisonGeneratingFlags();
3563 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3564 "found instruction with poison generating flags not covered by "
3565 "VPRecipeWithIRFlags");
3570 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3582 Instruction &UnderlyingInstr = WidenRec->getIngredient();
3583 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3584 if (AddrDef && WidenRec->isConsecutive() &&
3585 BlockNeedsPredication(UnderlyingInstr.
getParent()))
3586 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3588 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3592 InterleaveRec->getInterleaveGroup();
3593 bool NeedPredication =
false;
3595 I < NumMembers; ++
I) {
3598 NeedPredication |= BlockNeedsPredication(Member->getParent());
3601 if (NeedPredication)
3602 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3614 if (InterleaveGroups.empty())
3621 for (
const auto *IG : InterleaveGroups) {
3627 StoredValues.
push_back(StoreR->getStoredValue());
3628 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3635 StoredValues.
push_back(StoreR->getStoredValue());
3639 bool NeedsMaskForGaps =
3640 (IG->requiresScalarEpilogue() && !ScalarEpilogueAllowed) ||
3641 (!StoredValues.
empty() && !IG->isFull());
3653 VPValue *Addr = Start->getAddr();
3662 assert(IG->getIndex(IRInsertPos) != 0 &&
3663 "index of insert position shouldn't be zero");
3667 IG->getIndex(IRInsertPos),
3671 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3677 if (IG->isReverse()) {
3680 -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
3681 ReversePtr->insertBefore(InsertPos);
3685 InsertPos->getMask(), NeedsMaskForGaps,
3686 InterleaveMD, InsertPos->getDebugLoc());
3687 VPIG->insertBefore(InsertPos);
3690 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3693 if (!Member->getType()->isVoidTy()) {
3752 AddOp = Instruction::Add;
3753 MulOp = Instruction::Mul;
3755 AddOp =
ID.getInductionOpcode();
3756 MulOp = Instruction::FMul;
3764 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3765 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3767 Flags.dropPoisonGeneratingFlags();
3776 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3781 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3782 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3788 WidePHI->insertBefore(WidenIVR);
3799 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3803 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3806 VF = Builder.createScalarZExtOrTrunc(VF, StepTy,
3809 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3816 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3819 WidePHI->addOperand(
Next);
3847 VPlan *Plan = R->getParent()->getPlan();
3848 VPValue *Start = R->getStartValue();
3849 VPValue *Step = R->getStepValue();
3850 VPValue *VF = R->getVFValue();
3852 assert(R->getInductionDescriptor().getKind() ==
3854 "Not a pointer induction according to InductionDescriptor!");
3857 "Recipe should have been replaced");
3863 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3867 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3870 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3872 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3873 R->replaceAllUsesWith(PtrAdd);
3878 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, TypeInfo.
inferScalarType(VF),
3880 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3883 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3892 if (!R->isReplicator())
3896 R->dissolveToCFGLoop();
3917 assert(Br->getNumOperands() == 2 &&
3918 "BranchOnTwoConds must have exactly 2 conditions");
3922 assert(Successors.size() == 3 &&
3923 "BranchOnTwoConds must have exactly 3 successors");
3928 VPValue *Cond0 = Br->getOperand(0);
3929 VPValue *Cond1 = Br->getOperand(1);
3934 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
3947 Br->eraseFromParent();
3970 WidenIVR->replaceAllUsesWith(PtrAdd);
3983 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
3984 Select = Builder.createSelect(Blend->getMask(
I),
3985 Blend->getIncomingValue(
I),
Select,
3986 R.getDebugLoc(),
"predphi", *Blend);
3987 Blend->replaceAllUsesWith(
Select);
3992 if (!VEPR->getOffset()) {
3994 "Expected unroller to have materialized offset for UF != 1");
3995 VEPR->materializeOffset();
4010 for (
VPValue *
Op : LastActiveL->operands()) {
4011 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4016 VPValue *FirstInactiveLane = Builder.createNaryOp(
4018 LastActiveL->getDebugLoc(),
"first.inactive.lane");
4023 Builder.createSub(FirstInactiveLane, One,
4024 LastActiveL->getDebugLoc(),
"last.active.lane");
4034 assert(VPI->isMasked() &&
4035 "Unmasked MaskedCond should be simplified earlier");
4036 VPI->replaceAllUsesWith(Builder.createNaryOp(
4046 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4049 ToRemove.push_back(BranchOnCountInst);
4064 ? Instruction::UIToFP
4065 : Instruction::Trunc;
4066 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4072 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4078 MulOpc = Instruction::FMul;
4079 Flags = VPI->getFastMathFlags();
4081 MulOpc = Instruction::Mul;
4086 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4088 VPI->replaceAllUsesWith(VectorStep);
4094 R->eraseFromParent();
4101 struct EarlyExitInfo {
4112 if (Pred == MiddleVPBB)
4117 VPValue *CondOfEarlyExitingVPBB;
4118 [[maybe_unused]]
bool Matched =
4119 match(EarlyExitingVPBB->getTerminator(),
4121 assert(Matched &&
"Terminator must be BranchOnCond");
4125 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4126 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4128 TrueSucc == ExitBlock
4129 ? CondOfEarlyExitingVPBB
4130 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4136 "exit condition must dominate the latch");
4145 assert(!Exits.
empty() &&
"must have at least one early exit");
4152 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4154 llvm::sort(Exits, [&RPOIdx](
const EarlyExitInfo &
A,
const EarlyExitInfo &
B) {
4155 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4161 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4162 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4164 Exits[
I].EarlyExitingVPBB) &&
4165 "RPO sort must place dominating exits before dominated ones");
4171 VPValue *Combined = Exits[0].CondToExit;
4172 for (
const EarlyExitInfo &Info :
drop_begin(Exits))
4173 Combined = Builder.createLogicalOr(Combined, Info.CondToExit);
4180 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4184 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4192 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4226 for (
auto [Exit, VectorEarlyExitVPBB] :
4227 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4228 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4240 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4241 VPValue *NewIncoming = IncomingVal;
4243 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4248 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4249 ExitIRI->addOperand(NewIncoming);
4252 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4286 bool IsLastDispatch = (
I + 2 == Exits.
size());
4288 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4294 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4297 CurrentBB = FalseBB;
4304 "Unexpected terminator");
4305 auto *IsLatchExitTaken =
4307 LatchExitingBranch->getOperand(1));
4309 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4310 LatchExitingBranch->eraseFromParent();
4311 Builder.setInsertPoint(LatchVPBB);
4313 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4315 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4326 Type *RedTy = Ctx.Types.inferScalarType(Red);
4327 VPValue *VecOp = Red->getVecOp();
4330 auto IsExtendedRedValidAndClampRange =
4342 if (Red->isPartialReduction()) {
4347 ExtRedCost = Ctx.TTI.getPartialReductionCost(
4348 Opcode, SrcTy,
nullptr, RedTy, VF, ExtKind,
4351 ? std::optional{Red->getFastMathFlags()}
4355 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4356 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4357 Red->getFastMathFlags(),
CostKind);
4359 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4369 IsExtendedRedValidAndClampRange(
4372 Ctx.Types.inferScalarType(
A)))
4391 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4392 Opcode != Instruction::FAdd)
4395 Type *RedTy = Ctx.Types.inferScalarType(Red);
4398 auto IsMulAccValidAndClampRange =
4405 Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
4408 if (Red->isPartialReduction()) {
4410 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
4413 MulAccCost = Ctx.TTI.getPartialReductionCost(
4414 Opcode, SrcTy, SrcTy2, RedTy, VF,
4423 ? std::optional{Red->getFastMathFlags()}
4429 (Ext0->getOpcode() != Ext1->getOpcode() ||
4430 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4434 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4436 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4444 ExtCost += Ext0->computeCost(VF, Ctx);
4446 ExtCost += Ext1->computeCost(VF, Ctx);
4448 ExtCost += OuterExt->computeCost(VF, Ctx);
4450 return MulAccCost.
isValid() &&
4451 MulAccCost < ExtCost + MulCost + RedCost;
4456 VPValue *VecOp = Red->getVecOp();
4463 assert(Opcode == Instruction::FAdd &&
4464 "MulAccumulateReduction from an FMul must accumulate into an FAdd "
4473 if (RecipeA && RecipeB &&
4474 IsMulAccValidAndClampRange(
FMul, RecipeA, RecipeB,
nullptr)) {
4496 if (!ExtA || ExtB || !
isa<VPIRValue>(ValB) || Red->isPartialReduction())
4498 Type *NarrowTy = Ctx.Types.inferScalarType(ExtA->getOperand(0));
4512 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4513 Type *WideTy = Ctx.Types.inferScalarType(ExtA);
4514 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4515 Mul->setOperand(1, ExtB);
4525 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4530 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4537 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4546 if (!Red->isPartialReduction() &&
4555 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4564 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4565 Ext0->getOpcode() == Ext1->getOpcode() &&
4566 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4568 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getResultType(),
nullptr,
4569 *Ext0, *Ext0, Ext0->getDebugLoc());
4570 NewExt0->insertBefore(Ext0);
4575 Ext->getResultType(),
nullptr, *Ext1,
4576 *Ext1, Ext1->getDebugLoc());
4579 Mul->setOperand(0, NewExt0);
4580 Mul->setOperand(1, NewExt1);
4581 Red->setOperand(1,
Mul);
4594 auto IP = std::next(Red->getIterator());
4595 auto *VPBB = Red->getParent();
4605 Red->replaceAllUsesWith(AbstractR);
4635 for (
VPValue *VPV : VPValues) {
4644 if (
User->usesScalars(VPV))
4647 HoistPoint = HoistBlock->
begin();
4651 "All users must be in the vector preheader or dominated by it");
4656 VPV->replaceUsesWithIf(Broadcast,
4657 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4658 return Broadcast != &U && !U.usesScalars(VPV);
4675 if (RepR->isPredicated() || !RepR->isSingleScalar() ||
4676 RepR->getOpcode() != Instruction::Load)
4679 VPValue *Addr = RepR->getOperand(0);
4682 if (!
Loc.AATags.Scope)
4687 if (R.mayWriteToMemory()) {
4689 if (!
Loc || !
Loc->AATags.Scope || !
Loc->AATags.NoAlias)
4697 for (
auto &[LoadRecipe, LoadLoc] : CandidateLoads) {
4701 const AAMDNodes &LoadAA = LoadLoc.AATags;
4717 return CommonMetadata;
4720template <
unsigned Opcode>
4725 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4726 "Only Load and Store opcodes supported");
4727 constexpr bool IsLoad = (Opcode == Instruction::Load);
4738 if (!RepR || RepR->getOpcode() != Opcode || !RepR->isPredicated())
4742 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
4745 RecipesByAddress[AddrSCEV].push_back(RepR);
4752 return TypeInfo.
inferScalarType(IsLoad ? Recipe : Recipe->getOperand(0));
4754 for (
auto &[Addr, Recipes] : RecipesByAddress) {
4755 if (Recipes.size() < 2)
4763 VPValue *MaskI = RecipeI->getMask();
4764 Type *TypeI = GetLoadStoreValueType(RecipeI);
4770 bool HasComplementaryMask =
false;
4775 VPValue *MaskJ = RecipeJ->getMask();
4776 Type *TypeJ = GetLoadStoreValueType(RecipeJ);
4777 if (TypeI == TypeJ) {
4787 if (HasComplementaryMask) {
4788 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4803template <
typename InstType>
4821 for (
auto &Group :
Groups) {
4841 return R->isSingleScalar() == IsSingleScalar;
4843 "all members in group must agree on IsSingleScalar");
4848 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
4849 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
4851 UnpredicatedLoad->insertBefore(EarliestLoad);
4855 Load->replaceAllUsesWith(UnpredicatedLoad);
4856 Load->eraseFromParent();
4866 if (!StoreLoc || !StoreLoc->AATags.Scope)
4872 StoresToSink.
end());
4876 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L, TypeInfo);
4890 for (
auto &Group :
Groups) {
4903 VPValue *SelectedValue = Group[0]->getOperand(0);
4906 bool IsSingleScalar = Group[0]->isSingleScalar();
4907 for (
unsigned I = 1;
I < Group.size(); ++
I) {
4908 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
4909 "all members in group must agree on IsSingleScalar");
4910 VPValue *Mask = Group[
I]->getMask();
4912 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
4921 StoreWithMinAlign->getUnderlyingInstr(),
4922 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
4923 nullptr, *LastStore, CommonMetadata);
4924 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
4928 Store->eraseFromParent();
4935 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
4936 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
4998 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5000 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5007 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5017 DefR->replaceUsesWithIf(
5018 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5020 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5034 for (
VPValue *Def : R.definedValues()) {
5047 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5049 return U->usesScalars(Def) &&
5052 if (
none_of(Def->users(), IsCandidateUnpackUser))
5059 Unpack->insertAfter(&R);
5060 Def->replaceUsesWithIf(Unpack,
5061 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5062 return IsCandidateUnpackUser(&U);
5072 bool RequiresScalarEpilogue,
5084 assert(StepR->getParent() == VectorPHVPBB &&
5085 "Step must be defined in VectorPHVPBB");
5087 InsertPt = std::next(StepR->getIterator());
5089 VPBuilder Builder(VectorPHVPBB, InsertPt);
5097 if (TailByMasking) {
5098 TC = Builder.createAdd(
5109 Builder.createNaryOp(Instruction::URem, {TC, Step},
5118 if (RequiresScalarEpilogue) {
5120 "requiring scalar epilogue is not supported with fail folding");
5123 R = Builder.createSelect(IsZero, Step, R);
5145 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5152 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5156 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5160 VPValue *MulByUF = Builder.createOverflowingOp(
5172 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5180 const SCEV *Expr = ExpSCEV->getSCEV();
5183 ExpandedSCEVs[ExpSCEV->getSCEV()] = Res;
5188 ExpSCEV->eraseFromParent();
5191 "VPExpandSCEVRecipes must be at the beginning of the entry block, "
5192 "before any VPIRInstructions");
5195 auto EI = Entry->begin();
5205 return ExpandedSCEVs;
5221 return Member0Op == OpV;
5223 return !W->getMask() && W->isConsecutive() && Member0Op == OpV;
5225 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5242 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5245 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5251 const auto &[
OpIdx, OpV] =
P;
5266 if (!InterleaveR || InterleaveR->
getMask())
5267 return std::nullopt;
5269 Type *GroupElementTy =
nullptr;
5273 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5274 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5276 return std::nullopt;
5281 [&TypeInfo, GroupElementTy](
VPValue *
Op) {
5282 return TypeInfo.inferScalarType(Op) == GroupElementTy;
5284 return std::nullopt;
5288 if (IG->getFactor() != IG->getNumMembers())
5289 return std::nullopt;
5295 assert(
Size.isScalable() == VF.isScalable() &&
5296 "if Size is scalable, VF must be scalable and vice versa");
5297 return Size.getKnownMinValue();
5301 unsigned MinVal = VF.getKnownMinValue();
5303 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5306 return std::nullopt;
5314 return RepR && RepR->isSingleScalar();
5321 auto *R = V->getDefiningRecipe();
5330 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx)
5331 WideMember0->setOperand(
5340 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5342 *LI, LoadGroup->getAddr(), LoadGroup->getMask(),
true,
5343 false, {}, LoadGroup->getDebugLoc());
5344 L->insertBefore(LoadGroup);
5350 assert(RepR->isSingleScalar() &&
5352 "must be a single scalar load");
5353 NarrowedOps.
insert(RepR);
5358 VPValue *PtrOp = WideLoad->getAddr();
5360 PtrOp = VecPtr->getOperand(0);
5365 nullptr, {}, *WideLoad);
5366 N->insertBefore(WideLoad);
5371std::unique_ptr<VPlan>
5391 "unexpected branch-on-count");
5395 std::optional<ElementCount> VFToOptimize;
5412 if (R.mayWriteToMemory() && !InterleaveR)
5427 std::optional<ElementCount> NarrowedVF =
5429 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5431 VFToOptimize = NarrowedVF;
5434 if (InterleaveR->getStoredValues().empty())
5439 auto *Member0 = InterleaveR->getStoredValues()[0];
5449 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5452 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5453 return IR && IR->getInterleaveGroup()->isFull() &&
5454 IR->getVPValue(Op.index()) == Op.value();
5467 if (StoreGroups.
empty())
5471 bool RequiresScalarEpilogue =
5482 std::unique_ptr<VPlan> NewPlan;
5484 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5485 Plan.
setVF(*VFToOptimize);
5486 NewPlan->removeVF(*VFToOptimize);
5492 for (
auto *StoreGroup : StoreGroups) {
5498 *
SI, StoreGroup->getAddr(), Res,
nullptr,
true,
5499 false, {}, StoreGroup->getDebugLoc());
5500 S->insertBefore(StoreGroup);
5501 StoreGroup->eraseFromParent();
5513 if (VFToOptimize->isScalable()) {
5526 RequiresScalarEpilogue, Step);
5528 Inc->setOperand(1, Step);
5534 "All VPVectorPointerRecipes should have been removed");
5550 "must have a BranchOnCond");
5553 if (VF.
isScalable() && VScaleForTuning.has_value())
5554 VectorStep *= *VScaleForTuning;
5555 assert(VectorStep > 0 &&
"trip count should not be zero");
5559 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5566 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->getFirstNonPhi());
5578 "Cannot handle loops with uncountable early exits");
5651 make_range(MiddleVPBB->getFirstNonPhi(), MiddleVPBB->end()))) {
5665 "vector.recur.extract.for.phi");
5683 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
bool UseMax,
5684 bool Signed) -> std::optional<APInt> {
5695 return std::nullopt;
5703 PhiR->getRecurrenceKind()))
5712 VPValue *BackedgeVal = PhiR->getBackedgeValue();
5713 VPValue *CondSelect = BackedgeVal;
5728 VPValue *
IV = TrueVal == PhiR ? FalseVal : TrueVal;
5742 bool UseSigned =
true;
5743 std::optional<APInt> SentinelVal =
5744 CheckSentinel(IVSCEV, UseMax,
true);
5746 SentinelVal = CheckSentinel(IVSCEV, UseMax,
false);
5755 if (AR->hasNoSignedWrap())
5757 else if (AR->hasNoUnsignedWrap())
5770 VPIRFlags Flags(MinMaxKind,
false,
false,
5779 VPValue *StartVPV = PhiR->getStartValue();
5787 MiddleBuilder.
createSelect(Cmp, ReducedIV, StartVPV, ExitDL);
5796 AnyOfPhi->insertAfter(PhiR);
5800 if (TrueVal == PhiR)
5807 {StartVPV, ReducedIV, OrVal}, {}, ExitDL);
5819 *CondSelect,
RdxUnordered{1}, {}, PhiR->hasUsesOutsideReductionChain());
5820 NewPhiR->insertBefore(PhiR);
5821 PhiR->replaceAllUsesWith(NewPhiR);
5822 PhiR->eraseFromParent();
5831struct VPPartialReductionChain {
5840 unsigned ScaleFactor;
5863 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
5866 BinOp->
setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
5878 if (!
Mul->hasOneUse() ||
5879 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
5880 MulLHS->getOpcode() != MulRHS->getOpcode())
5883 Mul->setOperand(0, Builder.createWidenCast(MulLHS->getOpcode(),
5884 MulLHS->getOperand(0),
5885 Ext->getResultType()));
5886 Mul->setOperand(1, MulLHS == MulRHS
5887 ?
Mul->getOperand(0)
5888 : Builder.createWidenCast(MulRHS->getOpcode(),
5889 MulRHS->getOperand(0),
5890 Ext->getResultType()));
5899static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
5928 if (WidenRecipe->
getOpcode() == Instruction::Sub &&
5938 Builder.insert(NegRecipe);
5943 BinOp = optimizeExtendsForPartialReduction(BinOp, TypeInfo);
5953 assert((!ExitValue || IsLastInChain) &&
5954 "if we found ExitValue, it must match RdxPhi's backedge value");
5965 PartialRed->insertBefore(WidenRecipe);
5982 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
5983 StartInst->setOperand(2, NewScaleFactor);
5991 VPValue *OldStartValue = StartInst->getOperand(0);
5992 StartInst->setOperand(0, StartInst->getOperand(1));
5996 assert(RdxResult &&
"Could not find reduction result");
5999 constexpr unsigned SubOpc = Instruction::BinaryOps::Sub;
6005 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6011static bool isValidPartialReduction(
const VPPartialReductionChain &Chain,
6015 -> std::pair<Type *, TargetTransformInfo::PartialReductionExtendKind> {
6021 return {ExtOpType, ExtKind};
6023 auto ExtInfoA = GetExtInfo(Chain.ExtendA);
6024 auto ExtInfoB = GetExtInfo(Chain.ExtendB);
6025 Type *ExtOpTypeA = ExtInfoA.first;
6026 Type *ExtOpTypeB = ExtInfoB.first;
6027 auto ExtKindA = ExtInfoA.second;
6028 auto ExtKindB = ExtInfoB.second;
6032 if (!Chain.ExtendB && Chain.BinOp && Chain.BinOp != Chain.ReductionBinOp) {
6040 ExtOpTypeB = ExtOpTypeA;
6041 ExtKindB = ExtKindA;
6044 std::optional<unsigned> BinOpc =
6045 (Chain.BinOp && Chain.BinOp != Chain.ReductionBinOp)
6046 ? std::make_optional(Chain.BinOp->
getOpcode())
6053 WidenRecipe->
getOpcode(), ExtOpTypeA, ExtOpTypeB, PhiType, VF,
6054 ExtKindA, ExtKindB, BinOpc, CostCtx.
CostKind,
6056 ? std::optional{WidenRecipe->getFastMathFlags()}
6076 VPValue *PhiOp = UpdateR->getOperand(1);
6085 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6093 Op = CastRecipe->getOperand(0);
6099 if (getScaledReductions(RedPhiR,
Op, Chains, CostCtx,
Range)) {
6100 Op = UpdateR->getOperand(0);
6101 PhiOp = UpdateR->getOperand(1);
6102 if (
Op == Chains.
rbegin()->ReductionBinOp)
6105 "PhiOp must be the chain value");
6108 "Unexpected type for chain values");
6109 }
else if (RedPhiR != PhiOp) {
6122 auto MatchExtends = [OuterExtKind,
6124 assert(Operands.size() <= 2 &&
"expected at most 2 operands");
6126 for (
const auto &[
I, OpVal] :
enumerate(Operands)) {
6130 if (
I > 0 && CastRecipes[0] &&
match(OpVal,
m_APInt(Unused)))
6139 if (!CastRecipes[
I])
6150 return CastRecipes[0] !=
nullptr;
6171 if (!MatchExtends({
Op}))
6187 VPPartialReductionChain Chain(
6188 {UpdateR, CastRecipes[0], CastRecipes[1], BinOp,
6190 if (!isValidPartialReduction(Chain, PhiType, CostCtx,
Range))
6219 getScaledReductions(RedPhiR, ExitValue, ChainsByPhi[RedPhiR], CostCtx,
6224 if (ChainsByPhi.
empty())
6231 for (
const auto &[
_, Chains] : ChainsByPhi)
6232 for (
const VPPartialReductionChain &Chain : Chains) {
6233 PartialReductionOps.
insert(Chain.BinOp);
6234 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6242 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6251 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6252 for (
const VPPartialReductionChain &Chain : Chains) {
6253 if (!ExtendUsersValid(Chain.ExtendA) ||
6254 !ExtendUsersValid(Chain.ExtendB)) {
6258 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6260 return PhiR == RedPhiR;
6262 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6268 if (!
all_of(Chain.ReductionBinOp->
users(), UseIsValid)) {
6277 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6278 return RepR && isa<StoreInst>(RepR->getUnderlyingInstr());
6287 for (
auto &[Phi, Chains] : ChainsByPhi)
6288 for (
const VPPartialReductionChain &Chain : Chains)
6289 transformToPartialReduction(Chain, CostCtx.
Types, Plan, Phi);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
ReachingDefInfo InstSet & ToRemove
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function s Machine IR
static bool mergeBlocksIntoPredecessors(Loop &L, DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU, ScalarEvolution &SE)
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L, VPTypeAnalysis &TypeInfo)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
unsigned getActiveBits() const
Compute the number of active bits in the value.
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
@ NoAlias
The two locations do not alias at all.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
back - Get the last element.
const T & front() const
front - Get the first element.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
Utility class for floating point operations which can have information about relaxed accuracy require...
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
uint32_t getNumMembers() const
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class uses information about analyze scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(const SCEV *SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getUDivExpr(const SCEV *LHS, const SCEV *RHS)
Get a canonical unsigned division expression, or something simpler if possible.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, const SCEV *LHS, const SCEV *RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias)
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
reverse_iterator rbegin()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPRegionBlock * getEnclosingLoopRegion()
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccss as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksOnly(const T &Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPValue * createElementCount(Type *Ty, ElementCount EC)
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPInstruction * createOverflowingOp(unsigned Opcode, ArrayRef< VPValue * > Operands, VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step, const Twine &Name="")
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
Canonical scalar induction phi of the vector loop.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A special type of VPBasicBlock that wraps an existing IR basic block.
BasicBlock * getIRBasicBlock() const
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for \I , if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ComputeAnyOfResult
Compute the final result of a AnyOf reduction with select(cmp(),x,y), where one of (x,...
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ FirstOrderRecurrenceSplice
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPRegionBlock * getRegion()
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipies from IR instructions.
VPRecipeBase * getRecipe(Instruction *I)
Return the recipe created for given ingredient.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
Type * getCanonicalIVType()
Return the type of the canonical IV for loop regions.
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
VPCanonicalIVPHIRecipe * getCanonicalIV()
Returns the canonical induction recipe of the region.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDef is a base class for recipes for modeling a sequence of one or more output IR that define ...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
An analysis for type-inference for VPValues.
LLVMContext & getContext()
Return the LLVMContext used by the analysis.
Type * inferScalarType(const VPValue *V)
Infer the type of V. Returns the scalar type of V.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
void addOperand(VPValue *Operand)
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A Recipe for widening the canonical induction variable of the vector loop.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
A common base class for widening memory operations.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions take to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPValue & getVF()
Returns the VF of the vector loop region.
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPValue & getUF()
Returns the UF of the vector loop region.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPIRBasicBlock * getScalarHeader() const
Return the VPIRBasicBlock wrapping the header of the scalar loop.
VPBasicBlock * getVectorPreheader()
Returns the preheader of the vector loop region, if one exists, or null otherwise.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ?
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
bool match(const SCEV *S, const Pattern &P)
class_match< const SCEV > m_SCEV()
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
GEPLikeRecipe_match< Op0_t, Op1_t > m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPDerivedIV_match< Op0_t, Op1_t, Op2_t > m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
bind_ty< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
specific_intval< 1 > m_True()
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
class_match< VPValue > m_VPValue()
Match an arbitrary VPValue and ignore it.
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector is matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
bind_ty< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
bool isUniformAcrossVFsAndUFs(VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) TODO: Int...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
iterator_range< po_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_post_order_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order while traversing through ...
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
iterator_range< po_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_post_order_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in post order.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
@ Default
The result values are uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
MDNode * Scope
The tag for alias scope specification (used with noalias).
MDNode * NoAlias
The tag specifying the noalias scope.
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
Incoming for lane mask phi as machine instruction, incoming register Reg and incoming block Block are...
This reduction is unordered with the partial result scaled down by some factor.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
TargetTransformInfo::TargetCostKind CostKind
const TargetTransformInfo & TTI
A recipe for handling first-order recurrence phis.
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...