58 if (!VPBB->getParent())
61 auto EndIter = Term ? Term->getIterator() : VPBB->end();
66 VPValue *VPV = Ingredient.getVPSingleValue();
82 *Load, Ingredient.getOperand(0),
nullptr ,
83 false , *VPI, Ingredient.getDebugLoc());
86 *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
87 nullptr ,
false , *VPI,
88 Ingredient.getDebugLoc());
91 Ingredient.operands(), *VPI,
92 Ingredient.getDebugLoc(),
GEP);
104 if (VectorID == Intrinsic::experimental_noalias_scope_decl)
109 if (VectorID == Intrinsic::assume ||
110 VectorID == Intrinsic::lifetime_end ||
111 VectorID == Intrinsic::lifetime_start ||
112 VectorID == Intrinsic::sideeffect ||
113 VectorID == Intrinsic::pseudoprobe) {
118 const bool IsSingleScalar = VectorID != Intrinsic::assume &&
119 VectorID != Intrinsic::pseudoprobe;
123 Ingredient.getDebugLoc());
126 *CI, VectorID,
drop_end(Ingredient.operands()), CI->getType(),
127 VPIRFlags(*CI), *VPI, CI->getDebugLoc());
131 CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI,
135 *VPI, Ingredient.getDebugLoc());
139 "inductions must be created earlier");
148 "Only recpies with zero or one defined values expected");
149 Ingredient.eraseFromParent();
165 if (
A->getOpcode() != Instruction::Store ||
166 B->getOpcode() != Instruction::Store)
176 const APInt *Distance;
182 Type *TyA =
A->getOperand(0)->getScalarType();
184 Type *TyB =
B->getOperand(0)->getScalarType();
190 uint64_t MaxStoreSize = std::max(SizeA, SizeB);
192 auto VFs =
B->getParent()->getPlan()->vectorFactors();
196 return Distance->
abs().
uge(
204 : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), PSE(PSE),
212 return ExcludeRecipes.contains(&R) ||
213 (Store && isNoAliasViaDistance(Store, &GroupLeader));
226 std::optional<SinkStoreInfo> SinkInfo = {}) {
227 bool CheckReads = SinkInfo.has_value();
234 if (SinkInfo && SinkInfo->shouldSkip(R))
238 if (!
R.mayWriteToMemory() && !(CheckReads &&
R.mayReadFromMemory()))
263template <
unsigned Opcode>
268 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
269 "Only Load and Store opcodes supported");
270 constexpr bool IsLoad = (Opcode == Instruction::Load);
273 RecipesByAddressAndType;
278 if (!RepR || RepR->getOpcode() != Opcode || !FilterFn(RepR))
282 VPValue *Addr = RepR->getOperand(IsLoad ? 0 : 1);
286 RecipesByAddressAndType[{AddrSCEV, LoadStoreTy}].push_back(RepR);
291 for (
auto &Group :
Groups) {
306 auto InsertIfValidSinkCandidate = [ScalarVFOnly, &WorkList](
318 if (Candidate->getParent() == SinkTo ||
323 if (!ScalarVFOnly && RepR->isSingleScalar())
326 WorkList.
insert({SinkTo, Candidate});
338 for (
auto &Recipe : *VPBB)
340 InsertIfValidSinkCandidate(VPBB,
Op);
344 for (
unsigned I = 0;
I != WorkList.
size(); ++
I) {
347 std::tie(SinkTo, SinkCandidate) = WorkList[
I];
352 auto UsersOutsideSinkTo =
354 return cast<VPRecipeBase>(U)->getParent() != SinkTo;
356 if (
any_of(UsersOutsideSinkTo, [SinkCandidate](
VPUser *U) {
357 return !U->usesFirstLaneOnly(SinkCandidate);
360 bool NeedsDuplicating = !UsersOutsideSinkTo.empty();
362 if (NeedsDuplicating) {
366 if (
auto *SinkCandidateRepR =
372 nullptr , *SinkCandidateRepR,
376 Clone = SinkCandidate->
clone();
386 InsertIfValidSinkCandidate(SinkTo,
Op);
396 if (!EntryBB || EntryBB->size() != 1 ||
406 if (EntryBB->getNumSuccessors() != 2)
411 if (!Succ0 || !Succ1)
414 if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
416 if (Succ0->getSingleSuccessor() == Succ1)
418 if (Succ1->getSingleSuccessor() == Succ0)
435 if (!Region1->isReplicator())
437 auto *MiddleBasicBlock =
439 if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
444 if (!Region2 || !Region2->isReplicator())
449 if (!Mask1 || Mask1 != Mask2)
452 assert(Mask1 && Mask2 &&
"both region must have conditions");
458 if (TransformedRegions.
contains(Region1))
465 if (!Then1 || !Then2)
485 VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
491 if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
492 Phi1ToMove.eraseFromParent();
495 Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
509 TransformedRegions.
insert(Region1);
512 return !TransformedRegions.
empty();
520 std::string RegionName = (
Twine(
"pred.") + Instr->getOpcodeName()).str();
521 assert(Instr->getParent() &&
"Predicated instruction not in any basic block");
522 auto *BlockInMask = PredRecipe->
getMask();
543 Region->setParent(ParentRegion);
549 RecipeWithoutMask->getDebugLoc());
550 Exiting->appendRecipe(PHIRecipe);
563 if (RepR->isPredicated())
582 if (ParentRegion && ParentRegion->
getExiting() == CurrentBlock)
594 if (!VPBB->getParent())
598 if (!PredVPBB || PredVPBB->getNumSuccessors() != 1 ||
607 R.moveBefore(*PredVPBB, PredVPBB->
end());
609 auto *ParentRegion = VPBB->getParent();
610 if (ParentRegion && ParentRegion->getExiting() == VPBB)
611 ParentRegion->setExiting(PredVPBB);
615 return !WorkList.
empty();
622 bool ShouldSimplify =
true;
623 while (ShouldSimplify) {
639 if (!
IV ||
IV->getTruncInst())
654 for (
auto *U : FindMyCast->
users()) {
656 if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
657 FoundUserCast = UserCast;
664 FindMyCast = FoundUserCast;
666 if (FindMyCast !=
IV)
681 Builder.createDerivedIV(Kind, FPBinOp, StartV, CanonicalIV, Step);
690 BaseIV = Builder.createScalarCast(Instruction::Trunc, BaseIV, TruncTy,
DL);
696 if (ResultTy != StepTy) {
703 Builder.setInsertPoint(VecPreheader);
704 Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy,
DL);
706 return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
732 WideCanIV->getDebugLoc(), Builder));
733 WideCanIV->eraseFromParent();
750 WideCanIV->replaceAllUsesWith(WidenIV);
751 WideCanIV->eraseFromParent();
760 if (PHICost > BroadcastCost)
769 unsigned RegClass =
TTI.getRegisterClassForType(
true, VecTy);
781 WideCanIV->getNoWrapFlags(), WideCanIV->getDebugLoc());
782 NewWideIV->insertBefore(&*Header->getFirstNonPhi());
783 WideCanIV->replaceAllUsesWith(NewWideIV);
784 WideCanIV->eraseFromParent();
792 bool IsConditionalAssume = RepR && RepR->isPredicated() &&
794 if (IsConditionalAssume)
797 if (R.mayHaveSideEffects())
801 return all_of(R.definedValues(),
802 [](
VPValue *V) { return V->getNumUsers() == 0; });
822 VPUser *PhiUser = PhiR->getSingleUser();
828 PhiR->replaceAllUsesWith(Start);
829 PhiR->eraseFromParent();
837 for (
unsigned I = 0;
I !=
Users.size(); ++
I) {
840 Users.insert_range(V->users());
842 return Users.takeVector();
856 nullptr, StartV, StepV, PtrIV->
getDebugLoc(), Builder);
893 Def->getNumUsers() == 0 || !Def->getUnderlyingValue() ||
894 (RepR && (RepR->isSingleScalar() || RepR->isPredicated())))
907 Def->operands(),
true,
909 Clone->insertAfter(Def);
910 Def->replaceAllUsesWith(Clone);
921 PtrIV->replaceAllUsesWith(PtrAdd);
928 if (HasOnlyVectorVFs &&
none_of(WideIV->users(), [WideIV](
VPUser *U) {
929 return U->usesScalars(WideIV);
935 Plan,
ID.getKind(),
ID.getInductionOpcode(),
937 WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
938 WideIV->getDebugLoc(), Builder);
941 if (!HasOnlyVectorVFs) {
943 "plans containing a scalar VF cannot also include scalable VFs");
944 WideIV->replaceAllUsesWith(Steps);
947 WideIV->replaceUsesWithIf(Steps,
948 [WideIV, HasScalableVF](
VPUser &U,
unsigned) {
950 return U.usesFirstLaneOnly(WideIV);
951 return U.usesScalars(WideIV);
967 return (IntOrFpIV && IntOrFpIV->getTruncInst()) ? nullptr : WideIV;
972 if (!Def || Def->getNumOperands() != 2)
980 auto IsWideIVInc = [&]() {
981 auto &
ID = WideIV->getInductionDescriptor();
984 VPValue *IVStep = WideIV->getStepValue();
985 switch (
ID.getInductionOpcode()) {
986 case Instruction::Add:
988 case Instruction::FAdd:
990 case Instruction::FSub:
993 case Instruction::Sub: {
1013 return IsWideIVInc() ? WideIV :
nullptr;
1030 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1041 VPValue *FirstActiveLane =
B.createFirstActiveLane(Mask,
DL);
1042 FirstActiveLane =
B.createScalarZExtOrTrunc(
1043 FirstActiveLane, CanonicalIVType, FirstActiveLane->
getScalarType(),
DL);
1044 VPValue *EndValue =
B.createAdd(CanonicalIV, FirstActiveLane,
DL);
1049 if (Incoming != WideIV) {
1051 EndValue =
B.createAdd(EndValue, One,
DL);
1056 VPIRValue *Start = WideIV->getStartValue();
1057 VPValue *Step = WideIV->getStepValue();
1058 EndValue =
B.createDerivedIV(
1060 Start, EndValue, Step);
1074 if (WideIntOrFp && WideIntOrFp->getTruncInst())
1084 Start, VectorTC, Step);
1114 assert(EndValue &&
"Must have computed the end value up front");
1119 if (Incoming != WideIV)
1131 auto *Zero = Plan.
getZero(StepTy);
1132 return B.createPtrAdd(EndValue,
B.createSub(Zero, Step),
1137 return B.createNaryOp(
1138 ID.getInductionBinOp()->getOpcode() == Instruction::FAdd
1140 : Instruction::FAdd,
1141 {EndValue, Step}, {ID.getInductionBinOp()->getFastMathFlags()});
1152 VPBuilder VectorPHBuilder(VectorPH, VectorPH->begin());
1162 EndValues[WideIV] = EndValue;
1172 R.getVPSingleValue()->replaceAllUsesWith(EndValue);
1173 R.eraseFromParent();
1182 for (
auto [Idx, PredVPBB] :
enumerate(ExitVPBB->getPredecessors())) {
1184 if (PredVPBB == MiddleVPBB)
1186 Plan, ExitIRI->getOperand(Idx), EndValues, PSE);
1189 Plan, ExitIRI->getOperand(Idx), PSE);
1191 ExitIRI->setOperand(Idx, Escape);
1208 const auto &[V, Inserted] = SCEV2VPV.
try_emplace(ExpR->getSCEV(), ExpR);
1212 ExpR->replaceAllUsesWith(V->second);
1216 ExpR->eraseFromParent();
1225 while (!WorkList.
empty()) {
1227 if (!Seen.
insert(Cur).second)
1235 R->eraseFromParent();
1242static std::optional<std::pair<bool, unsigned>>
1245 std::optional<std::pair<bool, unsigned>>>(R)
1248 [](
auto *
I) {
return std::make_pair(
false,
I->getOpcode()); })
1250 return std::make_pair(
true,
I->getVectorIntrinsicID());
1252 .Case<VPVectorPointerRecipe, VPPredInstPHIRecipe, VPScalarIVStepsRecipe>(
1258 I->getVPRecipeID());
1260 .
Default([](
auto *) {
return std::nullopt; });
1277 Value *V =
Op->getUnderlyingValue();
1283 auto FoldToIRValue = [&]() ->
Value * {
1285 if (OpcodeOrIID->first) {
1286 if (R.getNumOperands() != 2)
1288 unsigned ID = OpcodeOrIID->second;
1289 return Folder.FoldBinaryIntrinsic(
ID,
Ops[0],
Ops[1], R.getScalarType());
1291 unsigned Opcode = OpcodeOrIID->second;
1297 R.getVPSingleValue()->getScalarType());
1300 return Folder.FoldSelect(
Ops[0],
Ops[1],
1303 return Folder.FoldBinOp(Instruction::BinaryOps::Xor,
Ops[0],
1305 case Instruction::Select:
1306 return Folder.FoldSelect(
Ops[0],
Ops[1],
Ops[2]);
1307 case Instruction::ICmp:
1308 case Instruction::FCmp:
1311 case Instruction::GetElementPtr: {
1314 return Folder.FoldGEP(
GEP->getSourceElementType(),
Ops[0],
1323 case Instruction::ExtractElement:
1330 if (
Value *V = FoldToIRValue())
1331 return R.getParent()->getPlan()->getOrAddLiveIn(V);
1337 bool CanCreateNewRecipe) {
1338 VPlan *Plan = Def->getParent()->getPlan();
1348 Def->replaceAllUsesWith(
X);
1349 Def->eraseFromParent();
1361 Def->replaceAllUsesWith(
X);
1373 Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1379 Def->replaceAllUsesWith(
X);
1385 Def->replaceAllUsesWith(Plan->
getFalse());
1391 Def->replaceAllUsesWith(
X);
1396 if (CanCreateNewRecipe &&
1401 (!Def->getOperand(0)->hasMoreThanOneUniqueUser() ||
1402 !Def->getOperand(1)->hasMoreThanOneUniqueUser())) {
1403 Def->replaceAllUsesWith(
1404 Builder.createLogicalAnd(
X, Builder.createOr(
Y, Z)));
1411 Def->replaceAllUsesWith(Def->getOperand(1));
1418 Def->replaceAllUsesWith(Builder.createLogicalAnd(
X,
Y));
1424 Def->replaceAllUsesWith(Plan->
getFalse());
1429 Def->replaceAllUsesWith(
X);
1435 if (CanCreateNewRecipe &&
1437 Def->replaceAllUsesWith(Builder.createNot(
C));
1443 Def->setOperand(0,
C);
1444 Def->setOperand(1,
Y);
1445 Def->setOperand(2,
X);
1450 if (CanCreateNewRecipe &&
1454 Y->getScalarType()->isIntegerTy(1)) {
1455 Def->replaceAllUsesWith(
1456 Builder.createOr(
Y, Builder.createLogicalAnd(
X, Z)));
1465 VPlan *Plan = Def->getParent()->getPlan();
1472 return Def->replaceAllUsesWith(V);
1478 PredPHI->replaceAllUsesWith(
Op);
1491 bool CanCreateNewRecipe =
1496 Type *TruncTy = Def->getScalarType();
1497 Type *ATy =
A->getScalarType();
1498 if (TruncTy == ATy) {
1499 Def->replaceAllUsesWith(
A);
1508 : Instruction::ZExt;
1511 if (
auto *UnderlyingExt = Def->getOperand(0)->getUnderlyingValue()) {
1513 Ext->setUnderlyingValue(UnderlyingExt);
1515 Def->replaceAllUsesWith(Ext);
1517 auto *Trunc = Builder.createWidenCast(Instruction::Trunc,
A, TruncTy);
1518 Def->replaceAllUsesWith(Trunc);
1528 return Def->replaceAllUsesWith(
A);
1531 return Def->replaceAllUsesWith(
A);
1534 return Def->replaceAllUsesWith(Plan->
getZero(Def->getScalarType()));
1540 return Def->replaceAllUsesWith(Builder.createSub(
1541 Plan->
getZero(
A->getScalarType()),
A, Def->getDebugLoc(),
"", NW));
1544 if (CanCreateNewRecipe &&
1552 ->hasNoSignedWrap()};
1553 return Def->replaceAllUsesWith(
1554 Builder.createSub(
X,
Y, Def->getDebugLoc(),
"", NW));
1560 return Def->replaceAllUsesWith(Builder.createNaryOp(
1562 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1567 return Def->replaceAllUsesWith(Builder.createNaryOp(
1569 {A, Plan->getConstantInt(APC->getBitWidth(), APC->exactLogBase2())},
1574 return Def->replaceAllUsesWith(
A);
1589 R->setOperand(1,
Y);
1590 R->setOperand(2,
X);
1594 R->replaceAllUsesWith(Cmp);
1599 if (!Cmp->getDebugLoc() && Def->getDebugLoc())
1600 Cmp->setDebugLoc(Def->getDebugLoc());
1612 if (
Op->getNumUsers() > 1 ||
1616 }
else if (!UnpairedCmp) {
1617 UnpairedCmp =
Op->getDefiningRecipe();
1621 UnpairedCmp =
nullptr;
1628 if (NewOps.
size() < Def->getNumOperands()) {
1630 return Def->replaceAllUsesWith(NewAnyOf);
1637 if (CanCreateNewRecipe &&
1643 return Def->replaceAllUsesWith(NewCmp);
1649 Def->getOperand(1)->getScalarType() == Def->getScalarType())
1650 return Def->replaceAllUsesWith(Def->getOperand(1));
1654 Type *WideStepTy = Def->getScalarType();
1655 if (
X->getScalarType() != WideStepTy)
1656 X = Builder.createWidenCast(Instruction::Trunc,
X, WideStepTy);
1657 Def->replaceAllUsesWith(
X);
1666 Def->getScalarType()->isIntegerTy(1)) {
1667 Def->setOperand(1, Def->getOperand(0));
1668 Def->setOperand(0,
Y);
1675 return Def->replaceAllUsesWith(Def->getOperand(0));
1681 Def->replaceAllUsesWith(
1682 BuildVector->getOperand(BuildVector->getNumOperands() - 1));
1687 return Def->replaceAllUsesWith(
X);
1690 return Def->replaceAllUsesWith(
A);
1693 return Def->replaceAllUsesWith(
A);
1699 Def->replaceAllUsesWith(
1700 BuildVector->getOperand(BuildVector->getNumOperands() - 2));
1707 Def->replaceAllUsesWith(BuildVector->getOperand(Idx));
1712 Def->replaceAllUsesWith(
1722 "broadcast operand must be single-scalar");
1723 Def->setOperand(0,
C);
1728 return Def->replaceUsesWithIf(
1729 X, [Def](
const VPUser &U,
unsigned) {
return U.usesScalars(Def); });
1732 if (Def->getNumOperands() == 1) {
1733 Def->replaceAllUsesWith(Def->getOperand(0));
1738 Phi->replaceAllUsesWith(Phi->getOperand(0));
1744 if (Def->getNumOperands() == 1 &&
1746 return Def->replaceAllUsesWith(IRV);
1759 return Def->replaceAllUsesWith(
A);
1766 return Def->replaceAllUsesWith(WidenIV->getRegion()->getCanonicalIV());
1769 Def->replaceAllUsesWith(Builder.createNaryOp(
1770 Instruction::ExtractElement, {A, LaneToExtract}, Def->getDebugLoc()));
1784 auto *IVInc = Def->getOperand(0);
1785 if (IVInc->getNumUsers() == 2) {
1790 if (Phi->getNumUsers() == 1 || (Phi->getNumUsers() == 2 && Inc)) {
1791 Def->replaceAllUsesWith(IVInc);
1793 Inc->replaceAllUsesWith(Phi);
1794 Phi->setOperand(0,
Y);
1810 Steps->replaceAllUsesWith(Steps->getOperand(0));
1818 Def->replaceUsesWithIf(StartV, [](
const VPUser &U,
unsigned Idx) {
1820 return PhiR && PhiR->isInLoop();
1826 return Def->replaceAllUsesWith(
A);
1852 while (!Worklist.
empty()) {
1861 R->replaceAllUsesWith(
1862 Builder.createLogicalAnd(HeaderMask, Builder.createLogicalAnd(
X,
Y)));
1866static std::optional<Instruction::BinaryOps>
1869 case Intrinsic::masked_udiv:
1870 return Instruction::UDiv;
1871 case Intrinsic::masked_sdiv:
1872 return Instruction::SDiv;
1873 case Intrinsic::masked_urem:
1874 return Instruction::URem;
1875 case Intrinsic::masked_srem:
1876 return Instruction::SRem;
1893 if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
1897 if (RepR && RepR->getOpcode() == Instruction::Store &&
1900 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1901 true ,
nullptr , *RepR ,
1902 *RepR , RepR->getDebugLoc());
1903 Clone->insertBefore(RepOrWidenR);
1905 VPValue *ExtractOp = Clone->getOperand(0);
1911 Clone->setOperand(0, ExtractOp);
1912 RepR->eraseFromParent();
1924 VPValue *SafeDivisor = Builder.createSelect(
1925 IntrR->getOperand(2), IntrR->getOperand(1),
1927 VPValue *Clone = Builder.createNaryOp(
1928 *
Opc, {IntrR->getOperand(0), SafeDivisor},
1931 IntrR->eraseFromParent();
1940 auto IntroducesBCastOf = [](
const VPValue *
Op) {
1949 return !U->usesScalars(
Op);
1953 if (
any_of(RepOrWidenR->users(), IntroducesBCastOf(RepOrWidenR)) &&
1956 make_filter_range(Op->users(), not_equal_to(RepOrWidenR)),
1957 IntroducesBCastOf(Op)))
1961 auto *IRV = dyn_cast<VPIRValue>(Op);
1962 bool LiveInNeedsBroadcast = IRV && !isa<Constant>(IRV->getValue());
1963 auto *OpR = dyn_cast<VPReplicateRecipe>(Op);
1964 return LiveInNeedsBroadcast || (OpR && OpR->isSingleScalar());
1969 RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1970 true ,
nullptr, *RepOrWidenR);
1971 Clone->insertBefore(RepOrWidenR);
1972 RepOrWidenR->replaceAllUsesWith(Clone);
1974 RepOrWidenR->eraseFromParent();
2010 if (Blend->isNormalized() || !
match(Blend->getMask(0),
m_False()))
2011 UniqueValues.
insert(Blend->getIncomingValue(0));
2012 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
2014 UniqueValues.
insert(Blend->getIncomingValue(
I));
2016 if (UniqueValues.
size() == 1) {
2017 Blend->replaceAllUsesWith(*UniqueValues.
begin());
2018 Blend->eraseFromParent();
2022 if (Blend->isNormalized())
2028 unsigned StartIndex = 0;
2029 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2034 if (Mask->getNumUsers() == 1 && !
match(Mask,
m_False())) {
2041 OperandsWithMask.
push_back(Blend->getIncomingValue(StartIndex));
2043 for (
unsigned I = 0;
I != Blend->getNumIncomingValues(); ++
I) {
2044 if (
I == StartIndex)
2046 OperandsWithMask.
push_back(Blend->getIncomingValue(
I));
2047 OperandsWithMask.
push_back(Blend->getMask(
I));
2052 OperandsWithMask, *Blend, Blend->getDebugLoc());
2053 NewBlend->insertBefore(&R);
2055 VPValue *DeadMask = Blend->getMask(StartIndex);
2057 Blend->eraseFromParent();
2062 if (NewBlend->getNumOperands() == 3 &&
2064 VPValue *Inc0 = NewBlend->getOperand(0);
2065 VPValue *Inc1 = NewBlend->getOperand(1);
2066 VPValue *OldMask = NewBlend->getOperand(2);
2067 NewBlend->setOperand(0, Inc1);
2068 NewBlend->setOperand(1, Inc0);
2069 NewBlend->setOperand(2, NewMask);
2096 APInt MaxVal = AlignedTC - 1;
2099 unsigned NewBitWidth =
2105 bool MadeChange =
false;
2130 "canonical IV is not expected to have a truncation");
2135 NewWideIV->insertBefore(WideIV);
2142 Cmp->replaceAllUsesWith(
2143 VPBuilder(Cmp).createICmp(Cmp->getPredicate(), NewWideIV, NewBTC));
2157 return any_of(
Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
2159 return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, PSE);
2173 const SCEV *VectorTripCount =
2178 "Trip count SCEV must be computable");
2199 auto *Term = &ExitingVPBB->
back();
2212 for (
unsigned Part = 0; Part < UF; ++Part) {
2218 Extracts[Part] = Ext;
2230 match(Phi->getBackedgeValue(),
2232 assert(Index &&
"Expected index from ActiveLaneMask instruction");
2249 "Expected one VPActiveLaneMaskPHIRecipe for each unroll part");
2256 "Expected incoming values of Phi to be ActiveLaneMasks");
2261 EntryALM->setOperand(2, ALMMultiplier);
2262 LoopALM->setOperand(2, ALMMultiplier);
2266 ExtractFromALM(EntryALM, EntryExtracts);
2271 ExtractFromALM(LoopALM, LoopExtracts);
2273 Not->setOperand(0, LoopExtracts[0]);
2276 for (
unsigned Part = 0; Part < UF; ++Part) {
2277 Phis[Part]->setStartValue(EntryExtracts[Part]);
2278 Phis[Part]->setBackedgeValue(LoopExtracts[Part]);
2291 auto *Term = &ExitingVPBB->
back();
2303 const SCEV *VectorTripCount =
2309 "Trip count SCEV must be computable");
2328 Term->setOperand(1, Plan.
getTrue());
2333 {}, Term->getDebugLoc());
2335 Term->eraseFromParent();
2369 R.getVPSingleValue()->replaceAllUsesWith(Trunc);
2379 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
2380 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
2398 RecurKind RK = PhiR->getRecurrenceKind();
2405 RecWithFlags->dropPoisonGeneratingFlags();
2411struct VPCSEDenseMapInfo :
public DenseMapInfo<VPSingleDefRecipe *> {
2413 return Def == getEmptyKey();
2424 return GEP->getSourceElementType();
2427 .Case<VPVectorPointerRecipe, VPWidenGEPRecipe>(
2428 [](
auto *
I) {
return I->getSourceElementType(); })
2429 .
Default([](
auto *) {
return nullptr; });
2433 static bool canHandle(
const VPSingleDefRecipe *Def) {
2442 if (!
C || (!
C->first && (
C->second == Instruction::InsertValue ||
2443 C->second == Instruction::ExtractValue)))
2449 return !
Def->mayReadFromMemory();
2453 static unsigned getHashValue(
const VPSingleDefRecipe *Def) {
2456 getGEPSourceElementType(Def),
Def->getScalarType(),
2459 if (RFlags->hasPredicate())
2462 return hash_combine(Result, SIVSteps->getInductionOpcode());
2467 static bool isEqual(
const VPSingleDefRecipe *L,
const VPSingleDefRecipe *R) {
2470 if (
L->getVPRecipeID() !=
R->getVPRecipeID() ||
2472 getGEPSourceElementType(L) != getGEPSourceElementType(R) ||
2474 !
equal(
L->operands(),
R->operands()))
2477 "must have valid opcode info for both recipes");
2479 if (LFlags->hasPredicate() &&
2480 LFlags->getPredicate() !=
2484 if (LSIV->getInductionOpcode() !=
2490 const VPRegionBlock *RegionL =
L->getRegion();
2491 const VPRegionBlock *RegionR =
R->getRegion();
2494 L->getParent() !=
R->getParent())
2496 return L->getScalarType() ==
R->getScalarType();
2512 if (!Def || !VPCSEDenseMapInfo::canHandle(Def))
2516 if (!VPDT.
dominates(V->getParent(), VPBB))
2521 Def->replaceAllUsesWith(V);
2552 "Expected vector prehader's successor to be the vector loop region");
2560 return !Op->isDefinedOutsideLoopRegions();
2563 R.moveBefore(*Preheader, Preheader->
end());
2581 assert(!RepR->isPredicated() &&
2582 "Expected prior transformation of predicated replicates to "
2583 "replicate regions");
2588 if (!RepR->isSingleScalar())
2600 if (
any_of(Def->users(), [&SinkBB, &LoopRegion](
VPUser *U) {
2601 auto *UserR = cast<VPRecipeBase>(U);
2602 VPBasicBlock *Parent = UserR->getParent();
2604 if (SinkBB && SinkBB != Parent)
2609 return UserR->isPhi() || Parent->getEnclosingLoopRegion() ||
2610 Parent->getSinglePredecessor() != LoopRegion;
2620 "Defining block must dominate sink block");
2645 VPValue *ResultVPV = R.getVPSingleValue();
2647 unsigned NewResSizeInBits = MinBWs.
lookup(UI);
2648 if (!NewResSizeInBits)
2661 (void)OldResSizeInBits;
2669 VPW->dropPoisonGeneratingFlags();
2671 assert((OldResSizeInBits != NewResSizeInBits ||
2673 "Only ICmps should not need extending the result.");
2679 if (OldResSizeInBits != NewResSizeInBits) {
2681 Instruction::ZExt, ResultVPV, OldResTy);
2683 Ext->setOperand(0, ResultVPV);
2693 unsigned OpSizeInBits =
Op->getScalarType()->getScalarSizeInBits();
2694 if (OpSizeInBits == NewResSizeInBits)
2696 assert(OpSizeInBits > NewResSizeInBits &&
"nothing to truncate");
2697 auto [ProcessedIter, Inserted] = ProcessedTruncs.
try_emplace(
Op);
2703 Builder.setInsertPoint(&R);
2704 ProcessedIter->second =
2705 Builder.createWidenCast(Instruction::Trunc,
Op, NewResTy);
2707 Op = ProcessedIter->second;
2711 NWR->insertBefore(&R);
2715 VPValue *Replacement = NWR->getVPSingleValue();
2716 if (OldResSizeInBits != NewResSizeInBits)
2722 R.eraseFromParent();
2728 std::optional<VPDominatorTree> VPDT;
2745 assert(VPBB->getNumSuccessors() == 2 &&
2746 "Two successors expected for BranchOnCond");
2747 unsigned RemovedIdx;
2758 "There must be a single edge between VPBB and its successor");
2766 VPBB->back().eraseFromParent();
2778 if (Reachable.contains(
B))
2789 for (
VPValue *Def : R.definedValues())
2790 Def->replaceAllUsesWith(&Tmp);
2791 R.eraseFromParent();
2848 DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
2859 auto *EntryIncrement = Builder.createOverflowingOp(
2861 DL,
"index.part.next");
2867 {EntryIncrement, TC, ALMMultiplier},
DL,
2868 "active.lane.mask.entry");
2875 LaneMaskPhi->insertBefore(*HeaderVPBB, HeaderVPBB->begin());
2880 Builder.setInsertPoint(OriginalTerminator);
2881 auto *InLoopIncrement = Builder.createOverflowingOp(
2883 {CanonicalIVIncrement, &Plan.
getVF()}, {
false,
false},
DL);
2885 {InLoopIncrement, TC, ALMMultiplier},
DL,
2886 "active.lane.mask.next");
2887 LaneMaskPhi->addBackedgeValue(ALM);
2891 auto *NotMask = Builder.createNot(ALM,
DL);
2898 bool UseActiveLaneMaskForControlFlow) {
2900 auto *WideCanonicalIV =
2902 assert(WideCanonicalIV &&
2903 "Must have widened canonical IV when tail folding!");
2906 if (UseActiveLaneMaskForControlFlow) {
2915 nullptr,
"active.lane.mask");
2931 template <
typename OpTy>
bool match(OpTy *V)
const {
2942template <
typename Op0_t,
typename Op1_t>
2950 case Intrinsic::masked_udiv:
2951 return Intrinsic::vp_udiv;
2952 case Intrinsic::masked_sdiv:
2953 return Intrinsic::vp_sdiv;
2954 case Intrinsic::masked_urem:
2955 return Intrinsic::vp_urem;
2956 case Intrinsic::masked_srem:
2957 return Intrinsic::vp_srem;
2959 return std::nullopt;
2974 VPValue *Addr, *Mask, *EndPtr;
2977 auto AdjustEndPtr = [&CurRecipe, &EVL](
VPValue *EndPtr) {
2979 EVLEndPtr->insertBefore(&CurRecipe);
2984 EVLEndPtr->setOperand(1, EVLAsVF);
2988 auto GetVPReverse = [&CurRecipe, &EVL, Plan,
2993 Intrinsic::experimental_vp_reverse, {V, Plan->
getTrue(), &EVL},
2994 V->getScalarType(), {}, {},
DL);
2995 Reverse->insertBefore(&CurRecipe);
2999 if (
match(&CurRecipe,
3010 Mask = GetVPReverse(Mask);
3011 Addr = AdjustEndPtr(EndPtr);
3014 LoadR->insertBefore(&CurRecipe);
3016 {LoadR, Plan->
getTrue(), &EVL},
3017 LoadR->getScalarType(), {}, {},
DL);
3028 NewLoad->setOperand(2, Mask);
3029 NewLoad->setOperand(3, &EVL);
3037 StoredVal, EVL, Mask);
3039 if (
match(&CurRecipe,
3043 Mask = GetVPReverse(Mask);
3044 Addr = AdjustEndPtr(EndPtr);
3045 StoredVal = GetVPReverse(ReversedVal);
3047 StoredVal, EVL, Mask);
3051 if (Rdx->isConditional() &&
3056 if (Interleave->getMask() &&
3064 Intrinsic::vp_merge, {Mask ? Mask : Plan->
getTrue(),
LHS,
RHS, &EVL},
3065 LHS->getScalarType(), {}, {},
DL);
3078 if (
match(&CurRecipe,
3083 LHS->getScalarType(), {}, {},
DL);
3089 {IntrR->getOperand(0),
3090 IntrR->getOperand(1),
3091 Mask ? Mask : Plan->
getTrue(), &EVL},
3092 IntrR->getScalarType(), {}, {},
DL);
3101 VPValue *HeaderMask =
nullptr, *EVL =
nullptr;
3106 HeaderMask = R.getVPSingleValue();
3117 NewR->insertBefore(R);
3118 for (
auto [Old, New] :
3119 zip_equal(R->definedValues(), NewR->definedValues()))
3120 Old->replaceAllUsesWith(New);
3133 Mask->getScalarType(), {}, {}, LogicalAnd->getDebugLoc());
3134 Merge->insertBefore(LogicalAnd);
3135 LogicalAnd->replaceAllUsesWith(
Merge);
3142 R->eraseFromParent();
3163 auto IsAllowedUser =
3164 IsaPred<VPVectorEndPointerRecipe, VPScalarIVStepsRecipe,
3165 VPWidenIntOrFpInductionRecipe,
3166 VPWidenMemIntrinsicRecipe>;
3167 if (match(U, m_Trunc(m_Specific(&Plan.getVF()))))
3168 return all_of(cast<VPSingleDefRecipe>(U)->users(),
3170 return IsAllowedUser(U);
3172 "User of VF that we can't transform to EVL.");
3182 "Only users of VFxUF should be VPWidenPointerInductionRecipe and the "
3183 "increment of the canonical induction.");
3199 MaxEVL = Builder.createScalarZExtOrTrunc(
3203 Builder.setInsertPoint(Header, Header->getFirstNonPhi());
3204 VPValue *PrevEVL = Builder.createScalarPhi(
3218 Intrinsic::experimental_vp_splice,
3219 {V1, V2, Imm, Plan.
getTrue(), PrevEVL, &EVL},
3220 R.getVPSingleValue()->getScalarType(), {}, {}, R.getDebugLoc());
3222 R.getVPSingleValue()->replaceAllUsesWith(VPSplice);
3235 if (match(&R, m_ComputeReductionResult(m_Select(m_Specific(HeaderMask),
3236 m_VPValue(), m_VPValue()))))
3237 return R.getOperand(0)->getDefiningRecipe()->getRegion() ==
3238 Plan.getVectorLoopRegion();
3250 VPValue *EVLMask = Builder.createICmp(
3310 VPlan &Plan,
const std::optional<unsigned> &MaxSafeElements) {
3322 auto *CurrentIteration =
3324 CurrentIteration->insertBefore(*Header, Header->begin());
3325 VPBuilder Builder(Header, Header->getFirstNonPhi());
3328 VPPhi *AVLPhi = Builder.createScalarPhi(
3332 if (MaxSafeElements) {
3342 Builder.setInsertPoint(CanonicalIVIncrement);
3346 OpVPEVL = Builder.createScalarZExtOrTrunc(
3347 OpVPEVL, CanIVTy, I32Ty, CanonicalIVIncrement->getDebugLoc());
3349 auto *NextIter = Builder.createAdd(
3350 OpVPEVL, CurrentIteration, CanonicalIVIncrement->getDebugLoc(),
3351 "current.iteration.next", CanonicalIVIncrement->getNoWrapFlags());
3352 CurrentIteration->addBackedgeValue(NextIter);
3356 "avl.next", {
true,
false});
3364 CanonicalIV->replaceAllUsesWith(CurrentIteration);
3365 CanonicalIVIncrement->setOperand(0, CanonicalIV);
3379 assert(!CurrentIteration &&
3380 "Found multiple CurrentIteration. Only one expected");
3381 CurrentIteration = PhiR;
3385 if (!CurrentIteration)
3396 CurrentIteration->
getDebugLoc(),
"current.iteration.iv");
3405 CanIVInc->eraseFromParent();
3414 if (Header->empty())
3423 if (!
match(EVLPhi->getBackedgeValue(),
3436 [[maybe_unused]]
bool FoundAVLNext =
3439 assert(FoundAVLNext &&
"Didn't find AVL backedge?");
3447 [[maybe_unused]]
bool FoundIncrement =
match(
3454 "Expected BranchOnCond with ICmp comparing CanIV + VFxUF with vector "
3459 LatchBr->setOperand(
3470 "expected to run before loop regions are created");
3473 auto CanUseVersionedStride = [&VPDT, Preheader](
VPUser &U,
unsigned) {
3476 return VPDT.
dominates(Preheader, Parent);
3479 for (
const SCEV *Stride : StridesMap.
values()) {
3482 const APInt *StrideConst;
3505 RewriteMap[StrideV] = PSE.
getSCEV(StrideV);
3512 const SCEV *ScevExpr = ExpSCEV->getSCEV();
3515 if (NewSCEV != ScevExpr) {
3517 ExpSCEV->replaceAllUsesWith(NewExp);
3528 auto CollectPoisonGeneratingInstrsInBackwardSlice([&](
VPRecipeBase *Root) {
3533 while (!Worklist.
empty()) {
3536 if (!Visited.
insert(CurRec).second)
3558 RecWithFlags->isDisjoint()) {
3561 Builder.createAdd(
A,
B, RecWithFlags->getDebugLoc());
3562 New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
3563 RecWithFlags->replaceAllUsesWith(New);
3564 RecWithFlags->eraseFromParent();
3567 RecWithFlags->dropPoisonGeneratingFlags();
3572 assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
3573 "found instruction with poison generating flags not covered by "
3574 "VPRecipeWithIRFlags");
3579 if (
VPRecipeBase *OpDef = Operand->getDefiningRecipe())
3587 auto IsNotHeaderMask = [&Plan](
VPValue *Mask) {
3599 VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
3600 if (AddrDef && WidenRec->isConsecutive() &&
3601 IsNotHeaderMask(WidenRec->getMask()))
3602 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3604 VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
3605 if (AddrDef && IsNotHeaderMask(InterleaveRec->getMask()))
3606 CollectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
3616 const bool &EpilogueAllowed) {
3617 if (InterleaveGroups.empty())
3628 IRMemberToRecipe[&MemR->getIngredient()] = MemR;
3635 for (
const auto *IG : InterleaveGroups) {
3640 return !IRMemberToRecipe.contains(Member);
3644 auto *Start = IRMemberToRecipe.
lookup(IG->getMember(0));
3648 StoredValues.
push_back(StoreR->getStoredValue());
3649 for (
unsigned I = 1;
I < IG->getFactor(); ++
I) {
3655 StoredValues.
push_back(StoreR->getStoredValue());
3659 bool NeedsMaskForGaps =
3660 (IG->requiresScalarEpilogue() && !EpilogueAllowed) ||
3661 (!StoredValues.
empty() && !IG->isFull());
3664 auto *InsertPos = IRMemberToRecipe.
lookup(IRInsertPos);
3673 VPValue *Addr = Start->getAddr();
3682 assert(IG->getIndex(IRInsertPos) != 0 &&
3683 "index of insert position shouldn't be zero");
3687 IG->getIndex(IRInsertPos),
3691 Addr =
B.createNoWrapPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
3697 if (IG->isReverse()) {
3700 -(int64_t)IG->getFactor(), NW, InsertPosR->
getDebugLoc());
3701 ReversePtr->insertBefore(InsertPosR);
3705 IG, Addr, StoredValues, InsertPos->getMask(), NeedsMaskForGaps,
3707 VPIG->insertBefore(InsertPosR);
3710 for (
unsigned i = 0; i < IG->getFactor(); ++i)
3713 if (!Member->getType()->isVoidTy()) {
3771 AddOp = Instruction::Add;
3772 MulOp = Instruction::Mul;
3774 AddOp =
ID.getInductionOpcode();
3775 MulOp = Instruction::FMul;
3783 Step = Builder.createScalarCast(Instruction::Trunc, Step, Ty,
DL);
3784 Start = Builder.createScalarCast(Instruction::Trunc, Start, Ty,
DL);
3793 Init = Builder.createWidenCast(Instruction::UIToFP,
Init, StepTy);
3798 Init = Builder.createNaryOp(MulOp, {
Init, SplatStep}, Flags);
3799 Init = Builder.createNaryOp(AddOp, {SplatStart,
Init}, Flags,
3817 if (R->getParent()->getEnclosingLoopRegion())
3818 Builder.setInsertPoint(R->getParent(), std::next(R->getIterator()));
3823 VF = Builder.createScalarCast(Instruction::CastOps::UIToFP, VF, StepTy,
3826 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3828 Inc = Builder.createNaryOp(MulOp, {Step, VF}, Flags);
3835 auto *
Next = Builder.createNaryOp(AddOp, {Prev, Inc}, Flags,
3838 WidePHI->addIncoming(
Next);
3865 VPlan *Plan = R->getParent()->getPlan();
3866 VPValue *Start = R->getStartValue();
3867 VPValue *Step = R->getStepValue();
3868 VPValue *VF = R->getVFValue();
3870 assert(R->getInductionDescriptor().getKind() ==
3872 "Not a pointer induction according to InductionDescriptor!");
3873 assert(R->getScalarType()->isPointerTy() &&
"Unexpected type.");
3875 "Recipe should have been replaced");
3881 VPPhi *ScalarPtrPhi = Builder.createScalarPhi(Start,
DL,
"pointer.phi");
3885 Builder.setInsertPoint(R->getParent(), R->getParent()->getFirstNonPhi());
3888 Offset = Builder.createOverflowingOp(Instruction::Mul, {
Offset, Step});
3890 Builder.createWidePtrAdd(ScalarPtrPhi,
Offset,
DL,
"vector.gep");
3891 R->replaceAllUsesWith(PtrAdd);
3896 VF = Builder.createScalarZExtOrTrunc(VF, StepTy, VF->
getScalarType(),
DL);
3897 VPValue *Inc = Builder.createOverflowingOp(Instruction::Mul, {Step, VF});
3900 Builder.createPtrAdd(ScalarPtrPhi, Inc,
DL,
"ptr.ind");
3908 VPValue *Step = R->getStepValue();
3909 VPValue *Index = R->getIndex();
3913 ? Builder.createScalarSExtOrTrunc(
3915 : Builder.createScalarCast(Instruction::SIToFP, Index, StepTy,
3917 switch (R->getInductionKind()) {
3919 assert(Index->getScalarType() == Start->getScalarType() &&
3920 "Index type does not match StartValue type");
3921 return R->replaceAllUsesWith(Builder.createAdd(
3922 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3925 return R->replaceAllUsesWith(Builder.createPtrAdd(
3926 Start, Builder.createOverflowingOp(Instruction::Mul, {Index, Step})));
3931 (FPBinOp->
getOpcode() == Instruction::FAdd ||
3932 FPBinOp->
getOpcode() == Instruction::FSub) &&
3933 "Original BinOp should be defined for FP induction");
3935 VPValue *
FMul = Builder.createNaryOp(Instruction::FMul, {Step, Index}, FMF);
3936 return R->replaceAllUsesWith(
3937 Builder.createNaryOp(FPBinOp->
getOpcode(), {Start, FMul}, FMF));
3950 if (!R->isReplicator())
3954 R->dissolveToCFGLoop();
3975 assert(Br->getNumOperands() == 2 &&
3976 "BranchOnTwoConds must have exactly 2 conditions");
3980 assert(Successors.size() == 3 &&
3981 "BranchOnTwoConds must have exactly 3 successors");
3986 VPValue *Cond0 = Br->getOperand(0);
3987 VPValue *Cond1 = Br->getOperand(1);
3994 if (Succ0 == Succ1) {
3996 VPValue *Combined = Builder.createOr(Cond0, Cond1,
DL);
4000 Br->eraseFromParent();
4005 !BrOnTwoCondsBB->
getParent() &&
"regions must already be dissolved");
4018 Br->eraseFromParent();
4029 WidenIVR->eraseFromParent();
4039 WidenIVR->replaceAllUsesWith(PtrAdd);
4040 WidenIVR->eraseFromParent();
4044 WidenIVR->eraseFromParent();
4050 DerivedIVR->eraseFromParent();
4055 VPValue *CanIV = WideCanIV->getCanonicalIV();
4057 VPValue *Step = WideCanIV->getStepValue();
4060 "Expected unroller to have materialized step for UF != 1");
4065 Step = Builder.createAdd(
4068 Builder.createAdd(CanIV, Step, WideCanIV->getDebugLoc(),
"vec.iv",
4069 WideCanIV->getNoWrapFlags());
4071 WideCanIV->eraseFromParent();
4078 for (
unsigned I = 1;
I != Blend->getNumIncomingValues(); ++
I)
4079 Select = Builder.createSelect(Blend->getMask(
I),
4080 Blend->getIncomingValue(
I),
Select,
4081 R.getDebugLoc(),
"predphi", *Blend);
4082 Blend->replaceAllUsesWith(
Select);
4083 Blend->eraseFromParent();
4088 if (!VEPR->getOffset()) {
4090 "Expected unroller to have materialized offset for UF != 1");
4091 VEPR->materializeOffset();
4098 Expr->eraseFromParent();
4108 for (
VPValue *
Op : LastActiveL->operands()) {
4109 VPValue *NotMask = Builder.createNot(
Op, LastActiveL->getDebugLoc());
4114 VPValue *FirstInactiveLane = Builder.createFirstActiveLane(
4115 NotMasks, LastActiveL->getDebugLoc(),
"first.inactive.lane");
4121 Builder.createSub(FirstInactiveLane, One,
4122 LastActiveL->getDebugLoc(),
"last.active.lane");
4125 LastActiveL->eraseFromParent();
4132 assert(VPI->isMasked() &&
4133 "Unmasked MaskedCond should be simplified earlier");
4134 VPI->replaceAllUsesWith(Builder.createNaryOp(
4136 VPI->eraseFromParent();
4146 Instruction::Add, VPI->operands(), VPI->getNoWrapFlags(),
4147 VPI->getDebugLoc());
4148 VPI->replaceAllUsesWith(
Add);
4149 VPI->eraseFromParent();
4157 DebugLoc DL = BranchOnCountInst->getDebugLoc();
4160 BranchOnCountInst->eraseFromParent();
4175 ? Instruction::UIToFP
4176 : Instruction::Trunc;
4177 VectorStep = Builder.createWidenCast(CastOp, VectorStep, IVTy);
4183 Builder.createWidenCast(Instruction::Trunc, ScalarStep, IVTy);
4189 MulOpc = Instruction::FMul;
4190 Flags = VPI->getFastMathFlags();
4192 MulOpc = Instruction::Mul;
4197 MulOpc, {VectorStep, ScalarStep}, Flags, R.getDebugLoc());
4199 VPI->replaceAllUsesWith(VectorStep);
4200 VPI->eraseFromParent();
4222 for (
auto &Exit : Exits) {
4223 if (Exit.EarlyExitingVPBB == LatchVPBB)
4227 cast<VPIRPhi>(&R)->removeIncomingValueFor(Exit.EarlyExitingVPBB);
4228 Exit.EarlyExitingVPBB->getTerminator()->eraseFromParent();
4238 std::optional<VPValue *>
Cond =
4245 for (
auto *Recipe : ConditionRecipes) {
4248 assert(CondLoad ==
nullptr &&
"Too many condition loads");
4252 assert(CondLoad &&
"Couldn't find load");
4263 VPValue *Ptr = Load->getOperand(0);
4267 DL.getTypeStoreSize(Load->getScalarType()).getFixedValue());
4278 for (
auto *
GEP : GEPs) {
4295 auto InsertIt = HeaderVPBB->
end();
4297 bool CondMoveNeeded = CondR->
getParent() != HeaderVPBB;
4302 if (R.mayReadOrWriteMemory()) {
4304 CondMoveNeeded =
true;
4305 InsertIt = R.getIterator();
4315 for (
auto *Recipe :
reverse(ConditionRecipes))
4316 Recipe->moveBefore(*HeaderVPBB, InsertIt);
4320 VPBuilder MaskBuilder(HeaderVPBB, InsertIt);
4323 Type *IVScalarTy =
IV->getScalarType();
4330 {Zero, FirstActive, ALMMultiplier},
4331 DebugLoc(),
"uncountable.exit.mask");
4336 if (R.mayReadOrWriteMemory() && &R != CondLoad) {
4338 if (!VPDT.
dominates(R.getParent(), LatchVPBB))
4347 VPBuilder MiddleBuilder(MiddleVPBB, MiddleVPBB->
end());
4357 auto Phis = ScalarPH->
phis();
4376 if (Pred == MiddleVPBB)
4381 VPValue *CondOfEarlyExitingVPBB;
4382 [[maybe_unused]]
bool Matched =
4383 match(EarlyExitingVPBB->getTerminator(),
4385 assert(Matched &&
"Terminator must be BranchOnCond");
4389 VPBuilder EarlyExitingBuilder(EarlyExitingVPBB->getTerminator());
4390 auto *CondToEarlyExit = EarlyExitingBuilder.
createNaryOp(
4392 TrueSucc == ExitBlock
4393 ? CondOfEarlyExitingVPBB
4394 : EarlyExitingBuilder.
createNot(CondOfEarlyExitingVPBB));
4400 "exit condition must dominate the latch");
4409 assert(!Exits.
empty() &&
"must have at least one early exit");
4416 for (
const auto &[Num, VPB] :
enumerate(RPOT))
4419 return RPOIdx[
A.EarlyExitingVPBB] < RPOIdx[
B.EarlyExitingVPBB];
4425 for (
unsigned I = 0;
I + 1 < Exits.
size(); ++
I)
4426 for (
unsigned J =
I + 1; J < Exits.
size(); ++J)
4428 Exits[
I].EarlyExitingVPBB) &&
4429 "RPO sort must place dominating exits before dominated ones");
4435 VPValue *Combined = Exits[0].CondToExit;
4448 "Unexpected terminator");
4449 VPValue *IsLatchExitTaken = LatchExitingBranch->getOperand(0);
4450 DebugLoc LatchDL = LatchExitingBranch->getDebugLoc();
4451 LatchExitingBranch->eraseFromParent();
4454 {IsAnyExitTaken, IsLatchExitTaken}, LatchDL);
4460 LatchVPBB->
setSuccessors({MiddleVPBB, MiddleVPBB, HeaderVPBB});
4464 LatchVPBB, MiddleVPBB, TheLoop,
4470 for (
unsigned Idx = 0; Idx != Exits.
size(); ++Idx) {
4474 VectorEarlyExitVPBBs[Idx] = VectorEarlyExitVPBB;
4482 Exits.
size() == 1 ? VectorEarlyExitVPBBs[0]
4485 LatchVPBB->
setSuccessors({DispatchVPBB, MiddleVPBB, HeaderVPBB});
4517 for (
auto [Exit, VectorEarlyExitVPBB] :
4518 zip_equal(Exits, VectorEarlyExitVPBBs)) {
4519 auto &[EarlyExitingVPBB, EarlyExitVPBB,
_] = Exit;
4531 ExitIRI->getIncomingValueForBlock(EarlyExitingVPBB);
4532 VPValue *NewIncoming = IncomingVal;
4534 VPBuilder EarlyExitBuilder(VectorEarlyExitVPBB);
4539 ExitIRI->removeIncomingValueFor(EarlyExitingVPBB);
4540 ExitIRI->addIncoming(NewIncoming);
4543 EarlyExitingVPBB->getTerminator()->eraseFromParent();
4577 bool IsLastDispatch = (
I + 2 == Exits.
size());
4579 IsLastDispatch ? VectorEarlyExitVPBBs.
back()
4585 VectorEarlyExitVPBBs[
I]->setPredecessors({CurrentBB});
4588 CurrentBB = FalseBB;
4603 VPValue *VecOp = Red->getVecOp();
4605 assert(!Red->isPartialReduction() &&
4606 "This path does not support partial reductions");
4609 auto IsExtendedRedValidAndClampRange =
4622 "getExtendedReductionCost only supports integer types");
4623 ExtRedCost = Ctx.TTI.getExtendedReductionCost(
4624 Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
4625 Red->getFastMathFlags(),
CostKind);
4626 return ExtRedCost.
isValid() && ExtRedCost < ExtCost + RedCost;
4634 IsExtendedRedValidAndClampRange(
4655 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
4656 Opcode != Instruction::FAdd)
4659 assert(!Red->isPartialReduction() &&
4660 "This path does not support partial reductions");
4664 auto IsMulAccValidAndClampRange =
4676 (Ext0->getOpcode() != Ext1->getOpcode() ||
4677 Ext0->getOpcode() == Instruction::CastOps::FPExt))
4681 !Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
4683 MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
4690 ExtCost += Ext0->computeCost(VF, Ctx);
4692 ExtCost += Ext1->computeCost(VF, Ctx);
4694 ExtCost += OuterExt->computeCost(VF, Ctx);
4696 return MulAccCost.
isValid() &&
4697 MulAccCost < ExtCost + MulCost + RedCost;
4702 VPValue *VecOp = Red->getVecOp();
4740 Builder.createWidenCast(Instruction::CastOps::Trunc, ValB, NarrowTy);
4742 ValB = ExtB = Builder.createWidenCast(ExtOpc, Trunc, WideTy);
4743 Mul->setOperand(1, ExtB);
4753 ExtendAndReplaceConstantOp(RecipeA, RecipeB,
B,
Mul);
4758 IsMulAccValidAndClampRange(
Mul, RecipeA, RecipeB,
nullptr)) {
4765 if (!
Sub && IsMulAccValidAndClampRange(
Mul,
nullptr,
nullptr,
nullptr))
4782 ExtendAndReplaceConstantOp(Ext0, Ext1,
B,
Mul);
4791 (Ext->getOpcode() == Ext0->getOpcode() || Ext0 == Ext1) &&
4792 Ext0->getOpcode() == Ext1->getOpcode() &&
4793 IsMulAccValidAndClampRange(
Mul, Ext0, Ext1, Ext) &&
Mul->hasOneUse()) {
4795 Ext0->getOpcode(), Ext0->getOperand(0), Ext->getScalarType(),
nullptr,
4796 *Ext0, *Ext0, Ext0->getDebugLoc());
4797 NewExt0->insertBefore(Ext0);
4802 Ext->getScalarType(),
nullptr, *Ext1,
4803 *Ext1, Ext1->getDebugLoc());
4806 auto *NewMul =
Mul->cloneWithOperands({NewExt0, NewExt1});
4807 NewMul->insertBefore(
Mul);
4808 Ext->replaceAllUsesWith(NewMul);
4809 Ext->eraseFromParent();
4810 Mul->eraseFromParent();
4824 assert(!Red->isPartialReduction() &&
4825 "This path does not support partial reductions");
4828 auto IP = std::next(Red->getIterator());
4829 auto *VPBB = Red->getParent();
4839 Red->replaceAllUsesWith(AbstractR);
4869 for (
VPValue *VPV : VPValues) {
4878 if (
User->usesScalars(VPV))
4881 HoistPoint = HoistBlock->
begin();
4885 "All users must be in the vector preheader or dominated by it");
4890 VPV->replaceUsesWithIf(Broadcast,
4891 [VPV, Broadcast](
VPUser &U,
unsigned Idx) {
4892 return Broadcast != &U && !U.usesScalars(VPV);
4903 return CommonMetadata;
4906template <
unsigned Opcode>
4911 static_assert(Opcode == Instruction::Load || Opcode == Instruction::Store,
4912 "Only Load and Store opcodes supported");
4913 [[maybe_unused]]
constexpr bool IsLoad = (Opcode == Instruction::Load);
4920 for (
auto Recipes :
Groups) {
4921 if (Recipes.size() < 2)
4926 "Expected all recipes in group to have the same load-store type");
4933 VPValue *MaskI = RecipeI->getMask();
4939 bool HasComplementaryMask =
false;
4944 VPValue *MaskJ = RecipeJ->getMask();
4953 if (HasComplementaryMask) {
4954 assert(Group.
size() >= 2 &&
"must have at least 2 entries");
4964template <
typename InstType>
4982 for (
auto &Group :
Groups) {
5002 return R->isSingleScalar() == IsSingleScalar;
5004 "all members in group must agree on IsSingleScalar");
5009 LoadWithMinAlign->getUnderlyingInstr(), {EarliestLoad->getOperand(0)},
5010 IsSingleScalar,
nullptr, *EarliestLoad, CommonMetadata);
5012 UnpredicatedLoad->insertBefore(EarliestLoad);
5016 Load->replaceAllUsesWith(UnpredicatedLoad);
5017 Load->eraseFromParent();
5026 if (!StoreLoc || !StoreLoc->AATags.Scope)
5032 StoresToSink.
end());
5036 SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], PSE, L);
5048 for (
auto &Group :
Groups) {
5061 VPValue *SelectedValue = Group[0]->getOperand(0);
5064 bool IsSingleScalar = Group[0]->isSingleScalar();
5065 for (
unsigned I = 1;
I < Group.size(); ++
I) {
5066 assert(IsSingleScalar == Group[
I]->isSingleScalar() &&
5067 "all members in group must agree on IsSingleScalar");
5068 VPValue *Mask = Group[
I]->getMask();
5070 SelectedValue = Builder.createSelect(Mask,
Value, SelectedValue,
5079 StoreWithMinAlign->getUnderlyingInstr(),
5080 {SelectedValue, LastStore->getOperand(1)}, IsSingleScalar,
5081 nullptr, *LastStore, CommonMetadata);
5082 UnpredicatedStore->insertBefore(*InsertBB, LastStore->
getIterator());
5086 Store->eraseFromParent();
5093 assert(Plan.
hasVF(BestVF) &&
"BestVF is not available in Plan");
5094 assert(Plan.
hasUF(BestUF) &&
"BestUF is not available in Plan");
5157 auto UsesVectorOrInsideReplicateRegion = [DefR, LoopRegion](
VPUser *U) {
5159 return !U->usesScalars(DefR) || ParentRegion != LoopRegion;
5166 none_of(DefR->users(), UsesVectorOrInsideReplicateRegion))
5176 DefR->replaceUsesWithIf(
5177 BuildVector, [BuildVector, &UsesVectorOrInsideReplicateRegion](
5179 return &U != BuildVector && UsesVectorOrInsideReplicateRegion(&U);
5193 for (
VPValue *Def : R.definedValues()) {
5206 auto IsCandidateUnpackUser = [Def](
VPUser *U) {
5208 return U->usesScalars(Def) &&
5211 if (
none_of(Def->users(), IsCandidateUnpackUser))
5218 Unpack->insertAfter(&R);
5219 Def->replaceUsesWithIf(Unpack,
5220 [&IsCandidateUnpackUser](
VPUser &U,
unsigned) {
5221 return IsCandidateUnpackUser(&U);
5230 bool RequiresScalarEpilogue,
VPValue *Step,
5231 std::optional<uint64_t> MaxRuntimeStep) {
5243 "Step VPBB must dominate VectorPHVPBB");
5245 InsertPt = std::next(StepR->getIterator());
5247 VPBuilder Builder(VectorPHVPBB, InsertPt);
5253 if (!RequiresScalarEpilogue &&
match(TC,
m_APInt(TCVal)) && MaxRuntimeStep &&
5265 if (TailByMasking) {
5266 TC = Builder.createAdd(
5277 Builder.createNaryOp(Instruction::URem, {TC, Step},
5286 if (RequiresScalarEpilogue) {
5288 "requiring scalar epilogue is not supported with fail folding");
5291 R = Builder.createSelect(IsZero, Step, R);
5305 "VF and VFxUF must be materialized together");
5317 Builder.createElementCount(TCTy, VFEC * Plan.
getConcreteUF());
5324 VPValue *RuntimeVF = Builder.createElementCount(TCTy, VFEC);
5328 BC, [&VF](
VPUser &U,
unsigned) {
return !U.usesScalars(&VF); });
5332 VPValue *MulByUF = Builder.createOverflowingOp(
5345 auto *AliasMask = Builder.createNaryOp(
5349 if (HeaderMaskDef->isPhi())
5350 Builder =
VPBuilder(&*HeaderMaskDef->getParent()->getFirstNonPhi());
5355 auto *ClampedHeaderMask = Builder.createAnd(HeaderMask, AliasMask);
5357 return &U != ClampedHeaderMask;
5368 assert(IncomingAliasMask &&
"Expected an alias mask!");
5378 if (
Check.NeedsFreeze) {
5388 Intrinsic::loop_dependence_war_mask,
5392 AliasMask = Builder.createAnd(AliasMask, WARMask);
5394 AliasMask = WARMask;
5399 VPValue *NumActive = Builder.createNaryOp(
5402 VPValue *ClampedVF = Builder.createScalarZExtOrTrunc(
5428 VPValue *DistanceToMax = Builder.createSub(MaxUIntTripCount, TripCount);
5436 VPValue *TripCountCheck = Builder.createICmp(
5439 VPValue *
Cond = Builder.createOr(IsScalar, TripCountCheck,
DL);
5450 "Clamped VF not supported with interleaving");
5458 VPBuilder Builder(Entry, Entry->begin());
5466 if (!ExpSCEV || ExpSCEV->getNumUsers() == 0)
5468 Builder.setInsertPoint(ExpSCEV);
5475 ExpSCEV->eraseFromParent();
5484 BasicBlock *EntryBB = Entry->getIRBasicBlock();
5491 const SCEV *Expr = ExpSCEV->getSCEV();
5494 ExpandedSCEVs[Expr] = Res;
5499 ExpSCEV->eraseFromParent();
5502 "all VPExpandSCEVRecipes must have been expanded");
5505 auto EI = Entry->begin();
5515 return ExpandedSCEVs;
5527 VPValue *OpV,
unsigned Idx,
bool IsScalable) {
5531 return Member0Op == OpV;
5535 return !IsScalable && !W->getMask() && W->isConsecutive() &&
5538 return IR->getInterleaveGroup()->isFull() &&
IR->getVPValue(Idx) == OpV;
5555 for (
unsigned Idx = 0; Idx != WideMember0->getNumOperands(); ++Idx) {
5558 OpsI.
push_back(
Op->getDefiningRecipe()->getOperand(Idx));
5563 if (
any_of(
enumerate(OpsI), [WideMember0, Idx, IsScalable](
const auto &
P) {
5564 const auto &[
OpIdx, OpV] =
P;
5576static std::optional<ElementCount>
5580 if (!InterleaveR || InterleaveR->
getMask())
5581 return std::nullopt;
5583 Type *GroupElementTy =
nullptr;
5587 return Op->getScalarType() == GroupElementTy;
5589 return std::nullopt;
5593 return Op->getScalarType() == GroupElementTy;
5595 return std::nullopt;
5599 if (IG->getFactor() != IG->getNumMembers())
5600 return std::nullopt;
5606 assert(
Size.isScalable() == VF.isScalable() &&
5607 "if Size is scalable, VF must be scalable and vice versa");
5608 return Size.getKnownMinValue();
5612 unsigned MinVal = VF.getKnownMinValue();
5614 if (IG->getFactor() == MinVal && GroupSize == GetVectorBitWidthForVF(VF))
5617 return std::nullopt;
5625 return RepR && RepR->isSingleScalar();
5635 auto *R = V->getDefiningRecipe();
5646 for (
unsigned Idx = 0,
E = WideMember0->getNumOperands(); Idx !=
E; ++Idx) {
5648 for (
VPValue *Member : Members)
5649 OpsI.
push_back(Member->getDefiningRecipe()->getOperand(Idx));
5658 auto *LI =
cast<LoadInst>(LoadGroup->getInterleaveGroup()->getInsertPos());
5660 LoadGroup->getMask(),
true,
5661 *LoadGroup, LoadGroup->getDebugLoc());
5662 L->insertBefore(LoadGroup);
5668 assert(RepR->isSingleScalar() && RepR->getOpcode() == Instruction::Load &&
5669 "must be a single scalar load");
5670 NarrowedOps.
insert(RepR);
5675 VPValue *PtrOp = WideLoad->getAddr();
5677 PtrOp = VecPtr->getOperand(0);
5682 nullptr, {}, *WideLoad);
5683 N->insertBefore(WideLoad);
5688std::unique_ptr<VPlan>
5708 "unexpected branch-on-count");
5711 std::optional<ElementCount> VFToOptimize;
5725 if (R.mayWriteToMemory() && !InterleaveR)
5731 return any_of(V->users(), [&](VPUser *U) {
5732 auto *UR = cast<VPRecipeBase>(U);
5733 return UR->getParent()->getParent() != VectorLoop;
5750 std::optional<ElementCount> NarrowedVF =
5752 if (!NarrowedVF || (VFToOptimize && NarrowedVF != VFToOptimize))
5754 VFToOptimize = NarrowedVF;
5757 if (InterleaveR->getStoredValues().empty())
5762 auto *Member0 = InterleaveR->getStoredValues()[0];
5772 VPRecipeBase *DefR = Op.value()->getDefiningRecipe();
5775 auto *IR = dyn_cast<VPInterleaveRecipe>(DefR);
5776 return IR && IR->getInterleaveGroup()->isFull() &&
5777 IR->getVPValue(Op.index()) == Op.value();
5786 VFToOptimize->isScalable()))
5791 if (StoreGroups.empty())
5795 bool RequiresScalarEpilogue =
5806 std::unique_ptr<VPlan> NewPlan;
5808 NewPlan = std::unique_ptr<VPlan>(Plan.
duplicate());
5809 Plan.
setVF(*VFToOptimize);
5810 NewPlan->removeVF(*VFToOptimize);
5816 for (
auto *StoreGroup : StoreGroups) {
5823 StoreGroup->getDebugLoc());
5824 S->insertBefore(StoreGroup);
5825 StoreGroup->eraseFromParent();
5831 Type *CanIVTy = VectorLoop->getCanonicalIVType();
5837 if (VFToOptimize->isScalable()) {
5840 Step = PHBuilder.createOverflowingOp(Instruction::Mul, {VScale,
UF},
5848 materializeVectorTripCount(Plan, VectorPH,
false,
5849 RequiresScalarEpilogue, Step);
5854 removeDeadRecipes(Plan);
5857 "All VPVectorPointerRecipes should have been removed");
5873 "must have a BranchOnCond");
5876 if (VF.
isScalable() && VScaleForTuning.has_value())
5877 VectorStep *= *VScaleForTuning;
5878 assert(VectorStep > 0 &&
"trip count should not be zero");
5882 MiddleTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
5901 "Cannot handle loops with uncountable early exits");
5908 assert(RecurSplice &&
"expected FirstOrderRecurrenceSplice");
5915 if (
any_of(RecurSplice->users(),
5916 [](
VPUser *U) { return !cast<VPRecipeBase>(U)->getRegion(); }) &&
5997 {},
"vector.recur.extract.for.phi");
6000 ExitPhi->replaceUsesOfWith(ExtractR, PenultimateElement);
6014 VPValue *WidenIVCandidate = BinOp->getOperand(0);
6015 VPValue *InvariantCandidate = BinOp->getOperand(1);
6017 std::swap(WidenIVCandidate, InvariantCandidate);
6031 auto *ClonedOp = BinOp->
clone();
6032 if (ClonedOp->getOperand(0) == WidenIV) {
6033 ClonedOp->setOperand(0, ScalarIV);
6035 assert(ClonedOp->getOperand(1) == WidenIV &&
"one operand must be WideIV");
6036 ClonedOp->setOperand(1, ScalarIV);
6051 auto CheckSentinel = [&SE](
const SCEV *IVSCEV,
6052 bool UseMax) -> std::optional<APSInt> {
6054 for (
bool Signed : {
true,
false}) {
6063 return std::nullopt;
6071 PhiR->getRecurrenceKind()))
6080 VPValue *BackedgeVal = PhiR->getBackedgeValue();
6094 !
match(FindLastSelect,
6103 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression, PSE,
6109 "IVOfExpressionToSink not being an AddRec must imply "
6110 "FindLastExpression not being an AddRec.");
6121 std::optional<APSInt> SentinelVal = CheckSentinel(IVSCEV, UseMax);
6122 bool UseSigned = SentinelVal && SentinelVal->isSigned();
6129 if (IVOfExpressionToSink) {
6130 const SCEV *FindLastExpressionSCEV =
6132 if (
match(FindLastExpressionSCEV,
6135 if (
auto NewSentinel =
6136 CheckSentinel(FindLastExpressionSCEV, NewUseMax)) {
6139 SentinelVal = *NewSentinel;
6140 UseSigned = NewSentinel->isSigned();
6142 IVSCEV = FindLastExpressionSCEV;
6143 IVOfExpressionToSink =
nullptr;
6153 if (AR->hasNoSignedWrap())
6155 else if (AR->hasNoUnsignedWrap())
6165 VPValue *NewFindLastSelect = BackedgeVal;
6167 if (!SentinelVal || IVOfExpressionToSink) {
6170 DebugLoc DL = FindLastSelect->getDefiningRecipe()->getDebugLoc();
6171 VPBuilder LoopBuilder(FindLastSelect->getDefiningRecipe());
6172 if (FindLastSelect->getDefiningRecipe()->getOperand(1) == PhiR)
6173 SelectCond = LoopBuilder.
createNot(SelectCond);
6180 if (SelectCond !=
Cond || IVOfExpressionToSink) {
6183 IVOfExpressionToSink ? IVOfExpressionToSink : FindLastExpression,
6192 VPIRFlags Flags(MinMaxKind,
false,
false,
6198 NewFindLastSelect, Flags, ExitDL);
6201 VPValue *VectorRegionExitingVal = ReducedIV;
6202 if (IVOfExpressionToSink)
6203 VectorRegionExitingVal =
6205 ReducedIV, IVOfExpressionToSink);
6208 VPValue *StartVPV = PhiR->getStartValue();
6215 NewRdxResult = MiddleBuilder.
createSelect(Cmp, VectorRegionExitingVal,
6225 AnyOfPhi->insertAfter(PhiR);
6232 OrVal, VectorRegionExitingVal, StartVPV, ExitDL);
6245 PhiR->hasUsesOutsideReductionChain());
6246 NewPhiR->insertBefore(PhiR);
6247 PhiR->replaceAllUsesWith(NewPhiR);
6248 PhiR->eraseFromParent();
6255struct ReductionExtend {
6256 Type *SrcType =
nullptr;
6257 ExtendKind Kind = ExtendKind::PR_None;
6263struct ExtendedReductionOperand {
6267 ReductionExtend ExtendA, ExtendB;
6275struct VPPartialReductionChain {
6278 VPWidenRecipe *ReductionBinOp =
nullptr;
6280 ExtendedReductionOperand ExtendedOp;
6287 unsigned AccumulatorOpIdx;
6288 unsigned ScaleFactor;
6300 if (!
Op->hasOneUse() ||
6306 auto *Trunc = Builder.createWidenCast(Instruction::CastOps::Trunc,
6307 Op->getOperand(1), NarrowTy);
6309 Op->setOperand(1, Builder.createWidenCast(ExtOpc, Trunc, WideTy));
6318 auto *
Sub =
Op->getOperand(0)->getDefiningRecipe();
6320 assert(Ext->getOpcode() ==
6322 "Expected both the LHS and RHS extends to be the same");
6323 bool IsSigned = Ext->getOpcode() == Instruction::SExt;
6326 auto *FreezeX = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
X}));
6327 auto *FreezeY = Builder.insert(
new VPWidenRecipe(Instruction::Freeze, {
Y}));
6328 auto *
Max = Builder.insert(
6330 {FreezeX, FreezeY}, SrcTy));
6331 auto *Min = Builder.insert(
6333 {FreezeX, FreezeY}, SrcTy));
6336 return Builder.createWidenCast(Instruction::CastOps::ZExt, AbsDiff,
6337 Op->getScalarType());
6349 if (!
Mul->hasOneUse() ||
6350 (Ext->getOpcode() != MulLHS->getOpcode() && MulLHS != MulRHS) ||
6351 MulLHS->getOpcode() != MulRHS->getOpcode())
6354 auto *NewLHS = Builder.createWidenCast(
6355 MulLHS->getOpcode(), MulLHS->getOperand(0), Ext->getScalarType());
6356 auto *NewRHS = MulLHS == MulRHS
6358 : Builder.createWidenCast(MulRHS->getOpcode(),
6359 MulRHS->getOperand(0),
6360 Ext->getScalarType());
6361 auto *NewMul =
Mul->cloneWithOperands({NewLHS, NewRHS});
6362 Builder.insert(NewMul);
6363 Op->replaceAllUsesWith(NewMul);
6364 Op->eraseFromParent();
6365 Mul->eraseFromParent();
6374 VPValue *VecOp = Red->getVecOp();
6428static void transformToPartialReduction(
const VPPartialReductionChain &Chain,
6436 WidenRecipe->
getOperand(1 - Chain.AccumulatorOpIdx));
6439 ExtendedOp = optimizeExtendsForPartialReduction(ExtendedOp);
6455 if ((WidenRecipe->
getOpcode() == Instruction::Sub &&
6457 (WidenRecipe->
getOpcode() == Instruction::FSub &&
6462 if (WidenRecipe->
getOpcode() == Instruction::FSub) {
6472 Builder.insert(NegRecipe);
6473 ExtendedOp = NegRecipe;
6484 assert((!ExitValue || IsLastInChain) &&
6485 "if we found ExitValue, it must match RdxPhi's backedge value");
6496 PartialRed->insertBefore(WidenRecipe);
6504 E->insertBefore(WidenRecipe);
6505 PartialRed->replaceAllUsesWith(
E);
6518 auto *NewScaleFactor = Plan.
getConstantInt(32, Chain.ScaleFactor);
6519 StartInst->setOperand(2, NewScaleFactor);
6527 VPValue *OldStartValue = StartInst->getOperand(0);
6528 StartInst->setOperand(0, StartInst->getOperand(1));
6532 assert(RdxResult &&
"Could not find reduction result");
6535 unsigned SubOpc = Chain.RK ==
RecurKind::FSub ? Instruction::BinaryOps::FSub
6536 : Instruction::BinaryOps::Sub;
6542 [&NewResult](
VPUser &U,
unsigned Idx) {
return &
U != NewResult; });
6548 const VPPartialReductionChain &Link,
6551 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6552 std::optional<unsigned> BinOpc = std::nullopt;
6554 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6555 BinOpc = ExtendedOp.ExtendsUser->
getOpcode();
6557 std::optional<llvm::FastMathFlags>
Flags;
6561 auto GetLinkOpcode = [&Link]() ->
unsigned {
6564 return Instruction::Add;
6566 return Instruction::FAdd;
6568 return Link.ReductionBinOp->
getOpcode();
6573 GetLinkOpcode(), ExtendedOp.ExtendA.SrcType, ExtendedOp.ExtendB.SrcType,
6574 RdxType, VF, ExtendedOp.ExtendA.Kind, ExtendedOp.ExtendB.Kind, BinOpc,
6595static std::optional<ExtendedReductionOperand>
6598 "Op should be operand of UpdateR");
6606 if (
Op->hasOneUse() &&
6615 Type *RHSInputType =
Y->getScalarType();
6616 if (LHSInputType != RHSInputType ||
6617 LHSExt->getOpcode() != RHSExt->getOpcode())
6618 return std::nullopt;
6621 return ExtendedReductionOperand{
6623 {LHSInputType, getPartialReductionExtendKind(LHSExt)},
6627 std::optional<TTI::PartialReductionExtendKind> OuterExtKind;
6630 VPValue *CastSource = CastRecipe->getOperand(0);
6631 OuterExtKind = getPartialReductionExtendKind(CastRecipe);
6641 return ExtendedReductionOperand{
6648 if (!
Op->hasOneUse())
6649 return std::nullopt;
6654 return std::nullopt;
6664 return std::nullopt;
6668 ExtendKind LHSExtendKind = getPartialReductionExtendKind(LHSCast);
6671 const APInt *RHSConst =
nullptr;
6677 return std::nullopt;
6681 if (Cast && OuterExtKind &&
6682 getPartialReductionExtendKind(Cast) != OuterExtKind)
6683 return std::nullopt;
6685 Type *RHSInputType = LHSInputType;
6686 ExtendKind RHSExtendKind = LHSExtendKind;
6689 RHSExtendKind = getPartialReductionExtendKind(RHSCast);
6692 return ExtendedReductionOperand{
6693 MulOp, {LHSInputType, LHSExtendKind}, {RHSInputType, RHSExtendKind}};
6700static std::optional<SmallVector<VPPartialReductionChain>>
6708 return std::nullopt;
6718 VPValue *CurrentValue = ExitValue;
6719 while (CurrentValue != RedPhiR) {
6722 return std::nullopt;
6729 std::optional<ExtendedReductionOperand> ExtendedOp =
6730 matchExtendedReductionOperand(UpdateR,
Op);
6732 ExtendedOp = matchExtendedReductionOperand(UpdateR, PrevValue);
6734 return std::nullopt;
6738 Type *ExtSrcType = ExtendedOp->ExtendA.SrcType;
6741 return std::nullopt;
6746 VPPartialReductionChain Link(
6747 {UpdateR, *ExtendedOp, RK,
6751 CurrentValue = PrevValue;
6756 std::reverse(Chain.
begin(), Chain.
end());
6775 if (
auto Chains = getScaledReductions(RedPhiR, CostCtx,
Range))
6776 ChainsByPhi.
try_emplace(RedPhiR, std::move(*Chains));
6779 if (ChainsByPhi.
empty())
6786 for (
const auto &[
_, Chains] : ChainsByPhi)
6787 for (
const VPPartialReductionChain &Chain : Chains) {
6788 PartialReductionOps.
insert(Chain.ExtendedOp.ExtendsUser);
6789 ScaledReductionMap[Chain.ReductionBinOp] = Chain.ScaleFactor;
6795 auto ExtendUsersValid = [&](
VPValue *Ext) {
6797 return PartialReductionOps.contains(cast<VPRecipeBase>(U));
6801 auto IsProfitablePartialReductionChainForVF =
6808 for (
const VPPartialReductionChain &Link : Chain) {
6809 const ExtendedReductionOperand &ExtendedOp = Link.ExtendedOp;
6810 InstructionCost LinkCost = getPartialReductionLinkCost(CostCtx, Link, VF);
6814 PartialCost += LinkCost;
6815 RegularCost += Link.ReductionBinOp->
computeCost(VF, CostCtx);
6817 if (ExtendedOp.ExtendB.Kind != ExtendKind::PR_None)
6818 RegularCost += ExtendedOp.ExtendsUser->
computeCost(VF, CostCtx);
6821 RegularCost += Extend->computeCost(VF, CostCtx);
6823 return PartialCost.
isValid() && PartialCost < RegularCost;
6831 for (
auto &[RedPhiR, Chains] : ChainsByPhi) {
6832 for (
const VPPartialReductionChain &Chain : Chains) {
6833 if (!
all_of(Chain.ExtendedOp.ExtendsUser->operands(), ExtendUsersValid)) {
6837 auto UseIsValid = [&, RedPhiR = RedPhiR](
VPUser *U) {
6839 return PhiR == RedPhiR;
6841 return Chain.ScaleFactor == ScaledReductionMap.
lookup_or(R, 0) ||
6847 if (!
all_of(Chain.ReductionBinOp->users(), UseIsValid)) {
6856 auto *RepR = dyn_cast<VPReplicateRecipe>(U);
6857 return RepR && RepR->getOpcode() == Instruction::Store;
6868 return IsProfitablePartialReductionChainForVF(Chains, VF);
6874 for (
auto &[Phi, Chains] : ChainsByPhi)
6875 for (
const VPPartialReductionChain &Chain : Chains)
6876 transformToPartialReduction(Chain, Plan, Phi);
6890 if (VPI && VPI->getUnderlyingValue() &&
6902 New->insertBefore(VPI);
6903 if (VPI->getOpcode() == Instruction::Load)
6904 VPI->replaceAllUsesWith(New->getVPSingleValue());
6905 VPI->eraseFromParent();
6910 FinalRedStoresBuilder))
6919 ReplaceWith(Histogram);
6927 ReplaceWith(Recipe);
6950 if (VPI->mayHaveSideEffects())
6954 if (VPI->isMasked() && !VPI->isSafeToSpeculativelyExecute())
6959 if (VPI->getOpcode() == Instruction::Add &&
6968 I, VPI->operandsWithoutMask(),
true,
6969 nullptr, *VPI, *VPI, VPI->getDebugLoc());
6970 Recipe->insertBefore(VPI);
6971 VPI->replaceAllUsesWith(Recipe);
6972 VPI->eraseFromParent();
6982 switch (Param.ParamKind) {
6983 case VFParamKind::Vector:
6984 case VFParamKind::GlobalPredicate:
6986 case VFParamKind::OMP_Uniform:
6987 return SE->isSCEVable(Args[Param.ParamPos]->getScalarType()) &&
6988 SE->isLoopInvariant(
6989 vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6991 case VFParamKind::OMP_Linear:
6992 return match(vputils::getSCEVExprForVPValue(Args[Param.ParamPos], PSE, L),
6993 m_scev_AffineAddRec(
6994 m_SCEV(), m_scev_SpecificSInt(Param.LinearStepOrPos),
6995 m_SpecificLoop(L)));
7012 const auto *It =
find_if(Mappings, [&](
const VFInfo &Info) {
7013 return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) &&
7016 if (It == Mappings.end())
7023struct CallWideningDecision {
7024 enum class KindTy { Scalarize,
Intrinsic, VectorVariant };
7025 CallWideningDecision(KindTy Kind, Function *Variant =
nullptr)
7048 return CallWideningDecision::KindTy::Scalarize;
7058 return CallWideningDecision::KindTy::Scalarize;
7062 false, VF, CostCtx);
7077 return CallWideningDecision::KindTy::Intrinsic;
7081 if (VecFunc && ScalarCost >= VecCallCost)
7082 return {CallWideningDecision::KindTy::VectorVariant, VecFunc};
7084 return CallWideningDecision::KindTy::Scalarize;
7094 if (!VPI || !VPI->getUnderlyingValue() ||
7095 VPI->getOpcode() != Instruction::Call)
7100 VPI->op_begin() + CI->arg_size());
7102 CallWideningDecision Decision =
7111 switch (Decision.Kind) {
7112 case CallWideningDecision::KindTy::Intrinsic: {
7116 *VPI, VPI->getDebugLoc());
7119 case CallWideningDecision::KindTy::VectorVariant: {
7123 VPValue *Mask = VPI->isMasked() ? VPI->getMask() : Plan.
getTrue();
7124 Ops.push_back(Mask);
7126 Ops.push_back(VPI->getOperand(VPI->getNumOperandsWithoutMask() - 1));
7128 *VPI, VPI->getDebugLoc());
7131 case CallWideningDecision::KindTy::Scalarize:
7137 VPI->replaceAllUsesWith(Replacement);
7138 VPI->eraseFromParent();
7161 if (!LoadR || LoadR->isConsecutive())
7180 Align Alignment = LoadR->getAlign();
7183 if (!Ctx.TTI.isLegalStridedLoadStore(DataTy, Alignment))
7188 Intrinsic::experimental_vp_strided_load, DataTy,
7189 LoadR->isMasked(), Alignment, Ctx);
7190 return StridedLoadStoreCost < CurrentCost;
7201 Ctx.invalidateWideningDecision(&LoadR->getIngredient(), VF);
7206 I32VF = Builder.createScalarZExtOrTrunc(
7217 "Stride type from SCEV must match the index type");
7218 VPValue *CanIVTyStride = Builder.createScalarSExtOrTrunc(
7222 auto *
Offset = Builder.createOverflowingOp(
7224 {AddRecPtr->hasNoUnsignedWrap(), AddRecPtr->hasNoSignedWrap()});
7225 auto *BasePtr = Builder.createNoWrapPtrAdd(
7231 VPValue *NewPtr = Builder.createVectorPointer(
7233 Ptr->getGEPNoWrapFlags(), Ptr->getDebugLoc());
7235 VPValue *Mask = LoadR->getMask();
7238 auto *StridedLoad = Builder.createWidenMemIntrinsic(
7239 Intrinsic::experimental_vp_strided_load,
7240 {NewPtr, StrideInBytes, Mask, I32VF}, LoadTy, Alignment, *LoadR,
7241 LoadR->getDebugLoc());
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool isEqual(const Function &Caller, const Function &Callee)
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
static bool isSentinel(const DWARFDebugNames::AttributeEncoding &AE)
iv Induction Variable Users
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Legalize the Machine IR a function's Machine IR
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first DebugLoc that has line number information, given a range of instructions.
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
This file implements the TypeSwitch template, which mimics a switch() statement whose cases are type ...
This file implements dominator tree analysis for a single level of a VPlan's H-CFG.
This file contains the declarations of different VPlan-related auxiliary helpers.
This file declares the class VPlanVerifier, which contains utility functions to check the consistency...
This file contains the declarations of the Vectorization Plan base classes:
static const X86InstrFMA3Group Groups[]
static const uint32_t IV[8]
Helper for extra no-alias checks via known-safe recipe and SCEV.
SinkStoreInfo(const SmallPtrSetImpl< VPRecipeBase * > &ExcludeRecipes, VPReplicateRecipe &GroupLeader, PredicatedScalarEvolution &PSE, const Loop &L)
bool shouldSkip(VPRecipeBase &R) const
Return true if R should be skipped during alias checking, either because it's in the exclude set or b...
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
uint64_t getZExtValue() const
Get zero extended value.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
An arbitrary precision integer that knows its signedness.
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
@ NoAlias
The two locations do not alias at all.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & back() const
Get the last element.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
const T & front() const
Get the first element.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
This class represents a range of values.
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI IntegerType * getIndexType(LLVMContext &C, unsigned AddressSpace) const
Returns the type of a GEP index in AddressSpace.
static DebugLoc getCompilerGenerated()
static DebugLoc getUnknown()
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
ValueT lookup_or(const_arg_type_t< KeyT > Val, U &&Default) const
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
constexpr bool isVector() const
One or more elements.
static constexpr ElementCount getScalable(ScalarTy MinVal)
constexpr bool isScalar() const
Exactly one element.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Convenience struct for specifying and reasoning about fast-math flags.
Represents flags for the getelementptr instruction/expression.
static GEPNoWrapFlags noUnsignedWrap()
GEPNoWrapFlags withoutNoUnsignedWrap() const
static GEPNoWrapFlags none()
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
A struct for saving information about induction variables.
static LLVM_ABI InductionDescriptor getCanonicalIntInduction(Type *Ty, ScalarEvolution &SE)
Returns the canonical integer induction for type Ty with start = 0 and step = 1.
InductionKind
This enum represents the kinds of inductions that we support.
@ IK_NoInduction
Not an induction variable.
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
static InstructionCost getInvalid(CostType Val=0)
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
The group of interleaved loads/stores sharing the same stride and close to each other.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
static bool getDecisionAndClampRange(const std::function< bool(ElementCount)> &Predicate, VFRange &Range)
Test a Predicate on a Range of VF's.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
This class implements a map that also provides access to all stored values in a deterministic order.
ValueT lookup(const KeyT &Key) const
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Representation for a specific memory location.
AAMDNodes AATags
The metadata nodes which describe the aliasing of the location (each member is null if that kind of ...
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Post-order traversal of a graph.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
unsigned getOpcode() const
static bool isFindLastRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
RegionT * getParent() const
Get the parent of the Region.
This class represents a constant integer value.
ConstantInt * getValue() const
This class uses information about analyzed scalars to rewrite expressions in canonical form.
LLVM_ABI Value * expandCodeFor(SCEVUse SH, Type *Ty, BasicBlock::iterator I)
Insert code to directly compute the specified SCEV expression into the program.
static const SCEV * rewrite(const SCEV *Scev, ScalarEvolution &SE, ValueToSCEVMapTy &Map)
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getUDivExpr(SCEVUse LHS, SCEVUse RHS)
Get a canonical unsigned division expression, or something simpler if possible.
const DataLayout & getDataLayout() const
Return the DataLayout associated with the module this SCEV instance is operating on.
LLVM_ABI const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
LLVM_ABI bool isKnownNonZero(const SCEV *S)
Test if the given expression is known to be non-zero.
LLVM_ABI const SCEV * getConstant(ConstantInt *V)
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI const SCEV * getMinusSCEV(SCEVUse LHS, SCEVUse RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
LLVM_ABI bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
LLVM_ABI const SCEV * getElementCount(Type *Ty, ElementCount EC, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
LLVM_ABI const SCEV * getMulExpr(SmallVectorImpl< SCEVUse > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
LLVM_ABI bool isKnownPredicate(CmpPredicate Pred, SCEVUse LHS, SCEVUse RHS)
Test if the given expression is known to satisfy the condition described by Pred, LHS,...
static LLVM_ABI AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This class implements a switch-like dispatch statement for a value of 'T' using dyn_cast functionalit...
TypeSwitch< T, ResultT > & Case(CallableT &&caseFn)
Add a case on the given type.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
A recipe for generating the active lane mask for the vector loop that is used to predicate the vector...
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
void appendRecipe(VPRecipeBase *Recipe)
Augment the existing recipes of a VPBasicBlock with an additional Recipe as the last recipe.
RecipeListTy::iterator iterator
Instruction iterators...
iterator begin()
Recipe iterator methods.
iterator_range< iterator > phis()
Returns an iterator range over the PHI-like recipes in the block.
iterator getFirstNonPhi()
Return the position of the first non-phi node recipe in the block.
VPBasicBlock * splitAt(iterator SplitAt)
Split current block at SplitAt by inserting a new block between the current block and its successors ...
const VPRecipeBase & front() const
VPRecipeBase * getTerminator()
If the block has multiple successors, return the branch recipe terminating the block.
const VPRecipeBase & back() const
A recipe for vectorizing a phi-node as a sequence of mask-based select instructions.
VPValue * getMask(unsigned Idx) const
Return mask number Idx.
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value wi...
void setMask(unsigned Idx, VPValue *V)
Set mask number Idx to V.
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have...
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
void setSuccessors(ArrayRef< VPBlockBase * > NewSuccs)
Set each VPBasicBlock in NewSuccs as successor of this VPBlockBase.
VPRegionBlock * getParent()
const VPBasicBlock * getExitingBasicBlock() const
size_t getNumSuccessors() const
void setPredecessors(ArrayRef< VPBlockBase * > NewPreds)
Set each VPBasicBlock in NewPreds as predecessor of this VPBlockBase.
const VPBlocksTy & getPredecessors() const
const std::string & getName() const
void clearSuccessors()
Remove all the successors of this block.
VPBlockBase * getSinglePredecessor() const
void clearPredecessors()
Remove all the predecessors of this block.
const VPBasicBlock * getEntryBasicBlock() const
VPBlockBase * getSingleHierarchicalPredecessor()
VPBlockBase * getSingleSuccessor() const
const VPBlocksTy & getSuccessors() const
static auto blocksAs(T &&Range)
Return an iterator range over Range with each block cast to BlockTy.
static void insertOnEdge(VPBlockBase *From, VPBlockBase *To, VPBlockBase *BlockPtr)
Inserts BlockPtr on the edge between From and To.
static bool isLatch(const VPBlockBase *VPB, const VPDominatorTree &VPDT)
Returns true if VPB is a loop latch, using isHeader().
static void insertTwoBlocksAfter(VPBlockBase *IfTrue, VPBlockBase *IfFalse, VPBlockBase *BlockPtr)
Insert disconnected VPBlockBases IfTrue and IfFalse after BlockPtr.
static void connectBlocks(VPBlockBase *From, VPBlockBase *To, unsigned PredIdx=-1u, unsigned SuccIdx=-1u)
Connect VPBlockBases From and To bi-directionally.
static void disconnectBlocks(VPBlockBase *From, VPBlockBase *To)
Disconnect VPBlockBases From and To bi-directionally.
static auto blocksOnly(T &&Range)
Return an iterator range over Range which only includes BlockTy blocks.
static void transferSuccessors(VPBlockBase *Old, VPBlockBase *New)
Transfer successors from Old to New. New must have no successors.
static SmallVector< VPBasicBlock * > blocksInSingleSuccessorChainBetween(VPBasicBlock *FirstBB, VPBasicBlock *LastBB)
Returns the blocks between FirstBB and LastBB, where FirstBB to LastBB forms a single-successor chain.
A recipe for generating conditional branches on the bits of a mask.
RAII object that stores the current insertion point and restores it when the object is destroyed.
VPlan-based builder utility analogous to IRBuilder.
VPInstruction * createFirstActiveLane(ArrayRef< VPValue * > Masks, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAdd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", VPRecipeWithIRFlags::WrapFlagsTy WrapFlags={false, false})
VPInstruction * createOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPValue * createScalarZExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy, DebugLoc DL)
VPInstruction * createLogicalOr(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createNot(VPValue *Operand, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPInstruction * createAnyOfReduction(VPValue *ChainOp, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown())
Create an AnyOf reduction pattern: or-reduce ChainOp, freeze the result, then select between TrueVal ...
VPInstruction * createLogicalAnd(VPValue *LHS, VPValue *RHS, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
VPDerivedIVRecipe * createDerivedIV(InductionDescriptor::InductionKind Kind, FPMathOperator *FPBinOp, VPIRValue *Start, VPValue *Current, VPValue *Step)
Convert the input value Current to the corresponding value of an induction with Start and Step values...
VPInstruction * createScalarCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy, DebugLoc DL, const VPIRMetadata &Metadata={})
VPWidenPHIRecipe * createWidenPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
static VPBuilder getToInsertAfter(VPRecipeBase *R)
Create a VPBuilder to insert after R.
VPWidenCastRecipe * createWidenCast(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
VPInstruction * createICmp(CmpInst::Predicate Pred, VPValue *A, VPValue *B, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="")
Create a new ICmp VPInstruction with predicate Pred and operands A and B.
VPPhi * createScalarPhi(ArrayRef< VPValue * > IncomingValues, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={}, Type *ResultTy=nullptr)
VPInstruction * createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", const VPIRFlags &Flags={})
VPInstruction * createNaryOp(unsigned Opcode, ArrayRef< VPValue * > Operands, Instruction *Inst=nullptr, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Create an N-ary operation with Opcode, Operands and set Inst as its underlying Instruction.
void setInsertPoint(VPBasicBlock *TheBB)
This specifies that created VPInstructions should be appended to the end of the specified block.
A recipe for generating the phi node tracking the current scalar iteration index.
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
A recipe for converting the input value IV value to the corresponding value of an IV with different s...
Template specialization of the standard LLVM dominator tree utility for VPBlockBases.
bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B)
A recipe to combine multiple recipes into a single 'expression' recipe, which should be considered a ...
A recipe representing a sequence of load -> update -> store as part of a histogram operation.
A special type of VPBasicBlock that wraps an existing IR basic block.
Class to record and manage LLVM IR flags.
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
void dropPoisonGeneratingFlags()
Drop all poison-generating flags.
static LLVM_ABI_FOR_TEST VPIRInstruction * create(Instruction &I)
Create a new VPIRPhi for I, if it is a PHINode, otherwise create a VPIRInstruction.
This is a concrete Recipe that models a single VPlan-level instruction.
unsigned getNumOperandsWithoutMask() const
Returns the number of operands, excluding the mask if the VPInstruction is masked.
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
@ ExtractPenultimateElement
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
@ BuildVector
Creates a fixed-width vector containing all operands.
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed- width vectors each containing a ...
@ CanonicalIVIncrementForPart
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
const InterleaveGroup< Instruction > * getInterleaveGroup() const
VPValue * getMask() const
Return the mask used by this recipe.
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
A recipe for interleaved memory operations with vector-predication intrinsics.
VPInterleaveRecipe is a recipe for transforming an interleave group of load or stores into one wide l...
void addIncoming(VPValue *IncomingV)
Append IncomingV as an incoming value to the phi-like recipe.
VPPredInstPHIRecipe is a recipe for generating the phi nodes needed when control converges back from ...
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
VPBasicBlock * getParent()
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Helper class to create VPRecipes from IR instructions.
VPHistogramRecipe * widenIfHistogram(VPInstruction *VPI)
If VPI represents a histogram operation (as determined by LoopVectorizationLegality) make that safe f...
VPRecipeBase * tryToWidenMemory(VPInstruction *VPI, VFRange &Range)
Check if the load or store instruction VPI should be widened for Range.Start and potentially masked.
bool replaceWithFinalIfReductionStore(VPInstruction *VPI, VPBuilder &FinalRedStoresBuilder)
If VPI is a store of a reduction into an invariant address, delete it.
VPReplicateRecipe * handleReplication(VPInstruction *VPI, VFRange &Range)
Build a VPReplicateRecipe for VPI.
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
A recipe to represent inloop reduction operations with vector-predication intrinsics,...
A recipe for handling reduction phis.
void setVFScaleFactor(unsigned ScaleFactor)
Set the VFScaleFactor for this reduction phi.
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
RecurKind getRecurrenceKind() const
Returns the recurrence kind of the reduction.
A recipe to represent inloop, ordered or partial reduction operations.
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
const VPBlockBase * getEntry() const
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
VPInstruction * getOrCreateCanonicalIVIncrement()
Get the canonical IV increment instruction if it exists.
void setExiting(VPBlockBase *ExitingBlock)
Set ExitingBlock as the exiting VPBlockBase of this VPRegionBlock.
Type * getCanonicalIVType() const
Return the type of the canonical IV for loop regions.
void clearCanonicalIVNUW(VPInstruction *Increment)
Unsets NUW for the canonical IV increment Increment, for loop regions.
VPRegionValue * getCanonicalIV()
Return the canonical induction variable of the region, null for replicating regions.
const VPBlockBase * getExiting() const
VPBasicBlock * getPreheaderVPBB()
Returns the pre-header VPBasicBlock of the loop region.
VPReplicateRecipe replicates a given instruction producing multiple scalar copies of the original sca...
bool isSingleScalar() const
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
bool isPredicated() const
VPValue * getMask()
Return the mask of a predicated VPReplicateRecipe.
Lightweight SCEV-to-VPlan expander.
VPValue * tryToExpand(const SCEV *S)
Try to expand S into recipes and live-ins using the builder.
A recipe for handling phi nodes of integer and floating-point inductions, producing their scalar valu...
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
VPSingleDefRecipe * clone() override=0
Clone the current recipe.
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
void setOperand(unsigned I, VPValue *New)
unsigned getNumOperands() const
VPValue * getOperand(unsigned N) const
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
bool hasMoreThanOneUniqueUser() const
Returns true if the value has more than one unique user.
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
void setUnderlyingValue(Value *Val)
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
void replaceAllUsesWith(VPValue *New)
unsigned getNumUsers() const
void replaceUsesWithIf(VPValue *New, llvm::function_ref< bool(VPUser &U, unsigned Idx)> ShouldReplace)
Go through the uses list for this VPValue and make each use point to New if the callback ShouldReplac...
A recipe to compute a pointer to the last element of each part of a widened memory access for widened...
A recipe for widening Call instructions using library calls.
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
VPWidenCastRecipe is a recipe to create vector cast instructions.
Instruction::CastOps getOpcode() const
A recipe for handling GEP instructions.
Base class for widened induction (VPWidenIntOrFpInductionRecipe and VPWidenPointerInductionRecipe),...
VPIRValue * getStartValue() const
Returns the start value of the induction.
PHINode * getPHINode() const
Returns the underlying PHINode if one exists, or null otherwise.
VPValue * getStepValue()
Returns the step value of the induction.
const InductionDescriptor & getInductionDescriptor() const
Returns the induction descriptor for the recipe.
A recipe for handling phi nodes of integer and floating-point inductions, producing their vector valu...
VPIRValue * getStartValue() const
Returns the start value of the induction.
VPValue * getSplatVFValue() const
If the recipe has been unrolled, return the VPValue for the induction increment, otherwise return nul...
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
VPValue * getLastUnrolledPartOperand()
Returns the VPValue representing the value of this induction at the last unrolled part,...
A recipe for widening vector intrinsics.
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
A common mixin class for widening memory operations.
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.
A recipe for widened phis.
VPWidenRecipe is a recipe for producing a widened instruction using the opcode and operands of the re...
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
VPWidenRecipe * clone() override
Clone the current recipe.
unsigned getOpcode() const
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
VPIRValue * getLiveIn(Value *V) const
Return the live-in VPIRValue for V, if there is one or nullptr otherwise.
bool hasVF(ElementCount VF) const
const DataLayout & getDataLayout() const
LLVMContext & getContext() const
VPBasicBlock * getEntry()
bool hasScalableVF() const
VPValue * getTripCount() const
The trip count of the original loop.
VPValue * getOrCreateBackedgeTakenCount()
The backedge taken count of the original loop.
iterator_range< SmallSetVector< ElementCount, 2 >::iterator > vectorFactors() const
Returns an iterator range over all VFs of the plan.
VPIRValue * getFalse()
Return a VPIRValue wrapping i1 false.
VPSymbolicValue & getVFxUF()
Returns VF * UF of the vector loop region.
VPIRValue * getAllOnesValue(Type *Ty)
Return a VPIRValue wrapping the AllOnes value of type Ty.
VPRegionBlock * createReplicateRegion(VPBlockBase *Entry, VPBlockBase *Exiting, const std::string &Name="")
Create a new replicate region with Entry, Exiting and Name.
auto getLiveIns() const
Return the list of live-in VPValues available in the VPlan.
bool hasUF(unsigned UF) const
ArrayRef< VPIRBasicBlock * > getExitBlocks() const
Return an ArrayRef containing VPIRBasicBlocks wrapping the exit blocks of the original scalar loop.
VPSymbolicValue & getVectorTripCount()
The vector trip count.
VPValue * getBackedgeTakenCount() const
VPIRValue * getOrAddLiveIn(Value *V)
Gets the live-in VPIRValue for V or adds a new live-in (if none exists yet) for V.
VPIRValue * getZero(Type *Ty)
Return a VPIRValue wrapping the null value of type Ty.
void setVF(ElementCount VF)
bool isUnrolled() const
Returns true if the VPlan already has been unrolled, i.e.
LLVM_ABI_FOR_TEST VPRegionBlock * getVectorLoopRegion()
Returns the VPRegionBlock of the vector loop.
unsigned getConcreteUF() const
Returns the concrete UF of the plan, after unrolling.
void resetTripCount(VPValue *NewTripCount)
Resets the trip count for the VPlan.
VPBasicBlock * getMiddleBlock()
Returns the 'middle' block of the plan, that is the block that selects whether to execute the scalar ...
VPBasicBlock * createVPBasicBlock(const Twine &Name, VPRecipeBase *Recipe=nullptr)
Create a new VPBasicBlock with Name and containing Recipe if present.
VPIRValue * getTrue()
Return a VPIRValue wrapping i1 true.
VPBasicBlock * getVectorPreheader() const
Returns the preheader of the vector loop region, if one exists, or null otherwise.
VPSymbolicValue & getUF()
Returns the UF of the vector loop region.
bool hasScalarVFOnly() const
VPBasicBlock * getScalarPreheader() const
Return the VPBasicBlock for the preheader of the scalar loop.
VPSymbolicValue & getVF()
Returns the VF of the vector loop region.
bool hasScalarTail() const
Returns true if the scalar tail may execute after the vector loop, i.e.
LLVM_ABI_FOR_TEST VPlan * duplicate()
Clone the current VPlan, update all VPValues of the new VPlan and cloned recipes to refer to the clon...
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
iterator_range< user_iterator > users()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
constexpr ScalarTy getFixedValue() const
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_ABI APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM)
Return A unsign-divided by B, rounded by the given rounding mode.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
std::variant< std::monostate, Loc::Single, Loc::Multi, Loc::MMI, Loc::EntryValue > Variant
Alias for the std::variant specialization base class of DbgVariable.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_isa< To... > m_Isa()
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedStore(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedStore Intrinsic.
match_combine_or< CastInst_match< OpTy, TruncInst >, OpTy > m_TruncOrSelf(const OpTy &Op)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
LogicalOp_match< LHS, RHS, Instruction::And > m_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R either in the form of L & R or L ? R : false.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, OpTy > m_ZExtOrSelf(const OpTy &Op)
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
Matches MaskedLoad Intrinsic.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
SpecificCmpClass_match< LHS, RHS, CmpInst > m_SpecificCmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::UDiv > m_UDiv(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::FAdd, true > m_c_FAdd(const LHS &L, const RHS &R)
Matches FAdd with LHS and RHS in either order.
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
bind_cst_ty m_scev_APInt(const APInt *&C)
Match an SCEV constant and bind it to an APInt.
specificloop_ty m_SpecificLoop(const Loop *L)
bool match(const SCEV *S, const Pattern &P)
SCEVAffineAddRec_match< Op0_t, Op1_t, match_isa< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastLane, VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > > m_ExtractLastLaneOfLastPart(const Op0_t &Op0)
AllRecipe_commutative_match< Instruction::And, Op0_t, Op1_t > m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1)
Match a binary AND operation.
AllRecipe_match< Instruction::Or, Op0_t, Op1_t > m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
Match a binary OR operation.
VPInstruction_match< VPInstruction::AnyOf > m_AnyOf()
AllRecipe_commutative_match< Instruction::Or, Op0_t, Op1_t > m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ComputeReductionResult, Op0_t > m_ComputeReductionResult(const Op0_t &Op0)
auto m_WidenAnyExtend(const Op0_t &Op0)
match_bind< VPIRValue > m_VPIRValue(VPIRValue *&V)
Match a VPIRValue.
VPInstruction_match< VPInstruction::StepVector > m_StepVector()
auto m_VPPhi(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::BranchOnTwoConds > m_BranchOnTwoConds()
AllRecipe_match< Opcode, Op0_t, Op1_t > m_Binary(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::LastActiveLane, Op0_t > m_LastActiveLane(const Op0_t &Op0)
auto m_WidenIntrinsic(const T &...Ops)
canonical_widen_iv_match m_CanonicalWidenIV()
VPInstruction_match< VPInstruction::ExitingIVValue, Op0_t > m_ExitingIVValue(const Op0_t &Op0)
VPInstruction_match< Instruction::ExtractElement, Op0_t, Op1_t > m_ExtractElement(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_False()
VPInstruction_match< VPInstruction::ExtractLastLane, Op0_t > m_ExtractLastLane(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ActiveLaneMask, Op0_t, Op1_t, Op2_t > m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
match_bind< VPSingleDefRecipe > m_VPSingleDefRecipe(VPSingleDefRecipe *&V)
Match a VPSingleDefRecipe, capturing if we match.
VPInstruction_match< VPInstruction::BranchOnCount > m_BranchOnCount()
auto m_GetElementPtr(const Op0_t &Op0, const Op1_t &Op1)
specific_intval< 1 > m_True()
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
VectorEndPointerRecipe_match< Op0_t, Op1_t > m_VecEndPtr(const Op0_t &Op0, const Op1_t &Op1)
VPInstruction_match< VPInstruction::ExtractLastPart, Op0_t > m_ExtractLastPart(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Broadcast, Op0_t > m_Broadcast(const Op0_t &Op0)
VPInstruction_match< VPInstruction::ExplicitVectorLength, Op0_t > m_EVL(const Op0_t &Op0)
VPInstruction_match< VPInstruction::BuildVector > m_BuildVector()
BuildVector matches only its opcode, w/o matching its operands as the number of operands is not fi...
VPInstruction_match< VPInstruction::ExtractPenultimateElement, Op0_t > m_ExtractPenultimateElement(const Op0_t &Op0)
match_bind< VPInstruction > m_VPInstruction(VPInstruction *&V)
Match a VPInstruction, capturing if we match.
VPInstruction_match< VPInstruction::FirstActiveLane, Op0_t > m_FirstActiveLane(const Op0_t &Op0)
auto m_DerivedIV(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2)
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
static VPRecipeBase * findUserOf(VPValue *V, const MatchT &P)
If V is used by a recipe matching pattern P, return it.
VPInstruction_match< VPInstruction::ExtractLane, Op0_t, Op1_t > m_ExtractLane(const Op0_t &Op0, const Op1_t &Op1)
auto m_AnyNeg(const Op0_t &Op0)
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
NodeAddr< DefNode * > Def
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
VPValue * getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr)
Get or create a VPValue that corresponds to the expansion of Expr.
bool cannotHoistOrSinkRecipe(const VPRecipeBase &R, bool Sinking=false)
Return true if we do not know how to (mechanically) hoist or sink R.
VPInstruction * findComputeReductionResult(VPReductionPHIRecipe *PhiR)
Find the ComputeReductionResult recipe for PhiR, looking through selects inserted for predicated redu...
VPInstruction * findCanonicalIVIncrement(VPlan &Plan)
Find the canonical IV increment of Plan's vector loop region.
std::optional< MemoryLocation > getMemoryLocation(const VPRecipeBase &R)
Return a MemoryLocation for R with noalias metadata populated from R, if the recipe is supported and ...
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
VPValue * findIncomingAliasMask(const VPlan &Plan)
Finds the incoming alias-mask within the vector preheader.
VPRecipeBase * findRecipe(VPValue *Start, PredT Pred)
Search Start's users for a recipe satisfying Pred, looking through recipes with definitions.
VPSingleDefRecipe * findHeaderMask(VPlan &Plan)
Collect the header mask with the pattern: (ICMP_ULE, WideCanonicalIV, backedge-taken-count) Note: If ...
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
bool isUniformAcrossVFsAndUFs(const VPValue *V)
Checks if V is uniform across all VF lanes and UF parts.
LLVM_ABI_FOR_TEST std::optional< VPValue * > getRecipesForUncountableExit(SmallVectorImpl< VPInstruction * > &Recipes, SmallVectorImpl< VPInstruction * > &GEPs, VPBasicBlock *LatchVPBB)
Returns the VPValue representing the uncountable exit comparison used by AnyOf if the recipes it depe...
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
bool isHeaderMask(const VPValue *V, const VPlan &Plan)
Return true if V is a header mask in Plan.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
SmallVector< VPBasicBlock * > vp_rpo_plain_cfg_loop_body(VPBasicBlock *Header)
Returns the VPBasicBlocks forming the loop body of a plain (pre-region) VPlan in reverse post-order s...
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
void stable_sort(R &&Range)
auto min_element(R &&Range)
Provide wrappers to std::min_element which take ranges instead of having to pass begin/end explicitly...
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
DenseMap< const Value *, const SCEV * > ValueToSCEVMapTy
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto cast_or_null(const Y &Val)
iterator_range< df_iterator< VPBlockShallowTraversalWrapper< VPBlockBase * > > > vp_depth_first_shallow(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order.
constexpr auto bind_back(FnT &&Fn, BindArgsT &&...BindArgs)
C++23 bind_back.
iterator_range< df_iterator< VPBlockDeepTraversalWrapper< VPBlockBase * > > > vp_depth_first_deep(VPBlockBase *G)
Returns an iterator range to traverse the graph starting at G in depth-first order while traversing t...
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
SmallVector< VPRegisterUsage, 8 > calculateRegisterUsageForPlan(VPlan &Plan, ArrayRef< ElementCount > VFs, const TargetTransformInfo &TTI, const SmallPtrSetImpl< const Value * > &ValuesToIgnore)
Estimate the register usage for Plan and vectorization factors in VFs by calculating the highest numb...
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
constexpr size_t range_size(R &&Range)
Returns the size of the Range, i.e., the number of elements.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI_FOR_TEST cl::opt< bool > EnableWideActiveLaneMask
UncountableExitStyle
Different methods of handling early exits.
@ MaskedHandleExitInScalarLoop
All memory operations other than the load(s) required to determine whether an uncountable exit occurr...
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
bool canConstantBeExtended(const APInt *C, Type *NarrowType, TTI::PartialReductionExtendKind ExtKind)
Check if a constant CI can be safely treated as having been extended from a narrower type with the gi...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ FSub
Subtraction of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, ScalarEvolution &SE, DominatorTree &DT, AssumptionCache *AC=nullptr, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Return true if we can prove that the given load (which is assumed to be within the specified loop) wo...
@ Default
The result value is uniform if and only if all operands are uniform.
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
VPBasicBlock * EarlyExitingVPBB
VPIRBasicBlock * EarlyExitVPBB
RemoveMask_match(const Op0_t &In, Op1_t &Out)
bool match(OpTy *V) const
MDNode * Scope
The tag for alias scope specification (used with noalias).
This struct is a compact representation of a valid (non-zero power of two) alignment.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This reduction is unordered with the partial result scaled down by some factor.
Holds the VFShape for a specific scalar to vector function mapping.
Encapsulates information needed to describe a parameter.
A range of powers-of-2 vectorization factors with fixed start and adjustable end.
Struct to hold various analysis needed for cost computations.
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
bool isMaskRequired(Instruction *I) const
Forwards to LoopVectorizationCostModel::isMaskRequired.
PredicatedScalarEvolution & PSE
bool willBeScalarized(Instruction *I, ElementCount VF) const
Returns true if I is known to be scalarized at VF.
TargetTransformInfo::TargetCostKind CostKind
const TargetLibraryInfo & TLI
const TargetTransformInfo & TTI
A VPValue representing a live-in from the input IR or a constant.
Type * getType() const
Returns the type of the underlying IR value.
A struct that represents some properties of the register usage of a loop.
SmallMapVector< unsigned, unsigned, 4 > MaxLocalUsers
Holds the maximum number of concurrent live intervals in the loop.
InstructionCost spillCost(const TargetTransformInfo &TTI, TargetTransformInfo::TargetCostKind CostKind, unsigned OverrideMaxNumRegs=0) const
Calculate the estimated cost of any spills due to using more registers than the number available for ...
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
bool isMaterialized() const
Returns true if this symbolic value has been materialized.
A recipe for widening load operations with vector-predication intrinsics, using the address to load f...
A recipe for widening load operations, using the address to load from and an optional mask.
A recipe for widening store operations with vector-predication intrinsics, using the value to store,...
A recipe for widening store operations, using the stored value, the address to store to and an option...