44#define DEBUG_TYPE "vector-combine"
50STATISTIC(NumVecLoad,
"Number of vector loads formed");
51STATISTIC(NumVecCmp,
"Number of vector compares formed");
52STATISTIC(NumVecBO,
"Number of vector binops formed");
53STATISTIC(NumVecCmpBO,
"Number of vector compare + binop formed");
54STATISTIC(NumShufOfBitcast,
"Number of shuffles moved after bitcast");
55STATISTIC(NumScalarOps,
"Number of scalar unary + binary ops formed");
56STATISTIC(NumScalarCmp,
"Number of scalar compares formed");
57STATISTIC(NumScalarIntrinsic,
"Number of scalar intrinsic calls formed");
61 cl::desc(
"Disable all vector combine transforms"));
65 cl::desc(
"Disable binop extract to shuffle transforms"));
69 cl::desc(
"Max number of instructions to scan for vector combining."));
71static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
79 bool TryEarlyFoldsOnly)
82 TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
89 const TargetTransformInfo &TTI;
90 const DominatorTree &DT;
95 const SimplifyQuery SQ;
99 bool TryEarlyFoldsOnly;
101 InstructionWorklist Worklist;
110 bool vectorizeLoadInsert(Instruction &
I);
111 bool widenSubvectorLoad(Instruction &
I);
112 ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
113 ExtractElementInst *Ext1,
114 unsigned PreferredExtractIndex)
const;
115 bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
116 const Instruction &
I,
117 ExtractElementInst *&ConvertToShuffle,
118 unsigned PreferredExtractIndex);
121 bool foldExtractExtract(Instruction &
I);
122 bool foldInsExtFNeg(Instruction &
I);
123 bool foldInsExtBinop(Instruction &
I);
124 bool foldInsExtVectorToShuffle(Instruction &
I);
125 bool foldBitOpOfCastops(Instruction &
I);
126 bool foldBitOpOfCastConstant(Instruction &
I);
127 bool foldBitcastShuffle(Instruction &
I);
128 bool scalarizeOpOrCmp(Instruction &
I);
129 bool scalarizeVPIntrinsic(Instruction &
I);
130 bool foldExtractedCmps(Instruction &
I);
131 bool foldSelectsFromBitcast(Instruction &
I);
132 bool foldBinopOfReductions(Instruction &
I);
133 bool foldSingleElementStore(Instruction &
I);
134 bool scalarizeLoad(Instruction &
I);
135 bool scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
136 bool scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
Value *Ptr);
137 bool scalarizeExtExtract(Instruction &
I);
138 bool foldConcatOfBoolMasks(Instruction &
I);
139 bool foldPermuteOfBinops(Instruction &
I);
140 bool foldShuffleOfBinops(Instruction &
I);
141 bool foldShuffleOfSelects(Instruction &
I);
142 bool foldShuffleOfCastops(Instruction &
I);
143 bool foldShuffleOfShuffles(Instruction &
I);
144 bool foldPermuteOfIntrinsic(Instruction &
I);
145 bool foldShufflesOfLengthChangingShuffles(Instruction &
I);
146 bool foldShuffleOfIntrinsics(Instruction &
I);
147 bool foldShuffleToIdentity(Instruction &
I);
148 bool foldShuffleFromReductions(Instruction &
I);
149 bool foldShuffleChainsToReduce(Instruction &
I);
150 bool foldCastFromReductions(Instruction &
I);
151 bool foldSignBitReductionCmp(Instruction &
I);
152 bool foldICmpEqZeroVectorReduce(Instruction &
I);
153 bool foldEquivalentReductionCmp(Instruction &
I);
154 bool foldSelectShuffle(Instruction &
I,
bool FromReduction =
false);
155 bool foldInterleaveIntrinsics(Instruction &
I);
156 bool shrinkType(Instruction &
I);
157 bool shrinkLoadForShuffles(Instruction &
I);
158 bool shrinkPhiOfShuffles(Instruction &
I);
160 void replaceValue(Instruction &Old,
Value &New,
bool Erase =
true) {
166 Worklist.pushUsersToWorkList(*NewI);
167 Worklist.pushValue(NewI);
184 SmallPtrSet<Value *, 4> Visited;
189 OpI,
nullptr,
nullptr, [&](
Value *V) {
194 NextInst = NextInst->getNextNode();
199 Worklist.pushUsersToWorkList(*OpI);
200 Worklist.pushValue(OpI);
220 if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
221 Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
227 Type *ScalarTy = Load->getType()->getScalarType();
229 unsigned MinVectorSize =
TTI.getMinVectorRegisterBitWidth();
230 if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
237bool VectorCombine::vectorizeLoadInsert(
Instruction &
I) {
263 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
266 unsigned MinVecNumElts = MinVectorSize / ScalarSize;
267 auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts,
false);
268 unsigned OffsetEltIndex = 0;
276 unsigned OffsetBitWidth =
DL->getIndexTypeSizeInBits(SrcPtr->
getType());
277 APInt
Offset(OffsetBitWidth, 0);
287 uint64_t ScalarSizeInBytes = ScalarSize / 8;
288 if (
Offset.urem(ScalarSizeInBytes) != 0)
292 OffsetEltIndex =
Offset.udiv(ScalarSizeInBytes).getZExtValue();
293 if (OffsetEltIndex >= MinVecNumElts)
310 unsigned AS =
Load->getPointerAddressSpace();
329 unsigned OutputNumElts = Ty->getNumElements();
331 assert(OffsetEltIndex < MinVecNumElts &&
"Address offset too big");
332 Mask[0] = OffsetEltIndex;
339 if (OldCost < NewCost || !NewCost.
isValid())
350 replaceValue(
I, *VecLd);
358bool VectorCombine::widenSubvectorLoad(Instruction &
I) {
361 if (!Shuf->isIdentityWithPadding())
367 unsigned OpIndex =
any_of(Shuf->getShuffleMask(), [&NumOpElts](
int M) {
368 return M >= (int)(NumOpElts);
379 Value *SrcPtr =
Load->getPointerOperand()->stripPointerCasts();
387 unsigned AS =
Load->getPointerAddressSpace();
402 if (OldCost < NewCost || !NewCost.
isValid())
409 replaceValue(
I, *VecLd);
416ExtractElementInst *VectorCombine::getShuffleExtract(
417 ExtractElementInst *Ext0, ExtractElementInst *Ext1,
421 assert(Index0C && Index1C &&
"Expected constant extract indexes");
423 unsigned Index0 = Index0C->getZExtValue();
424 unsigned Index1 = Index1C->getZExtValue();
427 if (Index0 == Index1)
451 if (PreferredExtractIndex == Index0)
453 if (PreferredExtractIndex == Index1)
457 return Index0 > Index1 ? Ext0 : Ext1;
465bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
466 ExtractElementInst *Ext1,
467 const Instruction &
I,
468 ExtractElementInst *&ConvertToShuffle,
469 unsigned PreferredExtractIndex) {
472 assert(Ext0IndexC && Ext1IndexC &&
"Expected constant extract indexes");
474 unsigned Opcode =
I.getOpcode();
487 assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
488 "Expected a compare");
498 unsigned Ext0Index = Ext0IndexC->getZExtValue();
499 unsigned Ext1Index = Ext1IndexC->getZExtValue();
513 unsigned BestExtIndex = Extract0Cost > Extract1Cost ? Ext0Index : Ext1Index;
514 unsigned BestInsIndex = Extract0Cost > Extract1Cost ? Ext1Index : Ext0Index;
515 InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
520 if (Ext0Src == Ext1Src && Ext0Index == Ext1Index) {
525 bool HasUseTax = Ext0 == Ext1 ? !Ext0->
hasNUses(2)
527 OldCost = CheapExtractCost + ScalarOpCost;
528 NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
532 OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
533 NewCost = VectorOpCost + CheapExtractCost +
538 ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
539 if (ConvertToShuffle) {
551 SmallVector<int> ShuffleMask(FixedVecTy->getNumElements(),
553 ShuffleMask[BestInsIndex] = BestExtIndex;
555 VecTy, VecTy, ShuffleMask,
CostKind, 0,
556 nullptr, {ConvertToShuffle});
559 VecTy, VecTy, {},
CostKind, 0,
nullptr,
567 return OldCost < NewCost;
579 ShufMask[NewIndex] = OldIndex;
580 return Builder.CreateShuffleVector(Vec, ShufMask,
"shift");
632 V1,
"foldExtExtBinop");
637 VecBOInst->copyIRFlags(&
I);
643bool VectorCombine::foldExtractExtract(Instruction &
I) {
674 ExtractElementInst *ExtractToChange;
675 if (isExtractExtractCheap(Ext0, Ext1,
I, ExtractToChange, InsertIndex))
681 if (ExtractToChange) {
682 unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
687 if (ExtractToChange == Ext0)
696 ? foldExtExtCmp(ExtOp0, ExtOp1, ExtIndex,
I)
697 : foldExtExtBinop(ExtOp0, ExtOp1, ExtIndex,
I);
700 replaceValue(
I, *NewExt);
706bool VectorCombine::foldInsExtFNeg(Instruction &
I) {
709 uint64_t ExtIdx, InsIdx;
724 auto *DstVecScalarTy = DstVecTy->getScalarType();
726 if (!SrcVecTy || DstVecScalarTy != SrcVecTy->getScalarType())
731 unsigned NumDstElts = DstVecTy->getNumElements();
732 unsigned NumSrcElts = SrcVecTy->getNumElements();
733 if (ExtIdx > NumSrcElts || InsIdx >= NumDstElts || NumDstElts == 1)
739 SmallVector<int>
Mask(NumDstElts);
740 std::iota(
Mask.begin(),
Mask.end(), 0);
741 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
757 bool NeedLenChg = SrcVecTy->getNumElements() != NumDstElts;
760 SmallVector<int> SrcMask;
763 SrcMask[ExtIdx % NumDstElts] = ExtIdx;
765 DstVecTy, SrcVecTy, SrcMask,
CostKind);
769 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
771 if (NewCost > OldCost)
774 Value *NewShuf, *LenChgShuf =
nullptr;
788 replaceValue(
I, *NewShuf);
794bool VectorCombine::foldInsExtBinop(Instruction &
I) {
795 BinaryOperator *VecBinOp, *SclBinOp;
827 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
829 if (NewCost > OldCost)
840 NewInst->copyIRFlags(VecBinOp);
841 NewInst->andIRFlags(SclBinOp);
846 replaceValue(
I, *NewBO);
852bool VectorCombine::foldBitOpOfCastops(Instruction &
I) {
855 if (!BinOp || !BinOp->isBitwiseLogicOp())
861 if (!LHSCast || !RHSCast) {
862 LLVM_DEBUG(
dbgs() <<
" One or both operands are not cast instructions\n");
868 if (CastOpcode != RHSCast->getOpcode())
872 switch (CastOpcode) {
873 case Instruction::BitCast:
874 case Instruction::Trunc:
875 case Instruction::SExt:
876 case Instruction::ZExt:
882 Value *LHSSrc = LHSCast->getOperand(0);
883 Value *RHSSrc = RHSCast->getOperand(0);
889 auto *SrcTy = LHSSrc->
getType();
890 auto *DstTy =
I.getType();
893 if (CastOpcode != Instruction::BitCast &&
898 if (!SrcTy->getScalarType()->isIntegerTy() ||
899 !DstTy->getScalarType()->isIntegerTy())
914 LHSCastCost + RHSCastCost;
925 if (!LHSCast->hasOneUse())
926 NewCost += LHSCastCost;
927 if (!RHSCast->hasOneUse())
928 NewCost += RHSCastCost;
931 <<
" NewCost=" << NewCost <<
"\n");
933 if (NewCost > OldCost)
938 BinOp->getName() +
".inner");
940 NewBinOp->copyIRFlags(BinOp);
954 replaceValue(
I, *Result);
963bool VectorCombine::foldBitOpOfCastConstant(Instruction &
I) {
979 switch (CastOpcode) {
980 case Instruction::BitCast:
981 case Instruction::ZExt:
982 case Instruction::SExt:
983 case Instruction::Trunc:
989 Value *LHSSrc = LHSCast->getOperand(0);
991 auto *SrcTy = LHSSrc->
getType();
992 auto *DstTy =
I.getType();
995 if (CastOpcode != Instruction::BitCast &&
1000 if (!SrcTy->getScalarType()->isIntegerTy() ||
1001 !DstTy->getScalarType()->isIntegerTy())
1005 PreservedCastFlags RHSFlags;
1030 if (!LHSCast->hasOneUse())
1031 NewCost += LHSCastCost;
1033 LLVM_DEBUG(
dbgs() <<
"foldBitOpOfCastConstant: OldCost=" << OldCost
1034 <<
" NewCost=" << NewCost <<
"\n");
1036 if (NewCost > OldCost)
1041 LHSSrc, InvC,
I.getName() +
".inner");
1043 NewBinOp->copyIRFlags(&
I);
1063 replaceValue(
I, *Result);
1070bool VectorCombine::foldBitcastShuffle(Instruction &
I) {
1084 if (!DestTy || !SrcTy)
1087 unsigned DestEltSize = DestTy->getScalarSizeInBits();
1088 unsigned SrcEltSize = SrcTy->getScalarSizeInBits();
1089 if (SrcTy->getPrimitiveSizeInBits() % DestEltSize != 0)
1099 if (!(BCTy0 && BCTy0->getElementType() == DestTy->getElementType()) &&
1100 !(BCTy1 && BCTy1->getElementType() == DestTy->getElementType()))
1104 SmallVector<int, 16> NewMask;
1105 if (DestEltSize <= SrcEltSize) {
1108 if (SrcEltSize % DestEltSize != 0)
1110 unsigned ScaleFactor = SrcEltSize / DestEltSize;
1115 if (DestEltSize % SrcEltSize != 0)
1117 unsigned ScaleFactor = DestEltSize / SrcEltSize;
1124 unsigned NumSrcElts = SrcTy->getPrimitiveSizeInBits() / DestEltSize;
1125 auto *NewShuffleTy =
1127 auto *OldShuffleTy =
1129 unsigned NumOps = IsUnary ? 1 : 2;
1139 TargetTransformInfo::CastContextHint::None,
1144 TargetTransformInfo::CastContextHint::None,
1147 LLVM_DEBUG(
dbgs() <<
"Found a bitcasted shuffle: " <<
I <<
"\n OldCost: "
1148 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
1150 if (NewCost > OldCost || !NewCost.
isValid())
1158 replaceValue(
I, *Shuf);
1165bool VectorCombine::scalarizeVPIntrinsic(Instruction &
I) {
1179 if (!ScalarOp0 || !ScalarOp1)
1187 auto IsAllTrueMask = [](
Value *MaskVal) {
1190 return ConstValue->isAllOnesValue();
1204 SmallVector<int>
Mask;
1206 Mask.resize(FVTy->getNumElements(), 0);
1215 Args.push_back(
V->getType());
1216 IntrinsicCostAttributes
Attrs(IntrID, VecTy, Args);
1221 std::optional<unsigned> FunctionalOpcode =
1223 std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
1224 if (!FunctionalOpcode) {
1233 IntrinsicCostAttributes
Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
1243 InstructionCost NewCost = ScalarOpCost + SplatCost + CostToKeepSplats;
1245 LLVM_DEBUG(
dbgs() <<
"Found a VP Intrinsic to scalarize: " << VPI
1248 <<
", Cost of scalarizing:" << NewCost <<
"\n");
1251 if (OldCost < NewCost || !NewCost.
isValid())
1262 bool SafeToSpeculate;
1268 *FunctionalOpcode, &VPI,
nullptr, &AC, &DT);
1269 if (!SafeToSpeculate &&
1276 {ScalarOp0, ScalarOp1})
1278 ScalarOp0, ScalarOp1);
1287bool VectorCombine::scalarizeOpOrCmp(Instruction &
I) {
1292 if (!UO && !BO && !CI && !
II)
1300 if (Arg->getType() !=
II->getType() &&
1310 for (User *U :
I.users())
1317 std::optional<uint64_t>
Index;
1319 auto Ops =
II ?
II->args() :
I.operands();
1323 uint64_t InsIdx = 0;
1328 if (OpTy->getElementCount().getKnownMinValue() <= InsIdx)
1334 else if (InsIdx != *Index)
1351 if (!
Index.has_value())
1355 Type *ScalarTy = VecTy->getScalarType();
1356 assert(VecTy->isVectorTy() &&
1359 "Unexpected types for insert element into binop or cmp");
1361 unsigned Opcode =
I.getOpcode();
1369 }
else if (UO || BO) {
1373 IntrinsicCostAttributes ScalarICA(
1374 II->getIntrinsicID(), ScalarTy,
1377 IntrinsicCostAttributes VectorICA(
1378 II->getIntrinsicID(), VecTy,
1385 Value *NewVecC =
nullptr;
1387 NewVecC =
simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
1390 simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
1392 NewVecC =
simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
1406 for (
auto [Idx,
Op, VecC, Scalar] :
enumerate(
Ops, VecCs, ScalarOps)) {
1408 II->getIntrinsicID(), Idx, &
TTI)))
1411 Instruction::InsertElement, VecTy,
CostKind, *Index, VecC, Scalar);
1412 OldCost += InsertCost;
1413 NewCost += !
Op->hasOneUse() * InsertCost;
1417 if (OldCost < NewCost || !NewCost.
isValid())
1427 ++NumScalarIntrinsic;
1437 Scalar = Builder.
CreateCmp(CI->getPredicate(), ScalarOps[0], ScalarOps[1]);
1443 Scalar->setName(
I.getName() +
".scalar");
1448 ScalarInst->copyIRFlags(&
I);
1451 replaceValue(
I, *Insert);
1458bool VectorCombine::foldExtractedCmps(Instruction &
I) {
1463 if (!BI || !
I.getType()->isIntegerTy(1))
1468 Value *B0 =
I.getOperand(0), *B1 =
I.getOperand(1);
1471 CmpPredicate
P0,
P1;
1483 uint64_t Index0, Index1;
1490 ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1,
CostKind);
1493 assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
1494 "Unknown ExtractElementInst");
1499 unsigned CmpOpcode =
1514 Ext0Cost + Ext1Cost + CmpCost * 2 +
1520 int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
1521 int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
1526 ShufMask[CheapIndex] = ExpensiveIndex;
1531 NewCost += Ext0->
hasOneUse() ? 0 : Ext0Cost;
1532 NewCost += Ext1->
hasOneUse() ? 0 : Ext1Cost;
1537 if (OldCost < NewCost || !NewCost.
isValid())
1547 Value *
LHS = ConvertToShuf == Ext0 ? Shuf : VCmp;
1548 Value *
RHS = ConvertToShuf == Ext0 ? VCmp : Shuf;
1551 replaceValue(
I, *NewExt);
1578bool VectorCombine::foldSelectsFromBitcast(Instruction &
I) {
1585 if (!SrcVecTy || !DstVecTy)
1595 if (SrcEltBits != 32 && SrcEltBits != 64)
1598 if (!DstEltTy->
isIntegerTy() || DstEltBits >= SrcEltBits)
1615 if (!ScalarSelCost.
isValid() || ScalarSelCost == 0)
1618 unsigned MinSelects = (VecSelCost.
getValue() / ScalarSelCost.
getValue()) + 1;
1621 if (!BC->hasNUsesOrMore(MinSelects))
1626 DenseMap<Value *, SmallVector<SelectInst *, 8>> CondToSelects;
1628 for (User *U : BC->users()) {
1633 for (User *ExtUser : Ext->users()) {
1637 Cond->getType()->isIntegerTy(1))
1642 if (CondToSelects.
empty())
1645 bool MadeChange =
false;
1646 Value *SrcVec = BC->getOperand(0);
1649 for (
auto [
Cond, Selects] : CondToSelects) {
1651 if (Selects.size() < MinSelects) {
1652 LLVM_DEBUG(
dbgs() <<
"VectorCombine: foldSelectsFromBitcast not "
1653 <<
"profitable (VecCost=" << VecSelCost
1654 <<
", ScalarCost=" << ScalarSelCost
1655 <<
", NumSelects=" << Selects.size() <<
")\n");
1660 auto InsertPt = std::next(BC->getIterator());
1664 InsertPt = std::next(CondInst->getIterator());
1672 for (SelectInst *Sel : Selects) {
1674 Value *Idx = Ext->getIndexOperand();
1678 replaceValue(*Sel, *NewExt);
1683 <<
" selects into vector select\n");
1697 unsigned ReductionOpc =
1703 CostBeforeReduction =
1704 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, ExtType,
1706 CostAfterReduction =
1707 TTI.getExtendedReductionCost(ReductionOpc, IsUnsigned,
II.getType(),
1711 if (RedOp &&
II.getIntrinsicID() == Intrinsic::vector_reduce_add &&
1717 (Op0->
getOpcode() == RedOp->getOpcode() || Op0 == Op1)) {
1724 TTI.getCastInstrCost(Op0->
getOpcode(), MulType, ExtType,
1727 TTI.getArithmeticInstrCost(Instruction::Mul, MulType,
CostKind);
1729 TTI.getCastInstrCost(RedOp->getOpcode(), VecRedTy, MulType,
1732 CostBeforeReduction = ExtCost * 2 + MulCost + Ext2Cost;
1733 CostAfterReduction =
TTI.getMulAccReductionCost(
1734 IsUnsigned, ReductionOpc,
II.getType(), ExtType,
CostKind);
1737 CostAfterReduction =
TTI.getArithmeticReductionCost(ReductionOpc, VecRedTy,
1741bool VectorCombine::foldBinopOfReductions(Instruction &
I) {
1744 if (BinOpOpc == Instruction::Sub)
1745 ReductionIID = Intrinsic::vector_reduce_add;
1749 auto checkIntrinsicAndGetItsArgument = [](
Value *
V,
1754 if (
II->getIntrinsicID() == IID &&
II->hasOneUse())
1755 return II->getArgOperand(0);
1759 Value *V0 = checkIntrinsicAndGetItsArgument(
I.getOperand(0), ReductionIID);
1762 Value *V1 = checkIntrinsicAndGetItsArgument(
I.getOperand(1), ReductionIID);
1771 unsigned ReductionOpc =
1784 CostOfRedOperand0 + CostOfRedOperand1 +
1787 if (NewCost >= OldCost || !NewCost.
isValid())
1791 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
1794 if (BinOpOpc == Instruction::Or)
1795 VectorBO = Builder.
CreateOr(V0, V1,
"",
1801 replaceValue(
I, *Rdx);
1809 unsigned NumScanned = 0;
1810 return std::any_of(Begin, End, [&](
const Instruction &Instr) {
1819class ScalarizationResult {
1820 enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
1825 ScalarizationResult(StatusTy Status,
Value *ToFreeze =
nullptr)
1826 : Status(Status), ToFreeze(ToFreeze) {}
1829 ScalarizationResult(
const ScalarizationResult &
Other) =
default;
1830 ~ScalarizationResult() {
1831 assert(!ToFreeze &&
"freeze() not called with ToFreeze being set");
1834 static ScalarizationResult unsafe() {
return {StatusTy::Unsafe}; }
1835 static ScalarizationResult safe() {
return {StatusTy::Safe}; }
1836 static ScalarizationResult safeWithFreeze(
Value *ToFreeze) {
1837 return {StatusTy::SafeWithFreeze, ToFreeze};
1841 bool isSafe()
const {
return Status == StatusTy::Safe; }
1843 bool isUnsafe()
const {
return Status == StatusTy::Unsafe; }
1846 bool isSafeWithFreeze()
const {
return Status == StatusTy::SafeWithFreeze; }
1851 Status = StatusTy::Unsafe;
1855 void freeze(IRBuilderBase &Builder, Instruction &UserI) {
1856 assert(isSafeWithFreeze() &&
1857 "should only be used when freezing is required");
1859 "UserI must be a user of ToFreeze");
1860 IRBuilder<>::InsertPointGuard Guard(Builder);
1865 if (
U.get() == ToFreeze)
1882 uint64_t NumElements = VecTy->getElementCount().getKnownMinValue();
1886 if (
C->getValue().ult(NumElements))
1887 return ScalarizationResult::safe();
1888 return ScalarizationResult::unsafe();
1893 return ScalarizationResult::unsafe();
1895 APInt Zero(IntWidth, 0);
1896 APInt MaxElts(IntWidth, NumElements);
1902 true, &AC, CtxI, &DT)))
1903 return ScalarizationResult::safe();
1904 return ScalarizationResult::unsafe();
1917 if (ValidIndices.
contains(IdxRange))
1918 return ScalarizationResult::safeWithFreeze(IdxBase);
1919 return ScalarizationResult::unsafe();
1931 C->getZExtValue() *
DL.getTypeStoreSize(ScalarType));
1943bool VectorCombine::foldSingleElementStore(Instruction &
I) {
1955 if (!
match(
SI->getValueOperand(),
1962 Value *SrcAddr =
Load->getPointerOperand()->stripPointerCasts();
1965 if (!
Load->isSimple() ||
Load->getParent() !=
SI->getParent() ||
1966 !
DL->typeSizeEqualsStoreSize(
Load->getType()->getScalarType()) ||
1967 SrcAddr !=
SI->getPointerOperand()->stripPointerCasts())
1971 if (ScalarizableIdx.isUnsafe() ||
1978 Worklist.
push(Load);
1980 if (ScalarizableIdx.isSafeWithFreeze())
1983 SI->getValueOperand()->getType(),
SI->getPointerOperand(),
1984 {ConstantInt::get(Idx->getType(), 0), Idx});
1988 std::max(
SI->getAlign(),
Load->getAlign()), NewElement->
getType(), Idx,
1991 replaceValue(
I, *NSI);
2001bool VectorCombine::scalarizeLoad(Instruction &
I) {
2008 if (LI->isVolatile() || !
DL->typeSizeEqualsStoreSize(VecTy->getScalarType()))
2011 bool AllExtracts =
true;
2012 bool AllBitcasts =
true;
2014 unsigned NumInstChecked = 0;
2019 for (User *U : LI->users()) {
2021 if (!UI || UI->getParent() != LI->getParent())
2026 if (UI->use_empty())
2030 AllExtracts =
false;
2032 AllBitcasts =
false;
2036 for (Instruction &
I :
2037 make_range(std::next(LI->getIterator()), UI->getIterator())) {
2044 LastCheckedInst = UI;
2049 return scalarizeLoadExtract(LI, VecTy, Ptr);
2051 return scalarizeLoadBitcast(LI, VecTy, Ptr);
2056bool VectorCombine::scalarizeLoadExtract(LoadInst *LI, VectorType *VecTy,
2061 DenseMap<ExtractElementInst *, ScalarizationResult> NeedFreeze;
2064 for (
auto &Pair : NeedFreeze)
2065 Pair.second.discard();
2073 for (User *U : LI->
users()) {
2078 if (ScalarIdx.isUnsafe())
2080 if (ScalarIdx.isSafeWithFreeze()) {
2081 NeedFreeze.try_emplace(UI, ScalarIdx);
2082 ScalarIdx.discard();
2088 Index ?
Index->getZExtValue() : -1);
2096 LLVM_DEBUG(
dbgs() <<
"Found all extractions of a vector load: " << *LI
2097 <<
"\n LoadExtractCost: " << OriginalCost
2098 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2100 if (ScalarizedCost >= OriginalCost)
2107 Type *ElemType = VecTy->getElementType();
2110 for (User *U : LI->
users()) {
2112 Value *Idx = EI->getIndexOperand();
2115 auto It = NeedFreeze.find(EI);
2116 if (It != NeedFreeze.end())
2123 Builder.
CreateLoad(ElemType,
GEP, EI->getName() +
".scalar"));
2125 Align ScalarOpAlignment =
2127 NewLoad->setAlignment(ScalarOpAlignment);
2130 size_t Offset = ConstIdx->getZExtValue() *
DL->getTypeStoreSize(ElemType);
2135 replaceValue(*EI, *NewLoad,
false);
2138 FailureGuard.release();
2143bool VectorCombine::scalarizeLoadBitcast(LoadInst *LI, VectorType *VecTy,
2149 Type *TargetScalarType =
nullptr;
2150 unsigned VecBitWidth =
DL->getTypeSizeInBits(VecTy);
2152 for (User *U : LI->
users()) {
2155 Type *DestTy = BC->getDestTy();
2159 unsigned DestBitWidth =
DL->getTypeSizeInBits(DestTy);
2160 if (DestBitWidth != VecBitWidth)
2164 if (!TargetScalarType)
2165 TargetScalarType = DestTy;
2166 else if (TargetScalarType != DestTy)
2174 if (!TargetScalarType)
2182 LLVM_DEBUG(
dbgs() <<
"Found vector load feeding only bitcasts: " << *LI
2183 <<
"\n OriginalCost: " << OriginalCost
2184 <<
" vs ScalarizedCost: " << ScalarizedCost <<
"\n");
2186 if (ScalarizedCost >= OriginalCost)
2197 ScalarLoad->copyMetadata(*LI);
2200 for (User *U : LI->
users()) {
2202 replaceValue(*BC, *ScalarLoad,
false);
2208bool VectorCombine::scalarizeExtExtract(Instruction &
I) {
2223 Type *ScalarDstTy = DstTy->getElementType();
2224 if (
DL->getTypeSizeInBits(SrcTy) !=
DL->getTypeSizeInBits(ScalarDstTy))
2230 unsigned ExtCnt = 0;
2231 bool ExtLane0 =
false;
2232 for (User *U : Ext->users()) {
2246 Instruction::And, ScalarDstTy,
CostKind,
2249 (ExtCnt - ExtLane0) *
2251 Instruction::LShr, ScalarDstTy,
CostKind,
2254 if (ScalarCost > VectorCost)
2257 Value *ScalarV = Ext->getOperand(0);
2264 SmallDenseSet<ConstantInt *, 8> ExtractedLanes;
2265 bool AllExtractsTriggerUB =
true;
2266 ExtractElementInst *LastExtract =
nullptr;
2268 for (User *U : Ext->users()) {
2271 AllExtractsTriggerUB =
false;
2275 if (!LastExtract || LastExtract->
comesBefore(Extract))
2276 LastExtract = Extract;
2278 if (ExtractedLanes.
size() != DstTy->getNumElements() ||
2279 !AllExtractsTriggerUB ||
2287 uint64_t SrcEltSizeInBits =
DL->getTypeSizeInBits(SrcTy->getElementType());
2288 uint64_t TotalBits =
DL->getTypeSizeInBits(SrcTy);
2291 Value *
Mask = ConstantInt::get(PackedTy, EltBitMask);
2292 for (User *U : Ext->users()) {
2298 ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
2299 : (Idx * SrcEltSizeInBits);
2302 U->replaceAllUsesWith(
And);
2310bool VectorCombine::foldConcatOfBoolMasks(Instruction &
I) {
2311 Type *Ty =
I.getType();
2316 if (
DL->isBigEndian())
2327 uint64_t ShAmtX = 0;
2335 uint64_t ShAmtY = 0;
2343 if (ShAmtX > ShAmtY) {
2351 uint64_t ShAmtDiff = ShAmtY - ShAmtX;
2352 unsigned NumSHL = (ShAmtX > 0) + (ShAmtY > 0);
2357 MaskTy->getNumElements() != ShAmtDiff ||
2358 MaskTy->getNumElements() > (
BitWidth / 2))
2363 Type::getIntNTy(Ty->
getContext(), ConcatTy->getNumElements());
2364 auto *MaskIntTy = Type::getIntNTy(Ty->
getContext(), ShAmtDiff);
2367 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
2384 if (Ty != ConcatIntTy)
2390 LLVM_DEBUG(
dbgs() <<
"Found a concatenation of bitcasted bool masks: " <<
I
2391 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2394 if (NewCost > OldCost)
2404 if (Ty != ConcatIntTy) {
2414 replaceValue(
I, *Result);
2420bool VectorCombine::foldPermuteOfBinops(Instruction &
I) {
2421 BinaryOperator *BinOp;
2422 ArrayRef<int> OuterMask;
2430 Value *Op00, *Op01, *Op10, *Op11;
2431 ArrayRef<int> Mask0, Mask1;
2436 if (!Match0 && !Match1)
2449 if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
2452 unsigned NumSrcElts = BinOpTy->getNumElements();
2457 any_of(OuterMask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
2461 SmallVector<int> NewMask0, NewMask1;
2462 for (
int M : OuterMask) {
2463 if (M < 0 || M >= (
int)NumSrcElts) {
2467 NewMask0.
push_back(Match0 ? Mask0[M] : M);
2468 NewMask1.
push_back(Match1 ? Mask1[M] : M);
2472 unsigned NumOpElts = Op0Ty->getNumElements();
2473 bool IsIdentity0 = ShuffleDstTy == Op0Ty &&
2474 all_of(NewMask0, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2476 bool IsIdentity1 = ShuffleDstTy == Op1Ty &&
2477 all_of(NewMask1, [NumOpElts](
int M) {
return M < (int)NumOpElts; }) &&
2486 ShuffleDstTy, BinOpTy, OuterMask,
CostKind,
2487 0,
nullptr, {BinOp}, &
I);
2489 NewCost += BinOpCost;
2495 OldCost += Shuf0Cost;
2497 NewCost += Shuf0Cost;
2503 OldCost += Shuf1Cost;
2505 NewCost += Shuf1Cost;
2513 Op0Ty, NewMask0,
CostKind, 0,
nullptr, {Op00, Op01});
2517 Op1Ty, NewMask1,
CostKind, 0,
nullptr, {Op10, Op11});
2519 LLVM_DEBUG(
dbgs() <<
"Found a shuffle feeding a shuffled binop: " <<
I
2520 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2524 if (NewCost > OldCost)
2535 NewInst->copyIRFlags(BinOp);
2539 replaceValue(
I, *NewBO);
2545bool VectorCombine::foldShuffleOfBinops(Instruction &
I) {
2546 ArrayRef<int> OldMask;
2553 if (
LHS->getOpcode() !=
RHS->getOpcode())
2557 bool IsCommutative =
false;
2566 IsCommutative = BinaryOperator::isCommutative(BO->getOpcode());
2577 if (!ShuffleDstTy || !BinResTy || !BinOpTy ||
X->getType() !=
Z->getType())
2580 bool SameBinOp =
LHS ==
RHS;
2581 unsigned NumSrcElts = BinOpTy->getNumElements();
2584 if (IsCommutative &&
X != Z &&
Y != W && (
X == W ||
Y == Z))
2587 auto ConvertToUnary = [NumSrcElts](
int &
M) {
2588 if (M >= (
int)NumSrcElts)
2592 SmallVector<int> NewMask0(OldMask);
2601 SmallVector<int> NewMask1(OldMask);
2620 ShuffleDstTy, BinResTy, OldMask,
CostKind, 0,
2630 ArrayRef<int> InnerMask;
2632 m_Mask(InnerMask)))) &&
2635 [NumSrcElts](
int M) {
return M < (int)NumSrcElts; })) {
2647 bool ReducedInstCount =
false;
2648 ReducedInstCount |= MergeInner(
X, 0, NewMask0,
CostKind);
2649 ReducedInstCount |= MergeInner(
Y, 0, NewMask1,
CostKind);
2650 ReducedInstCount |= MergeInner(Z, NumSrcElts, NewMask0,
CostKind);
2651 ReducedInstCount |= MergeInner(W, NumSrcElts, NewMask1,
CostKind);
2652 bool SingleSrcBinOp = (
X ==
Y) && (Z == W) && (NewMask0 == NewMask1);
2657 auto *ShuffleCmpTy =
2660 SK0, ShuffleCmpTy, BinOpTy, NewMask0,
CostKind, 0,
nullptr, {
X,
Z});
2661 if (!SingleSrcBinOp)
2671 PredLHS,
CostKind, Op0Info, Op1Info);
2681 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2688 if (ReducedInstCount ? (NewCost > OldCost) : (NewCost >= OldCost))
2697 : Builder.
CreateCmp(PredLHS, Shuf0, Shuf1);
2701 NewInst->copyIRFlags(
LHS);
2702 NewInst->andIRFlags(
RHS);
2707 replaceValue(
I, *NewBO);
2714bool VectorCombine::foldShuffleOfSelects(Instruction &
I) {
2716 Value *C1, *
T1, *F1, *C2, *T2, *F2;
2727 if (!C1VecTy || !C2VecTy || C1VecTy != C2VecTy)
2733 if (((SI0FOp ==
nullptr) != (SI1FOp ==
nullptr)) ||
2734 ((SI0FOp !=
nullptr) &&
2735 (SI0FOp->getFastMathFlags() != SI1FOp->getFastMathFlags())))
2741 auto SelOp = Instruction::Select;
2749 CostSel1 + CostSel2 +
2751 {
I.getOperand(0),
I.getOperand(1)}, &
I);
2755 Mask,
CostKind, 0,
nullptr, {C1, C2});
2765 if (!Sel1->hasOneUse())
2766 NewCost += CostSel1;
2767 if (!Sel2->hasOneUse())
2768 NewCost += CostSel2;
2771 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2773 if (NewCost > OldCost)
2782 NewSel = Builder.
CreateSelectFMF(ShuffleCmp, ShuffleTrue, ShuffleFalse,
2783 SI0FOp->getFastMathFlags());
2785 NewSel = Builder.
CreateSelect(ShuffleCmp, ShuffleTrue, ShuffleFalse);
2790 replaceValue(
I, *NewSel);
2796bool VectorCombine::foldShuffleOfCastops(Instruction &
I) {
2798 ArrayRef<int> OldMask;
2807 if (!C0 || (IsBinaryShuffle && !C1))
2814 if (!IsBinaryShuffle && Opcode == Instruction::BitCast)
2817 if (IsBinaryShuffle) {
2818 if (C0->getSrcTy() != C1->getSrcTy())
2821 if (Opcode != C1->getOpcode()) {
2823 Opcode = Instruction::SExt;
2832 if (!ShuffleDstTy || !CastDstTy || !CastSrcTy)
2835 unsigned NumSrcElts = CastSrcTy->getNumElements();
2836 unsigned NumDstElts = CastDstTy->getNumElements();
2837 assert((NumDstElts == NumSrcElts || Opcode == Instruction::BitCast) &&
2838 "Only bitcasts expected to alter src/dst element counts");
2842 if (NumDstElts != NumSrcElts && (NumSrcElts % NumDstElts) != 0 &&
2843 (NumDstElts % NumSrcElts) != 0)
2846 SmallVector<int, 16> NewMask;
2847 if (NumSrcElts >= NumDstElts) {
2850 assert(NumSrcElts % NumDstElts == 0 &&
"Unexpected shuffle mask");
2851 unsigned ScaleFactor = NumSrcElts / NumDstElts;
2856 assert(NumDstElts % NumSrcElts == 0 &&
"Unexpected shuffle mask");
2857 unsigned ScaleFactor = NumDstElts / NumSrcElts;
2862 auto *NewShuffleDstTy =
2871 if (IsBinaryShuffle)
2886 if (IsBinaryShuffle) {
2896 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
2898 if (NewCost > OldCost)
2902 if (IsBinaryShuffle)
2912 NewInst->copyIRFlags(C0);
2913 if (IsBinaryShuffle)
2914 NewInst->andIRFlags(C1);
2918 replaceValue(
I, *Cast);
2928bool VectorCombine::foldShuffleOfShuffles(Instruction &
I) {
2929 ArrayRef<int> OuterMask;
2930 Value *OuterV0, *OuterV1;
2935 ArrayRef<int> InnerMask0, InnerMask1;
2936 Value *X0, *X1, *Y0, *Y1;
2941 if (!Match0 && !Match1)
2946 SmallVector<int, 16> PoisonMask1;
2951 InnerMask1 = PoisonMask1;
2955 X0 = Match0 ? X0 : OuterV0;
2956 Y0 = Match0 ? Y0 : OuterV0;
2957 X1 = Match1 ? X1 : OuterV1;
2958 Y1 = Match1 ? Y1 : OuterV1;
2962 if (!ShuffleDstTy || !ShuffleSrcTy || !ShuffleImmTy ||
2966 unsigned NumSrcElts = ShuffleSrcTy->getNumElements();
2967 unsigned NumImmElts = ShuffleImmTy->getNumElements();
2972 SmallVector<int, 16> NewMask(OuterMask);
2973 Value *NewX =
nullptr, *NewY =
nullptr;
2974 for (
int &M : NewMask) {
2975 Value *Src =
nullptr;
2976 if (0 <= M && M < (
int)NumImmElts) {
2980 Src =
M >= (int)NumSrcElts ? Y0 : X0;
2981 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2983 }
else if (M >= (
int)NumImmElts) {
2988 Src =
M >= (int)NumSrcElts ? Y1 : X1;
2989 M =
M >= (int)NumSrcElts ? (M - NumSrcElts) :
M;
2993 assert(0 <= M && M < (
int)NumSrcElts &&
"Unexpected shuffle mask index");
3002 if (!NewX || NewX == Src) {
3006 if (!NewY || NewY == Src) {
3022 replaceValue(
I, *NewX);
3039 bool IsUnary =
all_of(NewMask, [&](
int M) {
return M < (int)NumSrcElts; });
3045 nullptr, {NewX, NewY});
3047 NewCost += InnerCost0;
3049 NewCost += InnerCost1;
3052 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3054 if (NewCost > OldCost)
3058 replaceValue(
I, *Shuf);
3074bool VectorCombine::foldShufflesOfLengthChangingShuffles(Instruction &
I) {
3079 unsigned ChainLength = 0;
3080 SmallVector<int>
Mask;
3081 SmallVector<int> YMask;
3091 ArrayRef<int> OuterMask;
3092 Value *OuterV0, *OuterV1;
3093 if (ChainLength != 0 && !Trunk->
hasOneUse())
3096 m_Mask(OuterMask))))
3098 if (OuterV0->
getType() != TrunkType) {
3104 ArrayRef<int> InnerMask0, InnerMask1;
3105 Value *A0, *A1, *B0, *B1;
3110 bool Match0Leaf = Match0 && A0->
getType() !=
I.getType();
3111 bool Match1Leaf = Match1 && A1->
getType() !=
I.getType();
3112 if (Match0Leaf == Match1Leaf) {
3118 SmallVector<int> CommutedOuterMask;
3125 for (
int &M : CommutedOuterMask) {
3128 if (M < (
int)NumTrunkElts)
3133 OuterMask = CommutedOuterMask;
3152 int NumLeafElts = YType->getNumElements();
3153 SmallVector<int> LocalYMask(InnerMask1);
3154 for (
int &M : LocalYMask) {
3155 if (M >= NumLeafElts)
3165 Mask.assign(OuterMask);
3166 YMask.
assign(LocalYMask);
3167 OldCost = NewCost = LocalOldCost;
3174 SmallVector<int> NewYMask(YMask);
3176 for (
auto [CombinedM, LeafM] :
llvm::zip(NewYMask, LocalYMask)) {
3177 if (LeafM == -1 || CombinedM == LeafM)
3179 if (CombinedM == -1) {
3189 SmallVector<int> NewMask;
3190 NewMask.
reserve(NumTrunkElts);
3191 for (
int M : Mask) {
3192 if (M < 0 || M >=
static_cast<int>(NumTrunkElts))
3207 if (LocalNewCost >= NewCost && LocalOldCost < LocalNewCost - NewCost)
3211 if (ChainLength == 1) {
3212 dbgs() <<
"Found chain of shuffles fed by length-changing shuffles: "
3215 dbgs() <<
" next chain link: " << *Trunk <<
'\n'
3216 <<
" old cost: " << (OldCost + LocalOldCost)
3217 <<
" new cost: " << LocalNewCost <<
'\n';
3222 OldCost += LocalOldCost;
3223 NewCost = LocalNewCost;
3227 if (ChainLength <= 1)
3231 return M < 0 || M >=
static_cast<int>(NumTrunkElts);
3234 for (
int &M : Mask) {
3235 if (M >=
static_cast<int>(NumTrunkElts))
3236 M = YMask[
M - NumTrunkElts];
3240 replaceValue(
I, *Root);
3247 replaceValue(
I, *Root);
3253bool VectorCombine::foldShuffleOfIntrinsics(Instruction &
I) {
3255 ArrayRef<int> OldMask;
3265 if (IID != II1->getIntrinsicID())
3274 if (!ShuffleDstTy || !II0Ty)
3280 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3282 II0->getArgOperand(
I) != II1->getArgOperand(
I))
3288 II0Ty, OldMask,
CostKind, 0,
nullptr, {II0, II1}, &
I);
3292 SmallDenseSet<std::pair<Value *, Value *>> SeenOperandPairs;
3293 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3295 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3299 ShuffleDstTy->getNumElements());
3301 std::pair<Value *, Value *> OperandPair =
3302 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3303 if (!SeenOperandPairs.
insert(OperandPair).second) {
3309 CostKind, 0,
nullptr, {II0->getArgOperand(
I), II1->getArgOperand(
I)});
3312 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3315 if (!II0->hasOneUse())
3317 if (II1 != II0 && !II1->hasOneUse())
3321 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3324 if (NewCost > OldCost)
3328 SmallDenseMap<std::pair<Value *, Value *>,
Value *> ShuffleCache;
3329 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I)
3333 std::pair<Value *, Value *> OperandPair =
3334 std::make_pair(II0->getArgOperand(
I), II1->getArgOperand(
I));
3335 auto It = ShuffleCache.
find(OperandPair);
3336 if (It != ShuffleCache.
end()) {
3342 II1->getArgOperand(
I), OldMask);
3343 ShuffleCache[OperandPair] = Shuf;
3351 NewInst->copyIRFlags(II0);
3352 NewInst->andIRFlags(II1);
3355 replaceValue(
I, *NewIntrinsic);
3361bool VectorCombine::foldPermuteOfIntrinsic(Instruction &
I) {
3373 if (!ShuffleDstTy || !IntrinsicSrcTy)
3377 unsigned NumSrcElts = IntrinsicSrcTy->getNumElements();
3378 if (
any_of(Mask, [NumSrcElts](
int M) {
return M >= (int)NumSrcElts; }))
3391 IntrinsicSrcTy, Mask,
CostKind, 0,
nullptr, {V0}, &
I);
3395 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3397 NewArgsTy.
push_back(II0->getArgOperand(
I)->getType());
3401 ShuffleDstTy->getNumElements());
3404 ArgTy, VecTy, Mask,
CostKind, 0,
nullptr,
3405 {II0->getArgOperand(
I)});
3408 IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
3413 if (!II0->hasOneUse())
3416 LLVM_DEBUG(
dbgs() <<
"Found a permute of intrinsic: " <<
I <<
"\n OldCost: "
3417 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
3419 if (NewCost > OldCost)
3424 for (
unsigned I = 0,
E = II0->arg_size();
I !=
E; ++
I) {
3439 replaceValue(
I, *NewIntrinsic);
3449 int M = SV->getMaskValue(Lane);
3452 if (
static_cast<unsigned>(M) < NumElts) {
3453 U = &SV->getOperandUse(0);
3456 U = &SV->getOperandUse(1);
3467 auto [U, Lane] = IL;
3481 unsigned NumElts = Ty->getNumElements();
3482 if (Item.
size() == NumElts || NumElts == 1 || Item.
size() % NumElts != 0)
3488 std::iota(ConcatMask.
begin(), ConcatMask.
end(), 0);
3494 unsigned NumSlices = Item.
size() / NumElts;
3499 for (
unsigned Slice = 0; Slice < NumSlices; ++Slice) {
3500 Use *SliceV = Item[Slice * NumElts].first;
3501 if (!SliceV || SliceV->get()->
getType() != Ty)
3503 for (
unsigned Elt = 0; Elt < NumElts; ++Elt) {
3504 auto [V, Lane] = Item[Slice * NumElts + Elt];
3505 if (Lane !=
static_cast<int>(Elt) || SliceV->get() != V->get())
3518 auto [FrontU, FrontLane] = Item.
front();
3520 if (IdentityLeafs.
contains(FrontU)) {
3521 return FrontU->get();
3525 return Builder.CreateShuffleVector(FrontU->get(), Mask);
3527 if (ConcatLeafs.
contains(FrontU)) {
3531 for (
unsigned S = 0; S < Values.
size(); ++S)
3532 Values[S] = Item[S * NumElts].first->get();
3534 while (Values.
size() > 1) {
3537 std::iota(Mask.begin(), Mask.end(), 0);
3539 for (
unsigned S = 0; S < NewValues.
size(); ++S)
3541 Builder.CreateShuffleVector(Values[S * 2], Values[S * 2 + 1], Mask);
3549 unsigned NumOps =
I->getNumOperands() - (
II ? 1 : 0);
3551 for (
unsigned Idx = 0; Idx <
NumOps; Idx++) {
3554 Ops[Idx] =
II->getOperand(Idx);
3558 Ty, IdentityLeafs, SplatLeafs, ConcatLeafs,
3563 for (
const auto &Lane : Item)
3576 auto *
Value = Builder.CreateCmp(CI->getPredicate(),
Ops[0],
Ops[1]);
3586 auto *
Value = Builder.CreateCast(CI->getOpcode(),
Ops[0], DstTy);
3591 auto *
Value = Builder.CreateIntrinsic(DstTy,
II->getIntrinsicID(),
Ops);
3605bool VectorCombine::foldShuffleToIdentity(Instruction &
I) {
3607 if (!Ty ||
I.use_empty())
3611 for (
unsigned M = 0,
E = Ty->getNumElements(); M <
E; ++M)
3616 SmallPtrSet<Use *, 4> IdentityLeafs, SplatLeafs, ConcatLeafs;
3617 unsigned NumVisited = 0;
3619 while (!Worklist.
empty()) {
3624 auto [FrontU, FrontLane] = Item.
front();
3632 return X->getType() ==
Y->getType() &&
3637 if (FrontLane == 0 &&
3639 Ty->getNumElements() &&
3642 return !
E.value().first || (IsEquiv(
E.value().first->get(), FrontV) &&
3643 E.value().second == (int)
E.index());
3645 IdentityLeafs.
insert(FrontU);
3650 C &&
C->getSplatValue() &&
3658 SplatLeafs.
insert(FrontU);
3663 auto [FrontU, FrontLane] = Item.
front();
3664 auto [
U, Lane] = IL;
3665 return !
U || (
U->get() == FrontU->get() && Lane == FrontLane);
3667 SplatLeafs.
insert(FrontU);
3673 auto CheckLaneIsEquivalentToFirst = [Item](
InstLane IL) {
3677 Value *
V = IL.first->get();
3683 if (CI->getPredicate() !=
cast<CmpInst>(FrontV)->getPredicate())
3686 if (CI->getSrcTy()->getScalarType() !=
3691 SI->getOperand(0)->getType() !=
3698 II->getIntrinsicID() ==
3700 !
II->hasOperandBundles());
3707 BO && BO->isIntDivRem())
3712 }
else if (
isa<UnaryOperator, TruncInst, ZExtInst, SExtInst, FPToSIInst,
3713 FPToUIInst, SIToFPInst, UIToFPInst>(FrontU)) {
3720 if (DstTy && SrcTy &&
3721 SrcTy->getNumElements() == DstTy->getNumElements()) {
3732 !
II->hasOperandBundles()) {
3733 for (
unsigned Op = 0,
E =
II->getNumOperands() - 1;
Op <
E;
Op++) {
3752 ConcatLeafs.
insert(FrontU);
3759 if (NumVisited <= 1)
3762 LLVM_DEBUG(
dbgs() <<
"Found a superfluous identity shuffle: " <<
I <<
"\n");
3768 ConcatLeafs, Builder, &
TTI);
3769 replaceValue(
I, *V);
3776bool VectorCombine::foldShuffleFromReductions(Instruction &
I) {
3780 switch (
II->getIntrinsicID()) {
3781 case Intrinsic::vector_reduce_add:
3782 case Intrinsic::vector_reduce_mul:
3783 case Intrinsic::vector_reduce_and:
3784 case Intrinsic::vector_reduce_or:
3785 case Intrinsic::vector_reduce_xor:
3786 case Intrinsic::vector_reduce_smin:
3787 case Intrinsic::vector_reduce_smax:
3788 case Intrinsic::vector_reduce_umin:
3789 case Intrinsic::vector_reduce_umax:
3798 std::queue<Value *> Worklist;
3799 SmallPtrSet<Value *, 4> Visited;
3800 ShuffleVectorInst *Shuffle =
nullptr;
3804 while (!Worklist.empty()) {
3805 Value *CV = Worklist.front();
3817 if (CI->isBinaryOp()) {
3818 for (
auto *
Op : CI->operand_values())
3822 if (Shuffle && Shuffle != SV)
3839 for (
auto *V : Visited)
3840 for (
auto *U :
V->users())
3841 if (!Visited.contains(U) && U != &
I)
3844 FixedVectorType *VecType =
3848 FixedVectorType *ShuffleInputType =
3850 if (!ShuffleInputType)
3856 SmallVector<int> ConcatMask;
3858 sort(ConcatMask, [](
int X,
int Y) {
return (
unsigned)
X < (unsigned)
Y; });
3859 bool UsesSecondVec =
3860 any_of(ConcatMask, [&](
int M) {
return M >= (int)NumInputElts; });
3867 ShuffleInputType, ConcatMask,
CostKind);
3869 LLVM_DEBUG(
dbgs() <<
"Found a reduction feeding from a shuffle: " << *Shuffle
3871 LLVM_DEBUG(
dbgs() <<
" OldCost: " << OldCost <<
" vs NewCost: " << NewCost
3873 bool MadeChanges =
false;
3874 if (NewCost < OldCost) {
3878 LLVM_DEBUG(
dbgs() <<
"Created new shuffle: " << *NewShuffle <<
"\n");
3879 replaceValue(*Shuffle, *NewShuffle);
3885 MadeChanges |= foldSelectShuffle(*Shuffle,
true);
3931bool VectorCombine::foldShuffleChainsToReduce(Instruction &
I) {
3933 std::queue<Value *> InstWorklist;
3937 std::optional<unsigned int> CommonCallOp = std::nullopt;
3938 std::optional<Instruction::BinaryOps> CommonBinOp = std::nullopt;
3940 bool IsFirstCallOrBinInst =
true;
3941 bool ShouldBeCallOrBinInst =
true;
3947 SmallVector<Value *, 2> PrevVecV(2,
nullptr);
3957 int64_t
VecSize = FVT->getNumElements();
3963 unsigned int NumLevels =
Log2_64_Ceil(VecSize), VisitedCnt = 0;
3964 int64_t ShuffleMaskHalf = 1, ExpectedParityMask = 0;
3974 for (
int Cur = VecSize, Mask = NumLevels - 1; Cur > 1;
3975 Cur = (Cur + 1) / 2, --
Mask) {
3977 ExpectedParityMask |= (1ll <<
Mask);
3980 InstWorklist.push(VecOpEE);
3982 while (!InstWorklist.empty()) {
3983 Value *CI = InstWorklist.front();
3987 if (!ShouldBeCallOrBinInst)
3990 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
3995 if (
II != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
3997 IsFirstCallOrBinInst =
false;
4000 CommonCallOp =
II->getIntrinsicID();
4001 if (
II->getIntrinsicID() != *CommonCallOp)
4004 switch (
II->getIntrinsicID()) {
4005 case Intrinsic::umin:
4006 case Intrinsic::umax:
4007 case Intrinsic::smin:
4008 case Intrinsic::smax: {
4009 auto *Op0 =
II->getOperand(0);
4010 auto *Op1 =
II->getOperand(1);
4018 ShouldBeCallOrBinInst ^= 1;
4020 IntrinsicCostAttributes ICA(
4021 *CommonCallOp,
II->getType(),
4022 {PrevVecV[0]->getType(), PrevVecV[1]->getType()});
4029 InstWorklist.push(PrevVecV[1]);
4030 InstWorklist.push(PrevVecV[0]);
4034 if (!ShouldBeCallOrBinInst)
4037 if (!IsFirstCallOrBinInst &&
any_of(PrevVecV,
equal_to(
nullptr)))
4040 if (BinOp != (IsFirstCallOrBinInst ? VecOpEE : PrevVecV[0]))
4042 IsFirstCallOrBinInst =
false;
4050 switch (*CommonBinOp) {
4051 case BinaryOperator::Add:
4052 case BinaryOperator::Mul:
4053 case BinaryOperator::Or:
4054 case BinaryOperator::And:
4055 case BinaryOperator::Xor: {
4065 ShouldBeCallOrBinInst ^= 1;
4072 InstWorklist.push(PrevVecV[1]);
4073 InstWorklist.push(PrevVecV[0]);
4077 if (ShouldBeCallOrBinInst ||
any_of(PrevVecV,
equal_to(
nullptr)))
4080 if (SVInst != PrevVecV[1])
4083 ArrayRef<int> CurMask;
4089 for (
int Mask = 0, MaskSize = CurMask.
size(); Mask != MaskSize; ++Mask) {
4090 if (Mask < ShuffleMaskHalf &&
4091 CurMask[Mask] != ShuffleMaskHalf + Mask - (ExpectedParityMask & 1))
4093 if (Mask >= ShuffleMaskHalf && CurMask[Mask] != -1)
4098 ShuffleMaskHalf *= 2;
4099 ShuffleMaskHalf -= (ExpectedParityMask & 1);
4100 ExpectedParityMask >>= 1;
4103 SVInst->getType(), SVInst->getType(),
4107 if (!ExpectedParityMask && VisitedCnt == NumLevels)
4110 ShouldBeCallOrBinInst ^= 1;
4117 if (ShouldBeCallOrBinInst)
4120 assert(VecSize != -1 &&
"Expected Match for Vector Size");
4122 Value *FinalVecV = PrevVecV[0];
4134 IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
4137 if (NewCost >= OrigCost)
4140 auto *ReducedResult =
4142 replaceValue(
I, *ReducedResult);
4151bool VectorCombine::foldCastFromReductions(Instruction &
I) {
4156 bool TruncOnly =
false;
4159 case Intrinsic::vector_reduce_add:
4160 case Intrinsic::vector_reduce_mul:
4163 case Intrinsic::vector_reduce_and:
4164 case Intrinsic::vector_reduce_or:
4165 case Intrinsic::vector_reduce_xor:
4172 Value *ReductionSrc =
I.getOperand(0);
4184 Type *ResultTy =
I.getType();
4187 ReductionOpc, ReductionSrcTy, std::nullopt,
CostKind);
4197 if (OldCost <= NewCost || !NewCost.
isValid())
4201 II->getIntrinsicID(), {Src});
4203 replaceValue(
I, *NewCast);
4231bool VectorCombine::foldSignBitReductionCmp(Instruction &
I) {
4233 IntrinsicInst *ReduceOp;
4234 const APInt *CmpVal;
4241 case Intrinsic::vector_reduce_or:
4242 case Intrinsic::vector_reduce_umax:
4243 case Intrinsic::vector_reduce_and:
4244 case Intrinsic::vector_reduce_umin:
4245 case Intrinsic::vector_reduce_add:
4256 unsigned BitWidth = VecTy->getScalarSizeInBits();
4260 unsigned NumElts = VecTy->getNumElements();
4269 case Intrinsic::vector_reduce_or:
4270 case Intrinsic::vector_reduce_umax:
4271 TreeOpcode = Instruction::Or;
4273 case Intrinsic::vector_reduce_and:
4274 case Intrinsic::vector_reduce_umin:
4275 TreeOpcode = Instruction::And;
4277 case Intrinsic::vector_reduce_add:
4278 TreeOpcode = Instruction::Add;
4286 SmallVector<Value *, 8> Worklist;
4287 SmallVector<Value *, 8> Sources;
4289 std::optional<bool> IsAShr;
4290 constexpr unsigned MaxSources = 8;
4295 while (!Worklist.
empty() && Worklist.
size() <= MaxSources &&
4296 Sources.
size() <= MaxSources) {
4305 bool ThisIsAShr = Shr->getOpcode() == Instruction::AShr;
4307 IsAShr = ThisIsAShr;
4308 else if (*IsAShr != ThisIsAShr)
4334 if (Sources.
empty() || Sources.
size() > MaxSources ||
4335 Worklist.
size() > MaxSources || !IsAShr)
4338 unsigned NumSources = Sources.
size();
4342 if (OrigIID == Intrinsic::vector_reduce_add &&
4350 (OrigIID == Intrinsic::vector_reduce_add) ? NumSources * NumElts : 1;
4353 NegativeVal.negate();
4385 TestsNegative =
false;
4386 }
else if (*CmpVal == NegativeVal) {
4387 TestsNegative =
true;
4391 IsEq = Pred == ICmpInst::ICMP_EQ;
4392 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeHigh) {
4394 TestsNegative = (RangeHigh == NegativeVal);
4395 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeHigh - 1) {
4397 TestsNegative = (RangeHigh == NegativeVal);
4398 }
else if (Pred == ICmpInst::ICMP_SGT && *CmpVal == RangeLow) {
4400 TestsNegative = (RangeLow == NegativeVal);
4401 }
else if (Pred == ICmpInst::ICMP_SLT && *CmpVal == RangeLow + 1) {
4403 TestsNegative = (RangeLow == NegativeVal);
4446 enum CheckKind :
unsigned {
4453 auto RequiresOr = [](CheckKind
C) ->
bool {
return C & 0b100; };
4455 auto IsNegativeCheck = [](CheckKind
C) ->
bool {
return C & 0b010; };
4457 auto Invert = [](CheckKind
C) {
return CheckKind(
C ^ 0b011); };
4461 case Intrinsic::vector_reduce_or:
4462 case Intrinsic::vector_reduce_umax:
4463 Base = TestsNegative ? AnyNeg : AllNonNeg;
4465 case Intrinsic::vector_reduce_and:
4466 case Intrinsic::vector_reduce_umin:
4467 Base = TestsNegative ? AllNeg : AnyNonNeg;
4469 case Intrinsic::vector_reduce_add:
4470 Base = TestsNegative ? AllNeg : AllNonNeg;
4485 return ArithCost <= MinMaxCost ? std::make_pair(Arith, ArithCost)
4486 : std::make_pair(MinMax, MinMaxCost);
4490 auto [NewIID, NewCost] = RequiresOr(
Check)
4491 ? PickCheaper(Intrinsic::vector_reduce_or,
4492 Intrinsic::vector_reduce_umax)
4493 : PickCheaper(
Intrinsic::vector_reduce_and,
4497 if (NumSources > 1) {
4498 unsigned CombineOpc =
4499 RequiresOr(
Check) ? Instruction::Or : Instruction::And;
4504 LLVM_DEBUG(
dbgs() <<
"Found sign-bit reduction cmp: " <<
I <<
"\n OldCost: "
4505 << OldCost <<
" vs NewCost: " << NewCost <<
"\n");
4507 if (NewCost > OldCost)
4512 Type *ScalarTy = VecTy->getScalarType();
4515 if (NumSources == 1) {
4526 replaceValue(
I, *NewCmp);
4551bool VectorCombine::foldICmpEqZeroVectorReduce(Instruction &
I) {
4562 switch (
II->getIntrinsicID()) {
4563 case Intrinsic::vector_reduce_add:
4564 case Intrinsic::vector_reduce_or:
4565 case Intrinsic::vector_reduce_umin:
4566 case Intrinsic::vector_reduce_umax:
4567 case Intrinsic::vector_reduce_smin:
4568 case Intrinsic::vector_reduce_smax:
4574 Value *InnerOp =
II->getArgOperand(0);
4617 switch (
II->getIntrinsicID()) {
4618 case Intrinsic::vector_reduce_add: {
4623 unsigned NumElems = XTy->getNumElements();
4629 if (LeadingZerosX <= LostBits || LeadingZerosFX <= LostBits)
4637 case Intrinsic::vector_reduce_smin:
4638 case Intrinsic::vector_reduce_smax:
4648 LLVM_DEBUG(
dbgs() <<
"Found a reduction to 0 comparison with removable op: "
4664 case Intrinsic::vector_reduce_add:
4665 case Intrinsic::vector_reduce_or:
4671 case Intrinsic::vector_reduce_umin:
4672 case Intrinsic::vector_reduce_umax:
4673 case Intrinsic::vector_reduce_smin:
4674 case Intrinsic::vector_reduce_smax:
4686 NewReduceCost + (InnerOp->
hasOneUse() ? 0 : ExtCost);
4688 LLVM_DEBUG(
dbgs() <<
"Found a removable extension before reduction: "
4689 << *InnerOp <<
"\n OldCost: " << OldCost
4690 <<
" vs NewCost: " << NewCost <<
"\n");
4696 if (NewCost > OldCost)
4705 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::getNullValue(Ty));
4706 replaceValue(
I, *NewCmp);
4737bool VectorCombine::foldEquivalentReductionCmp(Instruction &
I) {
4740 const APInt *CmpVal;
4745 if (!
II || !
II->hasOneUse())
4748 const auto IsValidOrUmaxCmp = [&]() {
4757 bool IsPositive = CmpVal->
isAllOnes() && Pred == ICmpInst::ICMP_SGT;
4759 bool IsNegative = (CmpVal->
isZero() || CmpVal->
isOne() || *CmpVal == 2) &&
4760 Pred == ICmpInst::ICMP_SLT;
4761 return IsEquality || IsPositive || IsNegative;
4764 const auto IsValidAndUminCmp = [&]() {
4769 const auto LeadingOnes = CmpVal->
countl_one();
4776 bool IsNegative = CmpVal->
isZero() && Pred == ICmpInst::ICMP_SLT;
4785 ((*CmpVal)[0] || (*CmpVal)[1]) && Pred == ICmpInst::ICMP_SGT;
4786 return IsEquality || IsNegative || IsPositive;
4794 switch (OriginalIID) {
4795 case Intrinsic::vector_reduce_or:
4796 if (!IsValidOrUmaxCmp())
4798 AlternativeIID = Intrinsic::vector_reduce_umax;
4800 case Intrinsic::vector_reduce_umax:
4801 if (!IsValidOrUmaxCmp())
4803 AlternativeIID = Intrinsic::vector_reduce_or;
4805 case Intrinsic::vector_reduce_and:
4806 if (!IsValidAndUminCmp())
4808 AlternativeIID = Intrinsic::vector_reduce_umin;
4810 case Intrinsic::vector_reduce_umin:
4811 if (!IsValidAndUminCmp())
4813 AlternativeIID = Intrinsic::vector_reduce_and;
4826 if (ReductionOpc != Instruction::ICmp)
4837 <<
"\n OrigCost: " << OrigCost
4838 <<
" vs AltCost: " << AltCost <<
"\n");
4840 if (AltCost >= OrigCost)
4844 Type *ScalarTy = VecTy->getScalarType();
4847 Builder.
CreateICmp(Pred, NewReduce, ConstantInt::get(ScalarTy, *CmpVal));
4849 replaceValue(
I, *NewCmp);
4858 constexpr unsigned MaxVisited = 32;
4861 bool FoundReduction =
false;
4864 while (!WorkList.
empty()) {
4866 for (
User *U :
I->users()) {
4868 if (!UI || !Visited.
insert(UI).second)
4870 if (Visited.
size() > MaxVisited)
4876 switch (
II->getIntrinsicID()) {
4877 case Intrinsic::vector_reduce_add:
4878 case Intrinsic::vector_reduce_mul:
4879 case Intrinsic::vector_reduce_and:
4880 case Intrinsic::vector_reduce_or:
4881 case Intrinsic::vector_reduce_xor:
4882 case Intrinsic::vector_reduce_smin:
4883 case Intrinsic::vector_reduce_smax:
4884 case Intrinsic::vector_reduce_umin:
4885 case Intrinsic::vector_reduce_umax:
4886 FoundReduction =
true;
4899 return FoundReduction;
4912bool VectorCombine::foldSelectShuffle(Instruction &
I,
bool FromReduction) {
4917 if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
4925 SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
4927 if (!
I ||
I->getOperand(0)->getType() != VT)
4929 return any_of(
I->users(), [&](User *U) {
4930 return U != Op0 && U != Op1 &&
4931 !(isa<ShuffleVectorInst>(U) &&
4932 (InputShuffles.contains(cast<Instruction>(U)) ||
4933 isInstructionTriviallyDead(cast<Instruction>(U))));
4936 if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
4937 checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
4945 for (
auto *U :
I->users()) {
4947 if (!SV || SV->getType() != VT)
4949 if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
4950 (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
4957 if (!collectShuffles(Op0) || !collectShuffles(Op1))
4961 if (FromReduction && Shuffles.
size() > 1)
4966 if (!FromReduction) {
4967 for (ShuffleVectorInst *SV : Shuffles) {
4968 for (
auto *U : SV->users()) {
4971 Shuffles.push_back(SSV);
4983 int MaxV1Elt = 0, MaxV2Elt = 0;
4984 unsigned NumElts = VT->getNumElements();
4985 for (ShuffleVectorInst *SVN : Shuffles) {
4986 SmallVector<int>
Mask;
4987 SVN->getShuffleMask(Mask);
4991 Value *SVOp0 = SVN->getOperand(0);
4992 Value *SVOp1 = SVN->getOperand(1);
4997 for (
int &Elem : Mask) {
5003 if (SVOp0 == Op1 && SVOp1 == Op0) {
5007 if (SVOp0 != Op0 || SVOp1 != Op1)
5013 SmallVector<int> ReconstructMask;
5014 for (
unsigned I = 0;
I <
Mask.size();
I++) {
5017 }
else if (Mask[
I] <
static_cast<int>(NumElts)) {
5018 MaxV1Elt = std::max(MaxV1Elt, Mask[
I]);
5019 auto It =
find_if(V1, [&](
const std::pair<int, int> &
A) {
5020 return Mask[
I] ==
A.first;
5029 MaxV2Elt = std::max<int>(MaxV2Elt, Mask[
I] - NumElts);
5030 auto It =
find_if(V2, [&](
const std::pair<int, int> &
A) {
5031 return Mask[
I] -
static_cast<int>(NumElts) ==
A.first;
5045 sort(ReconstructMask);
5046 OrigReconstructMasks.
push_back(std::move(ReconstructMask));
5054 (MaxV1Elt ==
static_cast<int>(V1.
size()) - 1 &&
5055 MaxV2Elt ==
static_cast<int>(V2.
size()) - 1))
5067 if (InputShuffles.contains(SSV))
5069 return SV->getMaskValue(M);
5077 std::pair<int, int>
Y) {
5078 int MXA = GetBaseMaskValue(
A,
X.first);
5079 int MYA = GetBaseMaskValue(
A,
Y.first);
5082 stable_sort(V1, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5083 return SortBase(SVI0A,
A,
B);
5085 stable_sort(V2, [&](std::pair<int, int>
A, std::pair<int, int>
B) {
5086 return SortBase(SVI1A,
A,
B);
5091 for (
const auto &Mask : OrigReconstructMasks) {
5092 SmallVector<int> ReconstructMask;
5093 for (
int M : Mask) {
5095 auto It =
find_if(V, [M](
auto A) {
return A.second ==
M; });
5096 assert(It !=
V.end() &&
"Expected all entries in Mask");
5097 return std::distance(
V.begin(), It);
5101 else if (M <
static_cast<int>(NumElts)) {
5102 ReconstructMask.
push_back(FindIndex(V1, M));
5104 ReconstructMask.
push_back(NumElts + FindIndex(V2, M));
5107 ReconstructMasks.
push_back(std::move(ReconstructMask));
5112 SmallVector<int> V1A, V1B, V2A, V2B;
5113 for (
unsigned I = 0;
I < V1.
size();
I++) {
5114 V1A.
push_back(GetBaseMaskValue(SVI0A, V1[
I].first));
5115 V1B.
push_back(GetBaseMaskValue(SVI0B, V1[
I].first));
5117 for (
unsigned I = 0;
I < V2.
size();
I++) {
5118 V2A.
push_back(GetBaseMaskValue(SVI1A, V2[
I].first));
5119 V2B.
push_back(GetBaseMaskValue(SVI1B, V2[
I].first));
5121 while (V1A.
size() < NumElts) {
5125 while (V2A.
size() < NumElts) {
5137 VT, VT, SV->getShuffleMask(),
CostKind);
5144 unsigned ElementSize = VT->getElementType()->getPrimitiveSizeInBits();
5145 unsigned MaxVectorSize =
5147 unsigned MaxElementsInVector = MaxVectorSize / ElementSize;
5148 if (MaxElementsInVector == 0)
5157 std::set<SmallVector<int, 4>> UniqueShuffles;
5162 unsigned NumFullVectors =
Mask.size() / MaxElementsInVector;
5163 if (NumFullVectors < 2)
5164 return C + ShuffleCost;
5165 SmallVector<int, 4> SubShuffle(MaxElementsInVector);
5166 unsigned NumUniqueGroups = 0;
5167 unsigned NumGroups =
Mask.size() / MaxElementsInVector;
5170 for (
unsigned I = 0;
I < NumFullVectors; ++
I) {
5171 for (
unsigned J = 0; J < MaxElementsInVector; ++J)
5172 SubShuffle[J] = Mask[MaxElementsInVector *
I + J];
5173 if (UniqueShuffles.insert(SubShuffle).second)
5174 NumUniqueGroups += 1;
5176 return C + ShuffleCost * NumUniqueGroups / NumGroups;
5182 SmallVector<int, 16>
Mask;
5183 SV->getShuffleMask(Mask);
5184 return AddShuffleMaskAdjustedCost(
C, Mask);
5187 auto AllShufflesHaveSameOperands =
5188 [](SmallPtrSetImpl<Instruction *> &InputShuffles) {
5189 if (InputShuffles.size() < 2)
5191 ShuffleVectorInst *FirstSV =
5198 std::next(InputShuffles.begin()), InputShuffles.end(),
5199 [&](Instruction *
I) {
5200 ShuffleVectorInst *SV = dyn_cast<ShuffleVectorInst>(I);
5201 return SV && SV->getOperand(0) == In0 && SV->getOperand(1) == In1;
5210 CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
5212 if (AllShufflesHaveSameOperands(InputShuffles)) {
5213 UniqueShuffles.clear();
5214 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5217 CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
5223 FixedVectorType *Op0SmallVT =
5225 FixedVectorType *Op1SmallVT =
5230 UniqueShuffles.clear();
5231 CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
5233 std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
5235 std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
5238 LLVM_DEBUG(
dbgs() <<
"Found a binop select shuffle pattern: " <<
I <<
"\n");
5240 <<
" vs CostAfter: " << CostAfter <<
"\n");
5241 if (CostBefore < CostAfter ||
5252 if (InputShuffles.contains(SSV))
5254 return SV->getOperand(
Op);
5258 GetShuffleOperand(SVI0A, 1), V1A);
5261 GetShuffleOperand(SVI0B, 1), V1B);
5264 GetShuffleOperand(SVI1A, 1), V2A);
5267 GetShuffleOperand(SVI1B, 1), V2B);
5272 I->copyIRFlags(Op0,
true);
5277 I->copyIRFlags(Op1,
true);
5279 for (
int S = 0,
E = ReconstructMasks.size(); S !=
E; S++) {
5282 replaceValue(*Shuffles[S], *NSV,
false);
5285 Worklist.pushValue(NSV0A);
5286 Worklist.pushValue(NSV0B);
5287 Worklist.pushValue(NSV1A);
5288 Worklist.pushValue(NSV1B);
5298bool VectorCombine::shrinkType(Instruction &
I) {
5299 Value *ZExted, *OtherOperand;
5305 Value *ZExtOperand =
I.getOperand(
I.getOperand(0) == OtherOperand ? 1 : 0);
5309 unsigned BW = SmallTy->getElementType()->getPrimitiveSizeInBits();
5311 if (
I.getOpcode() == Instruction::LShr) {
5328 Instruction::ZExt, BigTy, SmallTy,
5329 TargetTransformInfo::CastContextHint::None,
CostKind);
5334 for (User *U : ZExtOperand->
users()) {
5341 ShrinkCost += ZExtCost;
5356 ShrinkCost += ZExtCost;
5363 Instruction::Trunc, SmallTy, BigTy,
5364 TargetTransformInfo::CastContextHint::None,
CostKind);
5369 if (ShrinkCost > CurrentCost)
5373 Value *Op0 = ZExted;
5376 if (
I.getOperand(0) == OtherOperand)
5383 replaceValue(
I, *NewZExtr);
5389bool VectorCombine::foldInsExtVectorToShuffle(Instruction &
I) {
5390 Value *DstVec, *SrcVec;
5391 uint64_t ExtIdx, InsIdx;
5401 if (!DstVecTy || !SrcVecTy ||
5407 if (InsIdx >= NumDstElts || ExtIdx >= NumSrcElts || NumDstElts == 1)
5414 bool NeedExpOrNarrow = NumSrcElts != NumDstElts;
5416 if (NeedDstSrcSwap) {
5418 Mask[InsIdx] = ExtIdx % NumDstElts;
5422 std::iota(
Mask.begin(),
Mask.end(), 0);
5423 Mask[InsIdx] = (ExtIdx % NumDstElts) + NumDstElts;
5436 SmallVector<int> ExtToVecMask;
5437 if (!NeedExpOrNarrow) {
5442 nullptr, {DstVec, SrcVec});
5448 ExtToVecMask[ExtIdx % NumDstElts] = ExtIdx;
5451 DstVecTy, SrcVecTy, ExtToVecMask,
CostKind);
5455 if (!Ext->hasOneUse())
5458 LLVM_DEBUG(
dbgs() <<
"Found a insert/extract shuffle-like pair: " <<
I
5459 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5462 if (OldCost < NewCost)
5465 if (NeedExpOrNarrow) {
5466 if (!NeedDstSrcSwap)
5479 replaceValue(
I, *Shuf);
5488bool VectorCombine::foldInterleaveIntrinsics(Instruction &
I) {
5489 const APInt *SplatVal0, *SplatVal1;
5499 auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
5500 unsigned Width = VTy->getElementType()->getIntegerBitWidth();
5509 LLVM_DEBUG(
dbgs() <<
"VC: The cost to cast from " << *ExtVTy <<
" to "
5510 << *
I.getType() <<
" is too high.\n");
5514 APInt NewSplatVal = SplatVal1->
zext(Width * 2);
5515 NewSplatVal <<= Width;
5516 NewSplatVal |= SplatVal0->
zext(Width * 2);
5518 ExtVTy->getElementCount(), ConstantInt::get(
F.getContext(), NewSplatVal));
5526bool VectorCombine::shrinkLoadForShuffles(Instruction &
I) {
5528 if (!OldLoad || !OldLoad->isSimple())
5535 unsigned const OldNumElements = OldLoadTy->getNumElements();
5541 using IndexRange = std::pair<int, int>;
5542 auto GetIndexRangeInShuffles = [&]() -> std::optional<IndexRange> {
5543 IndexRange OutputRange = IndexRange(OldNumElements, -1);
5544 for (llvm::Use &Use :
I.uses()) {
5546 User *Shuffle =
Use.getUser();
5551 return std::nullopt;
5558 for (
int Index : Mask) {
5559 if (Index >= 0 && Index <
static_cast<int>(OldNumElements)) {
5560 OutputRange.first = std::min(Index, OutputRange.first);
5561 OutputRange.second = std::max(Index, OutputRange.second);
5566 if (OutputRange.second < OutputRange.first)
5567 return std::nullopt;
5573 if (std::optional<IndexRange> Indices = GetIndexRangeInShuffles()) {
5574 unsigned const NewNumElements = Indices->second + 1u;
5578 if (NewNumElements < OldNumElements) {
5583 Type *ElemTy = OldLoadTy->getElementType();
5585 Value *PtrOp = OldLoad->getPointerOperand();
5588 Instruction::Load, OldLoad->getType(), OldLoad->getAlign(),
5589 OldLoad->getPointerAddressSpace(),
CostKind);
5592 OldLoad->getPointerAddressSpace(),
CostKind);
5594 using UseEntry = std::pair<ShuffleVectorInst *, std::vector<int>>;
5596 unsigned const MaxIndex = NewNumElements * 2u;
5598 for (llvm::Use &Use :
I.uses()) {
5605 ArrayRef<int> OldMask = Shuffle->getShuffleMask();
5611 for (
int Index : OldMask) {
5612 if (Index >=
static_cast<int>(MaxIndex))
5626 dbgs() <<
"Found a load used only by shufflevector instructions: "
5627 <<
I <<
"\n OldCost: " << OldCost
5628 <<
" vs NewCost: " << NewCost <<
"\n");
5630 if (OldCost < NewCost || !NewCost.
isValid())
5636 NewLoad->copyMetadata(
I);
5639 for (UseEntry &Use : NewUses) {
5640 ShuffleVectorInst *Shuffle =
Use.first;
5641 std::vector<int> &NewMask =
Use.second;
5648 replaceValue(*Shuffle, *NewShuffle,
false);
5661bool VectorCombine::shrinkPhiOfShuffles(Instruction &
I) {
5663 if (!Phi ||
Phi->getNumIncomingValues() != 2u)
5667 ArrayRef<int> Mask0;
5668 ArrayRef<int> Mask1;
5681 auto const InputNumElements = InputVT->getNumElements();
5683 if (InputNumElements >= ResultVT->getNumElements())
5688 SmallVector<int, 16> NewMask;
5691 for (
auto [
M0,
M1] :
zip(Mask0, Mask1)) {
5692 if (
M0 >= 0 &&
M1 >= 0)
5694 else if (
M0 == -1 &&
M1 == -1)
5707 int MaskOffset = NewMask[0
u];
5708 unsigned Index = (InputNumElements + MaskOffset) % InputNumElements;
5711 for (
unsigned I = 0u;
I < InputNumElements; ++
I) {
5725 <<
"\n OldCost: " << OldCost <<
" vs NewCost: " << NewCost
5728 if (NewCost > OldCost)
5740 auto *NewPhi = Builder.
CreatePHI(NewShuf0->getType(), 2u);
5742 NewPhi->addIncoming(
Op,
Phi->getIncomingBlock(1u));
5748 replaceValue(*Phi, *NewShuf1);
5754bool VectorCombine::run() {
5768 auto Opcode =
I.getOpcode();
5776 if (IsFixedVectorType) {
5778 case Instruction::InsertElement:
5779 if (vectorizeLoadInsert(
I))
5782 case Instruction::ShuffleVector:
5783 if (widenSubvectorLoad(
I))
5794 if (scalarizeOpOrCmp(
I))
5796 if (scalarizeLoad(
I))
5798 if (scalarizeExtExtract(
I))
5800 if (scalarizeVPIntrinsic(
I))
5802 if (foldInterleaveIntrinsics(
I))
5806 if (Opcode == Instruction::Store)
5807 if (foldSingleElementStore(
I))
5811 if (TryEarlyFoldsOnly)
5818 if (IsFixedVectorType) {
5820 case Instruction::InsertElement:
5821 if (foldInsExtFNeg(
I))
5823 if (foldInsExtBinop(
I))
5825 if (foldInsExtVectorToShuffle(
I))
5828 case Instruction::ShuffleVector:
5829 if (foldPermuteOfBinops(
I))
5831 if (foldShuffleOfBinops(
I))
5833 if (foldShuffleOfSelects(
I))
5835 if (foldShuffleOfCastops(
I))
5837 if (foldShuffleOfShuffles(
I))
5839 if (foldPermuteOfIntrinsic(
I))
5841 if (foldShufflesOfLengthChangingShuffles(
I))
5843 if (foldShuffleOfIntrinsics(
I))
5845 if (foldSelectShuffle(
I))
5847 if (foldShuffleToIdentity(
I))
5850 case Instruction::Load:
5851 if (shrinkLoadForShuffles(
I))
5854 case Instruction::BitCast:
5855 if (foldBitcastShuffle(
I))
5857 if (foldSelectsFromBitcast(
I))
5860 case Instruction::And:
5861 case Instruction::Or:
5862 case Instruction::Xor:
5863 if (foldBitOpOfCastops(
I))
5865 if (foldBitOpOfCastConstant(
I))
5868 case Instruction::PHI:
5869 if (shrinkPhiOfShuffles(
I))
5879 case Instruction::Call:
5880 if (foldShuffleFromReductions(
I))
5882 if (foldCastFromReductions(
I))
5885 case Instruction::ExtractElement:
5886 if (foldShuffleChainsToReduce(
I))
5889 case Instruction::ICmp:
5890 if (foldSignBitReductionCmp(
I))
5892 if (foldICmpEqZeroVectorReduce(
I))
5894 if (foldEquivalentReductionCmp(
I))
5897 case Instruction::FCmp:
5898 if (foldExtractExtract(
I))
5901 case Instruction::Or:
5902 if (foldConcatOfBoolMasks(
I))
5907 if (foldExtractExtract(
I))
5909 if (foldExtractedCmps(
I))
5911 if (foldBinopOfReductions(
I))
5920 bool MadeChange =
false;
5921 for (BasicBlock &BB :
F) {
5933 if (!
I->isDebugOrPseudoInst())
5934 MadeChange |= FoldInst(*
I);
5941 while (!Worklist.isEmpty()) {
5951 MadeChange |= FoldInst(*
I);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
static cl::opt< IntrinsicCostStrategy > IntrinsicCost("intrinsic-cost-strategy", cl::desc("Costing strategy for intrinsic instructions"), cl::init(IntrinsicCostStrategy::InstructionCost), cl::values(clEnumValN(IntrinsicCostStrategy::InstructionCost, "instruction-cost", "Use TargetTransformInfo::getInstructionCost"), clEnumValN(IntrinsicCostStrategy::IntrinsicCost, "intrinsic-cost", "Use TargetTransformInfo::getIntrinsicInstrCost"), clEnumValN(IntrinsicCostStrategy::TypeBasedIntrinsicCost, "type-based-intrinsic-cost", "Calculate the intrinsic cost based only on argument types")))
This file defines the DenseMap class.
This is the interface for a simple mod/ref and alias analysis over globals.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
static Value * generateNewInstTree(ArrayRef< InstLane > Item, FixedVectorType *Ty, const SmallPtrSet< Use *, 4 > &IdentityLeafs, const SmallPtrSet< Use *, 4 > &SplatLeafs, const SmallPtrSet< Use *, 4 > &ConcatLeafs, IRBuilderBase &Builder, const TargetTransformInfo *TTI)
static bool isFreeConcat(ArrayRef< InstLane > Item, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI)
Detect concat of multiple values into a vector.
static void analyzeCostOfVecReduction(const IntrinsicInst &II, TTI::TargetCostKind CostKind, const TargetTransformInfo &TTI, InstructionCost &CostBeforeReduction, InstructionCost &CostAfterReduction)
static SmallVector< InstLane > generateInstLaneVectorFromOperand(ArrayRef< InstLane > Item, int Op)
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilderBase &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
static bool feedsIntoVectorReduction(ShuffleVectorInst *SVI)
Returns true if this ShuffleVectorInst eventually feeds into a vector reduction intrinsic (e....
static ScalarizationResult canScalarizeAccess(VectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
static InstLane lookThroughShuffles(Use *U, int Lane)
static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI)
static const unsigned InvalidIndex
std::pair< Use *, int > InstLane
static Value * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilderBase &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
static constexpr int Concat[]
A manager for alias analyses.
Class for arbitrary precision integers.
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
unsigned countl_one() const
Count the number of leading one bits.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
bool isOne() const
Determine if this is a value of 1.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
const T & front() const
front - Get the first element.
size_t size() const
size - Get the array size.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
InstListType::iterator iterator
Instruction iterators...
BinaryOps getOpcode() const
Represents analyses that only rely on functions' control flow.
Value * getArgOperand(unsigned i) const
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
bool isFPPredicate() const
static LLVM_ABI std::optional< CmpPredicate > getMatching(CmpPredicate A, CmpPredicate B)
Compares two CmpPredicates taking samesign into account and returns the canonicalized CmpPredicate if...
static LLVM_ABI Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
This is the shared class of boolean and integer constants.
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
LLVM_ABI ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Value * CreateFreeze(Value *V, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Value * CreateIsNotNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg > -1.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Value * CreateIsNeg(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg < 0.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
LLVM_ABI Value * CreateNAryOp(unsigned Opc, ArrayRef< Value * > Ops, const Twine &Name="", MDNode *FPMathTag=nullptr)
Create either a UnaryOperator or BinaryOperator depending on Opc.
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFNegFMF(Value *V, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
void push(Instruction *I)
Push the instruction onto the worklist stack.
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Type * getPointerOperandType() const
Align getAlign() const
Return the alignment of the access that is being performed.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
const SDValue & getOperand(unsigned Num) const
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
static LLVM_ABI void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
void setAlignment(Align Align)
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
static LLVM_ABI bool isVPBinOp(Intrinsic::ID ID)
std::optional< unsigned > getFunctionalIntrinsicID() const
std::optional< unsigned > getFunctionalOpcode() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
unsigned getValueID() const
Return an ID for the concrete type of this object.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
std::pair< iterator, bool > insert(const ValueT &V)
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI AttributeSet getFnAttributes(LLVMContext &C, ID id)
Return the function attributes for an intrinsic.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
class_match< IntrinsicInst > m_AnyIntrinsic()
Matches any intrinsic call and ignore it.
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
DisjointOr_match< LHS, RHS > m_DisjointOr(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_non_zero_int > m_NonZeroInt()
Match a non-zero integer or a vector with all non-zero elements.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
match_combine_or< CastInst_match< OpTy, SExtInst >, NNegZExt_match< OpTy > > m_SExtLike(const OpTy &Op)
Match either "sext" or "zext nneg".
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
@ Valid
The data is already valid.
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
LLVM_ABI Value * simplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q)
Given operand for a UnaryOperator, fold the result or return null.
scope_exit(Callable) -> scope_exit< Callable >
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI unsigned getArithmeticReductionInstruction(Intrinsic::ID RdxID)
Returns the arithmetic instruction opcode used when expanding a reduction.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
unsigned M1(unsigned Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool programUndefinedIfPoison(const Instruction *Inst)
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI void propagateIRFlags(Value *I, ArrayRef< Value * > VL, Value *OpValue=nullptr, bool IncludeWrapFlags=true)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr int PoisonMaskElem
LLVM_ABI bool isSafeToSpeculativelyExecuteWithOpcode(unsigned Opcode, const Instruction *Inst, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
This returns the same result as isSafeToSpeculativelyExecute if Opcode is the actual opcode of Inst.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI Intrinsic::ID getReductionForBinop(Instruction::BinaryOps Opc)
Returns the reduction intrinsic id corresponding to the binary operation.
@ And
Bitwise or logical AND of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
constexpr unsigned BitWidth
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
LLVM_ABI Value * simplifyCmpInst(CmpPredicate Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a CmpInst, fold the result or return null.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicID(Intrinsic::ID IID)
Returns the llvm.vector.reduce min/max intrinsic that corresponds to the intrinsic op.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
SimplifyQuery getWithInstruction(const Instruction *I) const