18#include "llvm/IR/IntrinsicsRISCV.h"
26#define DEBUG_TYPE "riscvtti"
29 "riscv-v-register-bit-width-lmul",
31 "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
32 "by autovectorized code. Fractional LMULs are not supported."),
38 "Overrides result used for getMaximumVF query which is used "
39 "exclusively by SLP vectorizer."),
44 cl::desc(
"Set the lower bound of a trip count to decide on "
45 "vectorization while tail-folding."),
57 size_t NumInstr = OpCodes.size();
62 return LMULCost * NumInstr;
64 for (
auto Op : OpCodes) {
66 case RISCV::VRGATHER_VI:
69 case RISCV::VRGATHER_VV:
72 case RISCV::VSLIDEUP_VI:
73 case RISCV::VSLIDEDOWN_VI:
76 case RISCV::VSLIDEUP_VX:
77 case RISCV::VSLIDEDOWN_VX:
80 case RISCV::VREDMAX_VS:
81 case RISCV::VREDMIN_VS:
82 case RISCV::VREDMAXU_VS:
83 case RISCV::VREDMINU_VS:
84 case RISCV::VREDSUM_VS:
85 case RISCV::VREDAND_VS:
86 case RISCV::VREDOR_VS:
87 case RISCV::VREDXOR_VS:
88 case RISCV::VFREDMAX_VS:
89 case RISCV::VFREDMIN_VS:
90 case RISCV::VFREDUSUM_VS: {
97 case RISCV::VFREDOSUM_VS: {
106 case RISCV::VFMV_F_S:
107 case RISCV::VFMV_S_F:
109 case RISCV::VMXOR_MM:
110 case RISCV::VMAND_MM:
111 case RISCV::VMANDN_MM:
112 case RISCV::VMNAND_MM:
114 case RISCV::VFIRST_M:
133 assert(Ty->isIntegerTy() &&
134 "getIntImmCost can only estimate cost of materialising integers");
157 if (!BO || !BO->hasOneUse())
160 if (BO->getOpcode() != Instruction::Shl)
171 if (ShAmt == Trailing)
188 if (!Cmp || !Cmp->isEquality())
204 if ((CmpC & Mask) != CmpC)
211 return NewCmpC >= -2048 && NewCmpC <= 2048;
218 assert(Ty->isIntegerTy() &&
219 "getIntImmCost can only estimate cost of materialising integers");
227 bool Takes12BitImm =
false;
228 unsigned ImmArgIdx = ~0U;
231 case Instruction::GetElementPtr:
236 case Instruction::Store: {
241 if (Idx == 1 || !Inst)
246 if (!getTLI()->allowsMemoryAccessForAlignment(
254 case Instruction::Load:
257 case Instruction::And:
259 if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
262 if (Imm == UINT64_C(0xffffffff) &&
263 ((ST->hasStdExtZba() && ST->isRV64()) || ST->isRV32()))
266 if (ST->hasStdExtZbs() && (~Imm).isPowerOf2())
268 if (Inst && Idx == 1 && Imm.getBitWidth() <= ST->getXLen() &&
271 if (Inst && Idx == 1 && Imm.getBitWidth() == 64 &&
274 Takes12BitImm =
true;
276 case Instruction::Add:
277 Takes12BitImm =
true;
279 case Instruction::Or:
280 case Instruction::Xor:
282 if (ST->hasStdExtZbs() && Imm.isPowerOf2())
284 Takes12BitImm =
true;
286 case Instruction::Mul:
288 if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
291 if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2())
294 Takes12BitImm =
true;
296 case Instruction::Sub:
297 case Instruction::Shl:
298 case Instruction::LShr:
299 case Instruction::AShr:
300 Takes12BitImm =
true;
311 if (Imm.getSignificantBits() <= 64 &&
334 return ST->hasVInstructions();
344 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
348 if (Opcode == Instruction::FAdd)
353 if (!ST->hasStdExtZvdot4a8i() || ST->getELen() < 64 ||
354 Opcode != Instruction::Add || !BinOp || *BinOp != Instruction::Mul ||
355 InputTypeA != InputTypeB || !InputTypeA->
isIntegerTy(8) ||
363 getRISCVInstructionCost(RISCV::VDOTA4_VV, LT.second,
CostKind);
370 switch (
II->getIntrinsicID()) {
374 case Intrinsic::vector_reduce_mul:
375 case Intrinsic::vector_reduce_fmul:
381 if (ST->hasVInstructions())
387 if (ST->hasVInstructions())
388 if (
unsigned MinVLen = ST->getRealMinVLen();
403 ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
406 (ST->hasVInstructions() &&
429 return (ST->hasAUIPCADDIFusion() && ST->hasLUIADDIFusion()) ? 1 : 2;
435RISCVTTIImpl::getConstantPoolLoadCost(
Type *Ty,
440 return getStaticDataAddrGenerationCost(
CostKind) +
446 unsigned Size = Mask.size();
449 for (
unsigned I = 0;
I !=
Size; ++
I) {
450 if (
static_cast<unsigned>(Mask[
I]) ==
I)
456 for (
unsigned J =
I + 1; J !=
Size; ++J)
458 if (
static_cast<unsigned>(Mask[J]) != J %
I)
486 "Expected fixed vector type and non-empty mask");
489 unsigned NumOfDests =
divideCeil(Mask.size(), LegalNumElts);
493 if (NumOfDests <= 1 ||
495 Tp->getElementType()->getPrimitiveSizeInBits() ||
496 LegalNumElts >= Tp->getElementCount().getFixedValue())
499 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
502 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
506 unsigned NormalizedVF = LegalNumElts * std::max(NumOfSrcs, NumOfDests);
507 unsigned NumOfSrcRegs = NormalizedVF / LegalNumElts;
508 unsigned NumOfDestRegs = NormalizedVF / LegalNumElts;
510 assert(NormalizedVF >= Mask.size() &&
511 "Normalized mask expected to be not shorter than original mask.");
516 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
517 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
520 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
523 Cost +=
TTI.getShuffleCost(
526 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
528 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
529 Cost +=
TTI.getShuffleCost(
532 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
555 if (!VLen || Mask.empty())
559 LegalVT =
TTI.getTypeLegalizationCost(
565 if (NumOfDests <= 1 ||
567 Tp->getElementType()->getPrimitiveSizeInBits() ||
571 unsigned VecTySize =
TTI.getDataLayout().getTypeStoreSize(Tp);
574 unsigned NumOfSrcs =
divideCeil(VecTySize, LegalVTSize);
580 unsigned NormalizedVF =
585 assert(NormalizedVF >= Mask.size() &&
586 "Normalized mask expected to be not shorter than original mask.");
592 NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
593 [&](
ArrayRef<int> RegMask,
unsigned SrcReg,
unsigned DestReg) {
596 if (!ReusedSingleSrcShuffles.
insert(std::make_pair(RegMask, SrcReg))
601 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
603 [&](
ArrayRef<int> RegMask,
unsigned Idx1,
unsigned Idx2,
bool NewReg) {
605 SingleOpTy, RegMask,
CostKind, 0,
nullptr);
612 if ((NumOfDestRegs > 2 && NumShuffles <=
static_cast<int>(NumOfDestRegs)) ||
613 (NumOfDestRegs <= 2 && NumShuffles < 4))
628 if (!
LT.second.isFixedLengthVector())
636 auto GetSlideOpcode = [&](
int SlideAmt) {
638 bool IsVI =
isUInt<5>(std::abs(SlideAmt));
640 return IsVI ? RISCV::VSLIDEDOWN_VI : RISCV::VSLIDEDOWN_VX;
641 return IsVI ? RISCV::VSLIDEUP_VI : RISCV::VSLIDEUP_VX;
644 std::array<std::pair<int, int>, 2> SrcInfo;
648 if (SrcInfo[1].second == 0)
652 if (SrcInfo[0].second != 0) {
653 unsigned Opcode = GetSlideOpcode(SrcInfo[0].second);
654 FirstSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
657 if (SrcInfo[1].first == -1)
658 return FirstSlideCost;
661 if (SrcInfo[1].second != 0) {
662 unsigned Opcode = GetSlideOpcode(SrcInfo[1].second);
663 SecondSlideCost = getRISCVInstructionCost(Opcode,
LT.second,
CostKind);
666 getRISCVInstructionCost(RISCV::VMERGE_VVM,
LT.second,
CostKind);
673 return FirstSlideCost + SecondSlideCost + MaskCost;
684 "Expected the Mask to match the return size if given");
686 "Expected the same scalar types");
695 FVTp && ST->hasVInstructions() && LT.second.isFixedLengthVector()) {
697 *
this, LT.second, ST->getRealVLen(),
699 if (VRegSplittingCost.
isValid())
700 return VRegSplittingCost;
705 if (Mask.size() >= 2) {
706 MVT EltTp = LT.second.getVectorElementType();
717 return 2 * LT.first * TLI->getLMULCost(LT.second);
719 if (Mask[0] == 0 || Mask[0] == 1) {
723 if (
equal(DeinterleaveMask, Mask))
724 return LT.first * getRISCVInstructionCost(RISCV::VNSRL_WI,
729 if (LT.second.getScalarSizeInBits() != 1 &&
732 unsigned NumSlides =
Log2_32(Mask.size() / SubVectorSize);
734 for (
unsigned I = 0;
I != NumSlides; ++
I) {
735 unsigned InsertIndex = SubVectorSize * (1 <<
I);
740 std::pair<InstructionCost, MVT> DestLT =
745 Cost += DestLT.first * TLI->getLMULCost(DestLT.second);
759 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
760 LT.second.getVectorNumElements() <= 256)) {
765 getRISCVInstructionCost(RISCV::VRGATHER_VV, LT.second,
CostKind);
779 if (LT.first == 1 && (LT.second.getScalarSizeInBits() != 8 ||
780 LT.second.getVectorNumElements() <= 256)) {
781 auto &
C = SrcTy->getContext();
782 auto EC = SrcTy->getElementCount();
787 return 2 * IndexCost +
788 getRISCVInstructionCost({RISCV::VRGATHER_VV, RISCV::VRGATHER_VV},
807 if (!Mask.empty() && LT.first.isValid() && LT.first != 1 &&
835 SubLT.second.isValid() && SubLT.second.isFixedLengthVector()) {
836 if (std::optional<unsigned> VLen = ST->getRealVLen();
837 VLen && SubLT.second.getScalarSizeInBits() * Index % *VLen == 0 &&
838 SubLT.second.getSizeInBits() <= *VLen)
846 getRISCVInstructionCost(RISCV::VSLIDEDOWN_VI, LT.second,
CostKind);
853 getRISCVInstructionCost(RISCV::VSLIDEUP_VI, LT.second,
CostKind);
865 (1 + getRISCVInstructionCost({RISCV::VMV_S_X, RISCV::VMERGE_VVM},
870 Instruction::InsertElement);
871 if (LT.second.getScalarSizeInBits() == 1) {
879 (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
892 (1 + getRISCVInstructionCost({RISCV::VMV_V_I, RISCV::VMERGE_VIM,
893 RISCV::VMV_X_S, RISCV::VMV_V_X,
902 getRISCVInstructionCost(RISCV::VMV_V_X, LT.second,
CostKind);
908 getRISCVInstructionCost(RISCV::VRGATHER_VI, LT.second,
CostKind);
914 unsigned Opcodes[2] = {RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX};
915 if (Index >= 0 && Index < 32)
916 Opcodes[0] = RISCV::VSLIDEDOWN_VI;
917 else if (Index < 0 && Index > -32)
918 Opcodes[1] = RISCV::VSLIDEUP_VI;
919 return LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
923 if (!LT.second.isVector())
929 if (SrcTy->getElementType()->isIntegerTy(1)) {
941 MVT ContainerVT = LT.second;
942 if (LT.second.isFixedLengthVector())
943 ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
945 if (ContainerVT.
bitsLE(M1VT)) {
955 if (LT.second.isFixedLengthVector())
957 LenCost =
isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
958 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX, RISCV::VRGATHER_VV};
959 if (LT.second.isFixedLengthVector() &&
960 isInt<5>(LT.second.getVectorNumElements() - 1))
961 Opcodes[1] = RISCV::VRSUB_VI;
963 getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
964 return LT.first * (LenCost + GatherCost);
971 unsigned M1Opcodes[] = {RISCV::VID_V, RISCV::VRSUB_VX};
973 getRISCVInstructionCost(M1Opcodes, M1VT,
CostKind) + 3;
977 getRISCVInstructionCost({RISCV::VRGATHER_VV}, M1VT,
CostKind) * Ratio;
979 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX}, LT.second,
CostKind);
980 return FixedCost + LT.first * (GatherCost + SlideCost);
1014 Ty, DemandedElts, Insert, Extract,
CostKind);
1016 if (Insert && !Extract && LT.first.isValid() && LT.second.isVector()) {
1017 if (Ty->getScalarSizeInBits() == 1) {
1027 assert(LT.second.isFixedLengthVector());
1028 MVT ContainerVT = TLI->getContainerForFixedLengthVector(LT.second);
1032 getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second,
CostKind);
1045 switch (MICA.
getID()) {
1046 case Intrinsic::vp_load_ff: {
1047 EVT DataTypeVT = TLI->getValueType(
DL, DataTy);
1048 if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
1055 case Intrinsic::experimental_vp_strided_load:
1056 case Intrinsic::experimental_vp_strided_store:
1058 case Intrinsic::masked_compressstore:
1059 case Intrinsic::masked_expandload:
1061 case Intrinsic::vp_scatter:
1062 case Intrinsic::vp_gather:
1063 case Intrinsic::masked_scatter:
1064 case Intrinsic::masked_gather:
1066 case Intrinsic::vp_load:
1067 case Intrinsic::vp_store:
1068 case Intrinsic::masked_load:
1069 case Intrinsic::masked_store:
1078 unsigned Opcode = MICA.
getID() == Intrinsic::masked_load ? Instruction::Load
1079 : Instruction::Store;
1094 bool UseMaskForCond,
bool UseMaskForGaps)
const {
1100 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
1104 if (LT.second.isVector()) {
1107 VTy->getElementCount().divideCoefficientBy(Factor));
1108 if (VTy->getElementCount().isKnownMultipleOf(Factor) &&
1109 TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
1114 if (ST->hasOptimizedSegmentLoadStore(Factor)) {
1117 MVT SubVecVT = getTLI()->getValueType(
DL, SubVecTy).getSimpleVT();
1118 Cost += Factor * TLI->getLMULCost(SubVecVT);
1119 return LT.first *
Cost;
1126 CostKind, {TTI::OK_AnyValue, TTI::OP_None});
1127 unsigned NumLoads = getEstimatedVLFor(VTy);
1128 return NumLoads * MemOpCost;
1141 unsigned VF = FVTy->getNumElements() / Factor;
1148 if (Opcode == Instruction::Load) {
1150 for (
unsigned Index : Indices) {
1154 Mask.resize(VF * Factor, -1);
1158 Cost += ShuffleCost;
1176 UseMaskForCond, UseMaskForGaps);
1178 assert(Opcode == Instruction::Store &&
"Opcode must be a store");
1185 return MemCost + ShuffleCost;
1192 bool IsLoad = MICA.
getID() == Intrinsic::masked_gather ||
1193 MICA.
getID() == Intrinsic::vp_gather;
1194 unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;
1200 if ((Opcode == Instruction::Load &&
1202 (Opcode == Instruction::Store &&
1210 unsigned NumLoads = getEstimatedVLFor(&VTy);
1217 unsigned Opcode = MICA.
getID() == Intrinsic::masked_expandload
1219 : Instruction::Store;
1223 bool IsLegal = (Opcode == Instruction::Store &&
1225 (Opcode == Instruction::Load &&
1249 if (Opcode == Instruction::Store)
1250 Opcodes.
append({RISCV::VCOMPRESS_VM});
1252 Opcodes.
append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
1254 LT.first * getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
1261 unsigned Opcode = MICA.
getID() == Intrinsic::experimental_vp_strided_load
1263 : Instruction::Store;
1282 {TTI::OK_AnyValue, TTI::OP_None},
I);
1283 unsigned NumLoads = getEstimatedVLFor(&VTy);
1284 return NumLoads * MemOpCost;
1294 for (
auto *Ty : Tys) {
1295 if (!Ty->isVectorTy())
1309 {Intrinsic::floor, MVT::f32, 9},
1310 {Intrinsic::floor, MVT::f64, 9},
1311 {Intrinsic::ceil, MVT::f32, 9},
1312 {Intrinsic::ceil, MVT::f64, 9},
1313 {Intrinsic::trunc, MVT::f32, 7},
1314 {Intrinsic::trunc, MVT::f64, 7},
1315 {Intrinsic::round, MVT::f32, 9},
1316 {Intrinsic::round, MVT::f64, 9},
1317 {Intrinsic::roundeven, MVT::f32, 9},
1318 {Intrinsic::roundeven, MVT::f64, 9},
1319 {Intrinsic::rint, MVT::f32, 7},
1320 {Intrinsic::rint, MVT::f64, 7},
1321 {Intrinsic::nearbyint, MVT::f32, 9},
1322 {Intrinsic::nearbyint, MVT::f64, 9},
1323 {Intrinsic::bswap, MVT::i16, 3},
1324 {Intrinsic::bswap, MVT::i32, 12},
1325 {Intrinsic::bswap, MVT::i64, 31},
1326 {Intrinsic::vp_bswap, MVT::i16, 3},
1327 {Intrinsic::vp_bswap, MVT::i32, 12},
1328 {Intrinsic::vp_bswap, MVT::i64, 31},
1329 {Intrinsic::vp_fshl, MVT::i8, 7},
1330 {Intrinsic::vp_fshl, MVT::i16, 7},
1331 {Intrinsic::vp_fshl, MVT::i32, 7},
1332 {Intrinsic::vp_fshl, MVT::i64, 7},
1333 {Intrinsic::vp_fshr, MVT::i8, 7},
1334 {Intrinsic::vp_fshr, MVT::i16, 7},
1335 {Intrinsic::vp_fshr, MVT::i32, 7},
1336 {Intrinsic::vp_fshr, MVT::i64, 7},
1337 {Intrinsic::bitreverse, MVT::i8, 17},
1338 {Intrinsic::bitreverse, MVT::i16, 24},
1339 {Intrinsic::bitreverse, MVT::i32, 33},
1340 {Intrinsic::bitreverse, MVT::i64, 52},
1341 {Intrinsic::vp_bitreverse, MVT::i8, 17},
1342 {Intrinsic::vp_bitreverse, MVT::i16, 24},
1343 {Intrinsic::vp_bitreverse, MVT::i32, 33},
1344 {Intrinsic::vp_bitreverse, MVT::i64, 52},
1345 {Intrinsic::ctpop, MVT::i8, 12},
1346 {Intrinsic::ctpop, MVT::i16, 19},
1347 {Intrinsic::ctpop, MVT::i32, 20},
1348 {Intrinsic::ctpop, MVT::i64, 21},
1349 {Intrinsic::ctlz, MVT::i8, 19},
1350 {Intrinsic::ctlz, MVT::i16, 28},
1351 {Intrinsic::ctlz, MVT::i32, 31},
1352 {Intrinsic::ctlz, MVT::i64, 35},
1353 {Intrinsic::cttz, MVT::i8, 16},
1354 {Intrinsic::cttz, MVT::i16, 23},
1355 {Intrinsic::cttz, MVT::i32, 24},
1356 {Intrinsic::cttz, MVT::i64, 25},
1357 {Intrinsic::vp_ctpop, MVT::i8, 12},
1358 {Intrinsic::vp_ctpop, MVT::i16, 19},
1359 {Intrinsic::vp_ctpop, MVT::i32, 20},
1360 {Intrinsic::vp_ctpop, MVT::i64, 21},
1361 {Intrinsic::vp_ctlz, MVT::i8, 19},
1362 {Intrinsic::vp_ctlz, MVT::i16, 28},
1363 {Intrinsic::vp_ctlz, MVT::i32, 31},
1364 {Intrinsic::vp_ctlz, MVT::i64, 35},
1365 {Intrinsic::vp_cttz, MVT::i8, 16},
1366 {Intrinsic::vp_cttz, MVT::i16, 23},
1367 {Intrinsic::vp_cttz, MVT::i32, 24},
1368 {Intrinsic::vp_cttz, MVT::i64, 25},
1375 switch (ICA.
getID()) {
1376 case Intrinsic::lrint:
1377 case Intrinsic::llrint:
1378 case Intrinsic::lround:
1379 case Intrinsic::llround: {
1383 if (ST->hasVInstructions() && LT.second.isVector()) {
1385 unsigned SrcEltSz =
DL.getTypeSizeInBits(SrcTy->getScalarType());
1386 unsigned DstEltSz =
DL.getTypeSizeInBits(RetTy->getScalarType());
1387 if (LT.second.getVectorElementType() == MVT::bf16) {
1388 if (!ST->hasVInstructionsBF16Minimal())
1391 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFCVT_X_F_V};
1393 Ops = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVT_X_F_V};
1394 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1395 !ST->hasVInstructionsF16()) {
1396 if (!ST->hasVInstructionsF16Minimal())
1399 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFCVT_X_F_V};
1401 Ops = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_X_F_V};
1403 }
else if (SrcEltSz > DstEltSz) {
1404 Ops = {RISCV::VFNCVT_X_F_W};
1405 }
else if (SrcEltSz < DstEltSz) {
1406 Ops = {RISCV::VFWCVT_X_F_V};
1408 Ops = {RISCV::VFCVT_X_F_V};
1413 if (SrcEltSz > DstEltSz)
1414 return SrcLT.first *
1415 getRISCVInstructionCost(
Ops, SrcLT.second,
CostKind);
1416 return LT.first * getRISCVInstructionCost(
Ops, LT.second,
CostKind);
1420 case Intrinsic::ceil:
1421 case Intrinsic::floor:
1422 case Intrinsic::trunc:
1423 case Intrinsic::rint:
1424 case Intrinsic::round:
1425 case Intrinsic::roundeven: {
1428 if (!LT.second.isVector() && TLI->isOperationCustom(
ISD::FCEIL, LT.second))
1429 return LT.first * 8;
1432 case Intrinsic::umin:
1433 case Intrinsic::umax:
1434 case Intrinsic::smin:
1435 case Intrinsic::smax: {
1437 if (LT.second.isScalarInteger() && ST->hasStdExtZbb())
1440 if (ST->hasVInstructions() && LT.second.isVector()) {
1442 switch (ICA.
getID()) {
1443 case Intrinsic::umin:
1444 Op = RISCV::VMINU_VV;
1446 case Intrinsic::umax:
1447 Op = RISCV::VMAXU_VV;
1449 case Intrinsic::smin:
1450 Op = RISCV::VMIN_VV;
1452 case Intrinsic::smax:
1453 Op = RISCV::VMAX_VV;
1456 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1460 case Intrinsic::sadd_sat:
1461 case Intrinsic::ssub_sat:
1462 case Intrinsic::uadd_sat:
1463 case Intrinsic::usub_sat: {
1465 if (ST->hasVInstructions() && LT.second.isVector()) {
1467 switch (ICA.
getID()) {
1468 case Intrinsic::sadd_sat:
1469 Op = RISCV::VSADD_VV;
1471 case Intrinsic::ssub_sat:
1472 Op = RISCV::VSSUBU_VV;
1474 case Intrinsic::uadd_sat:
1475 Op = RISCV::VSADDU_VV;
1477 case Intrinsic::usub_sat:
1478 Op = RISCV::VSSUBU_VV;
1481 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1485 case Intrinsic::fma:
1486 case Intrinsic::fmuladd: {
1489 if (ST->hasVInstructions() && LT.second.isVector())
1491 getRISCVInstructionCost(RISCV::VFMADD_VV, LT.second,
CostKind);
1494 case Intrinsic::fabs: {
1496 if (ST->hasVInstructions() && LT.second.isVector()) {
1502 if (LT.second.getVectorElementType() == MVT::bf16 ||
1503 (LT.second.getVectorElementType() == MVT::f16 &&
1504 !ST->hasVInstructionsF16()))
1505 return LT.first * getRISCVInstructionCost(RISCV::VAND_VX, LT.second,
1510 getRISCVInstructionCost(RISCV::VFSGNJX_VV, LT.second,
CostKind);
1514 case Intrinsic::sqrt: {
1516 if (ST->hasVInstructions() && LT.second.isVector()) {
1519 MVT ConvType = LT.second;
1520 MVT FsqrtType = LT.second;
1523 if (LT.second.getVectorElementType() == MVT::bf16) {
1524 if (LT.second == MVT::nxv32bf16) {
1525 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFWCVTBF16_F_F_V,
1526 RISCV::VFNCVTBF16_F_F_W, RISCV::VFNCVTBF16_F_F_W};
1527 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1528 ConvType = MVT::nxv16f16;
1529 FsqrtType = MVT::nxv16f32;
1531 ConvOp = {RISCV::VFWCVTBF16_F_F_V, RISCV::VFNCVTBF16_F_F_W};
1532 FsqrtOp = {RISCV::VFSQRT_V};
1533 FsqrtType = TLI->getTypeToPromoteTo(
ISD::FSQRT, FsqrtType);
1535 }
else if (LT.second.getVectorElementType() == MVT::f16 &&
1536 !ST->hasVInstructionsF16()) {
1537 if (LT.second == MVT::nxv32f16) {
1538 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFWCVT_F_F_V,
1539 RISCV::VFNCVT_F_F_W, RISCV::VFNCVT_F_F_W};
1540 FsqrtOp = {RISCV::VFSQRT_V, RISCV::VFSQRT_V};
1541 ConvType = MVT::nxv16f16;
1542 FsqrtType = MVT::nxv16f32;
1544 ConvOp = {RISCV::VFWCVT_F_F_V, RISCV::VFNCVT_F_F_W};
1545 FsqrtOp = {RISCV::VFSQRT_V};
1546 FsqrtType = TLI->getTypeToPromoteTo(
ISD::FSQRT, FsqrtType);
1549 FsqrtOp = {RISCV::VFSQRT_V};
1552 return LT.first * (getRISCVInstructionCost(FsqrtOp, FsqrtType,
CostKind) +
1553 getRISCVInstructionCost(ConvOp, ConvType,
CostKind));
1557 case Intrinsic::cttz:
1558 case Intrinsic::ctlz:
1559 case Intrinsic::ctpop: {
1561 if (ST->hasStdExtZvbb() && LT.second.isVector()) {
1563 switch (ICA.
getID()) {
1564 case Intrinsic::cttz:
1567 case Intrinsic::ctlz:
1570 case Intrinsic::ctpop:
1571 Op = RISCV::VCPOP_V;
1574 return LT.first * getRISCVInstructionCost(
Op, LT.second,
CostKind);
1578 case Intrinsic::abs: {
1580 if (ST->hasVInstructions() && LT.second.isVector()) {
1582 if (ST->hasStdExtZvabd())
1584 getRISCVInstructionCost({RISCV::VABS_V}, LT.second,
CostKind);
1589 getRISCVInstructionCost({RISCV::VRSUB_VI, RISCV::VMAX_VV},
1594 case Intrinsic::fshl:
1595 case Intrinsic::fshr: {
1602 if ((ST->hasStdExtZbb() || ST->hasStdExtZbkb()) && RetTy->isIntegerTy() &&
1604 (RetTy->getIntegerBitWidth() == 32 ||
1605 RetTy->getIntegerBitWidth() == 64) &&
1606 RetTy->getIntegerBitWidth() <= ST->getXLen()) {
1611 case Intrinsic::get_active_lane_mask: {
1612 if (ST->hasVInstructions()) {
1621 getRISCVInstructionCost({RISCV::VSADDU_VX, RISCV::VMSLTU_VX},
1627 case Intrinsic::stepvector: {
1631 if (ST->hasVInstructions())
1632 return getRISCVInstructionCost(RISCV::VID_V, LT.second,
CostKind) +
1634 getRISCVInstructionCost(RISCV::VADD_VX, LT.second,
CostKind);
1635 return 1 + (LT.first - 1);
1637 case Intrinsic::vector_splice_left:
1638 case Intrinsic::vector_splice_right: {
1643 if (ST->hasVInstructions() && LT.second.isVector()) {
1645 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VX, RISCV::VSLIDEUP_VX},
1650 case Intrinsic::experimental_cttz_elts: {
1652 EVT ArgType = TLI->getValueType(
DL, ArgTy,
true);
1653 if (getTLI()->shouldExpandCttzElements(ArgType))
1670 case Intrinsic::experimental_vp_splice: {
1678 case Intrinsic::fptoui_sat:
1679 case Intrinsic::fptosi_sat: {
1681 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
1686 if (!SrcTy->isVectorTy())
1689 if (!SrcLT.first.isValid() || !DstLT.first.isValid())
1706 case Intrinsic::experimental_vector_extract_last_active: {
1728 unsigned EltWidth = getTLI()->getBitWidthForCttzElements(
1729 MaskTy->getScalarType(), MaskTy->getElementCount(),
1730 true, &VScaleRange);
1731 EltWidth = std::max(EltWidth, MaskTy->getScalarSizeInBits());
1736 unsigned Opcodes[] = {RISCV::VID_V, RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
1738 Cost += MaskLT.first *
1739 getRISCVInstructionCost(RISCV::VCPOP_M, MaskLT.second,
CostKind);
1741 Cost += StepLT.first *
1742 getRISCVInstructionCost(Opcodes, StepLT.second,
CostKind);
1746 Cost += ValLT.first *
1747 getRISCVInstructionCost({RISCV::VSLIDEDOWN_VI, RISCV::VMV_X_S},
1753 if (ST->hasVInstructions() && RetTy->isVectorTy()) {
1755 LT.second.isVector()) {
1756 MVT EltTy = LT.second.getVectorElementType();
1758 ICA.
getID(), EltTy))
1759 return LT.first * Entry->Cost;
1772 if (ST->hasVInstructions() && PtrTy->
isVectorTy())
1790 if (ST->hasStdExtP() &&
1798 if (!ST->hasVInstructions() || Src->getScalarSizeInBits() > ST->getELen() ||
1799 Dst->getScalarSizeInBits() > ST->getELen())
1802 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1817 if (Src->getScalarSizeInBits() == 1) {
1822 return getRISCVInstructionCost(RISCV::VMV_V_I, DstLT.second,
CostKind) +
1823 DstLT.first * getRISCVInstructionCost(RISCV::VMERGE_VIM,
1829 if (Dst->getScalarSizeInBits() == 1) {
1835 return SrcLT.first *
1836 getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI},
1848 if (!SrcLT.second.isVector() || !DstLT.second.isVector() ||
1849 !SrcLT.first.isValid() || !DstLT.first.isValid() ||
1851 SrcLT.second.getSizeInBits()) ||
1853 DstLT.second.getSizeInBits()) ||
1854 SrcLT.first > 1 || DstLT.first > 1)
1858 assert((SrcLT.first == 1) && (DstLT.first == 1) &&
"Illegal type");
1860 int PowDiff = (int)
Log2_32(DstLT.second.getScalarSizeInBits()) -
1861 (int)
Log2_32(SrcLT.second.getScalarSizeInBits());
1865 if ((PowDiff < 1) || (PowDiff > 3))
1867 unsigned SExtOp[] = {RISCV::VSEXT_VF2, RISCV::VSEXT_VF4, RISCV::VSEXT_VF8};
1868 unsigned ZExtOp[] = {RISCV::VZEXT_VF2, RISCV::VZEXT_VF4, RISCV::VZEXT_VF8};
1871 return getRISCVInstructionCost(
Op, DstLT.second,
CostKind);
1877 unsigned SrcEltSize = SrcLT.second.getScalarSizeInBits();
1878 unsigned DstEltSize = DstLT.second.getScalarSizeInBits();
1882 : RISCV::VFNCVT_F_F_W;
1884 for (; SrcEltSize != DstEltSize;) {
1888 MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT);
1890 (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1;
1898 unsigned FCVT = IsSigned ? RISCV::VFCVT_RTZ_X_F_V : RISCV::VFCVT_RTZ_XU_F_V;
1900 IsSigned ? RISCV::VFWCVT_RTZ_X_F_V : RISCV::VFWCVT_RTZ_XU_F_V;
1902 IsSigned ? RISCV::VFNCVT_RTZ_X_F_W : RISCV::VFNCVT_RTZ_XU_F_W;
1903 unsigned SrcEltSize = Src->getScalarSizeInBits();
1904 unsigned DstEltSize = Dst->getScalarSizeInBits();
1906 if ((SrcEltSize == 16) &&
1907 (!ST->hasVInstructionsF16() || ((DstEltSize / 2) > SrcEltSize))) {
1913 std::pair<InstructionCost, MVT> VecF32LT =
1916 VecF32LT.first * getRISCVInstructionCost(RISCV::VFWCVT_F_F_V,
1921 if (DstEltSize == SrcEltSize)
1922 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1923 else if (DstEltSize > SrcEltSize)
1924 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1929 MVT VecVT = DstLT.second.changeVectorElementType(ElementVT);
1930 Cost += getRISCVInstructionCost(FNCVT, VecVT,
CostKind);
1931 if ((SrcEltSize / 2) > DstEltSize) {
1942 unsigned FCVT = IsSigned ? RISCV::VFCVT_F_X_V : RISCV::VFCVT_F_XU_V;
1943 unsigned FWCVT = IsSigned ? RISCV::VFWCVT_F_X_V : RISCV::VFWCVT_F_XU_V;
1944 unsigned FNCVT = IsSigned ? RISCV::VFNCVT_F_X_W : RISCV::VFNCVT_F_XU_W;
1945 unsigned SrcEltSize = Src->getScalarSizeInBits();
1946 unsigned DstEltSize = Dst->getScalarSizeInBits();
1949 if ((DstEltSize == 16) &&
1950 (!ST->hasVInstructionsF16() || ((SrcEltSize / 2) > DstEltSize))) {
1956 std::pair<InstructionCost, MVT> VecF32LT =
1959 Cost += VecF32LT.first * getRISCVInstructionCost(RISCV::VFNCVT_F_F_W,
1964 if (DstEltSize == SrcEltSize)
1965 Cost += getRISCVInstructionCost(FCVT, DstLT.second,
CostKind);
1966 else if (DstEltSize > SrcEltSize) {
1967 if ((DstEltSize / 2) > SrcEltSize) {
1971 unsigned Op = IsSigned ? Instruction::SExt : Instruction::ZExt;
1974 Cost += getRISCVInstructionCost(FWCVT, DstLT.second,
CostKind);
1976 Cost += getRISCVInstructionCost(FNCVT, DstLT.second,
CostKind);
1983unsigned RISCVTTIImpl::getEstimatedVLFor(
VectorType *Ty)
const {
1985 const unsigned EltSize =
DL.getTypeSizeInBits(Ty->getElementType());
1986 const unsigned MinSize =
DL.getTypeSizeInBits(Ty).getKnownMinValue();
2001 if (Ty->getScalarSizeInBits() > ST->getELen())
2005 if (Ty->getElementType()->isIntegerTy(1)) {
2009 if (IID == Intrinsic::umax || IID == Intrinsic::smin)
2015 if (IID == Intrinsic::maximum || IID == Intrinsic::minimum) {
2019 case Intrinsic::maximum:
2021 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
2023 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMAX_VS,
2038 case Intrinsic::minimum:
2040 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
2042 Opcodes = {RISCV::VMFNE_VV, RISCV::VCPOP_M, RISCV::VFREDMIN_VS,
2048 const unsigned EltTyBits =
DL.getTypeSizeInBits(DstTy);
2057 return ExtraCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2066 case Intrinsic::smax:
2067 SplitOp = RISCV::VMAX_VV;
2068 Opcodes = {RISCV::VREDMAX_VS, RISCV::VMV_X_S};
2070 case Intrinsic::smin:
2071 SplitOp = RISCV::VMIN_VV;
2072 Opcodes = {RISCV::VREDMIN_VS, RISCV::VMV_X_S};
2074 case Intrinsic::umax:
2075 SplitOp = RISCV::VMAXU_VV;
2076 Opcodes = {RISCV::VREDMAXU_VS, RISCV::VMV_X_S};
2078 case Intrinsic::umin:
2079 SplitOp = RISCV::VMINU_VV;
2080 Opcodes = {RISCV::VREDMINU_VS, RISCV::VMV_X_S};
2082 case Intrinsic::maxnum:
2083 SplitOp = RISCV::VFMAX_VV;
2084 Opcodes = {RISCV::VFREDMAX_VS, RISCV::VFMV_F_S};
2086 case Intrinsic::minnum:
2087 SplitOp = RISCV::VFMIN_VV;
2088 Opcodes = {RISCV::VFREDMIN_VS, RISCV::VFMV_F_S};
2093 (LT.first > 1) ? (LT.first - 1) *
2094 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
2096 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2101 std::optional<FastMathFlags> FMF,
2107 if (Ty->getScalarSizeInBits() > ST->getELen())
2110 int ISD = TLI->InstructionOpcodeToISD(Opcode);
2118 Type *ElementTy = Ty->getElementType();
2123 if (LT.second == MVT::v1i1)
2124 return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second,
CostKind) +
2142 return ((LT.first > 2) ? (LT.first - 2) : 0) *
2143 getRISCVInstructionCost(RISCV::VMAND_MM, LT.second,
CostKind) +
2144 getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
CostKind) +
2145 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2154 return (LT.first - 1) *
2155 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind) +
2156 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) + 1;
2164 return (LT.first - 1) *
2165 getRISCVInstructionCost(RISCV::VMOR_MM, LT.second,
CostKind) +
2166 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind) +
2179 SplitOp = RISCV::VADD_VV;
2180 Opcodes = {RISCV::VMV_S_X, RISCV::VREDSUM_VS, RISCV::VMV_X_S};
2183 SplitOp = RISCV::VOR_VV;
2184 Opcodes = {RISCV::VREDOR_VS, RISCV::VMV_X_S};
2187 SplitOp = RISCV::VXOR_VV;
2188 Opcodes = {RISCV::VMV_S_X, RISCV::VREDXOR_VS, RISCV::VMV_X_S};
2191 SplitOp = RISCV::VAND_VV;
2192 Opcodes = {RISCV::VREDAND_VS, RISCV::VMV_X_S};
2196 if ((LT.second.getScalarType() == MVT::f16 && !ST->hasVInstructionsF16()) ||
2197 LT.second.getScalarType() == MVT::bf16)
2201 for (
unsigned i = 0; i < LT.first.getValue(); i++)
2204 return getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2206 SplitOp = RISCV::VFADD_VV;
2207 Opcodes = {RISCV::VFMV_S_F, RISCV::VFREDUSUM_VS, RISCV::VFMV_F_S};
2212 (LT.first > 1) ? (LT.first - 1) *
2213 getRISCVInstructionCost(SplitOp, LT.second,
CostKind)
2215 return SplitCost + getRISCVInstructionCost(Opcodes, LT.second,
CostKind);
2219 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *ValTy,
2230 if (Opcode != Instruction::Add && Opcode != Instruction::FAdd)
2236 if (IsUnsigned && Opcode == Instruction::Add &&
2237 LT.second.isFixedLengthVector() && LT.second.getScalarType() == MVT::i1) {
2241 getRISCVInstructionCost(RISCV::VCPOP_M, LT.second,
CostKind);
2248 return (LT.first - 1) +
2255 assert(OpInfo.isConstant() &&
"non constant operand?");
2262 if (OpInfo.isUniform())
2268 return getConstantPoolLoadCost(Ty,
CostKind);
2277 EVT VT = TLI->getValueType(
DL, Src,
true);
2279 if (VT == MVT::Other)
2284 if (Opcode == Instruction::Store && OpInfo.isConstant())
2299 if (Src->
isVectorTy() && LT.second.isVector() &&
2301 LT.second.getSizeInBits()))
2311 if (ST->hasVInstructions() && LT.second.isVector() &&
2313 BaseCost *= TLI->getLMULCost(LT.second);
2314 return Cost + BaseCost;
2323 Op1Info, Op2Info,
I);
2327 Op1Info, Op2Info,
I);
2330 if (ValTy->isVectorTy() && ValTy->getScalarSizeInBits() > ST->getELen())
2332 Op1Info, Op2Info,
I);
2334 auto GetConstantMatCost =
2336 if (OpInfo.isUniform())
2341 return getConstantPoolLoadCost(ValTy,
CostKind);
2346 ConstantMatCost += GetConstantMatCost(Op1Info);
2348 ConstantMatCost += GetConstantMatCost(Op2Info);
2351 if (Opcode == Instruction::Select && ValTy->isVectorTy()) {
2352 if (CondTy->isVectorTy()) {
2353 if (ValTy->getScalarSizeInBits() == 1) {
2357 return ConstantMatCost +
2359 getRISCVInstructionCost(
2360 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2364 return ConstantMatCost +
2365 LT.first * getRISCVInstructionCost(RISCV::VMERGE_VVM, LT.second,
2369 if (ValTy->getScalarSizeInBits() == 1) {
2375 MVT InterimVT = LT.second.changeVectorElementType(MVT::i8);
2376 return ConstantMatCost +
2378 getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
2380 LT.first * getRISCVInstructionCost(
2381 {RISCV::VMANDN_MM, RISCV::VMAND_MM, RISCV::VMOR_MM},
2388 return ConstantMatCost +
2389 LT.first * getRISCVInstructionCost(
2390 {RISCV::VMV_V_X, RISCV::VMSNE_VI, RISCV::VMERGE_VVM},
2394 if ((Opcode == Instruction::ICmp) && ValTy->isVectorTy() &&
2398 return ConstantMatCost + LT.first * getRISCVInstructionCost(RISCV::VMSLT_VV,
2403 if ((Opcode == Instruction::FCmp) && ValTy->isVectorTy() &&
2408 return ConstantMatCost +
2409 getRISCVInstructionCost(RISCV::VMXOR_MM, LT.second,
CostKind);
2415 if ((ValTy->getScalarSizeInBits() == 16 && !ST->hasVInstructionsF16()) ||
2416 (ValTy->getScalarSizeInBits() == 32 && !ST->hasVInstructionsF32()) ||
2417 (ValTy->getScalarSizeInBits() == 64 && !ST->hasVInstructionsF64()))
2419 Op1Info, Op2Info,
I);
2428 return ConstantMatCost +
2429 LT.first * getRISCVInstructionCost(
2430 {RISCV::VMFLT_VV, RISCV::VMFLT_VV, RISCV::VMOR_MM},
2437 return ConstantMatCost +
2439 getRISCVInstructionCost({RISCV::VMFLT_VV, RISCV::VMNAND_MM},
2448 return ConstantMatCost +
2450 getRISCVInstructionCost(RISCV::VMFLT_VV, LT.second,
CostKind);
2461 ValTy->isIntegerTy() && !
I->user_empty()) {
2463 return match(U, m_Select(m_Specific(I), m_Value(), m_Value())) &&
2464 U->getType()->isIntegerTy() &&
2465 !isa<ConstantData>(U->getOperand(1)) &&
2466 !isa<ConstantData>(U->getOperand(2));
2474 Op1Info, Op2Info,
I);
2481 return Opcode == Instruction::PHI ? 0 : 1;
2498 if (Opcode != Instruction::ExtractElement &&
2499 Opcode != Instruction::InsertElement)
2507 if (!LT.second.isVector()) {
2516 Type *ElemTy = FixedVecTy->getElementType();
2517 auto NumElems = FixedVecTy->getNumElements();
2518 auto Align =
DL.getPrefTypeAlign(ElemTy);
2523 return Opcode == Instruction::ExtractElement
2524 ? StoreCost * NumElems + LoadCost
2525 : (StoreCost + LoadCost) * NumElems + StoreCost;
2529 if (LT.second.isScalableVector() && !LT.first.isValid())
2537 if (Opcode == Instruction::ExtractElement) {
2543 return ExtendCost + ExtractCost;
2553 return ExtendCost + InsertCost + TruncCost;
2559 unsigned BaseCost = 1;
2561 unsigned SlideCost = Opcode == Instruction::InsertElement ? 2 : 1;
2566 if (LT.second.isFixedLengthVector()) {
2567 unsigned Width = LT.second.getVectorNumElements();
2568 Index = Index % Width;
2573 if (
auto VLEN = ST->getRealVLen()) {
2574 unsigned EltSize = LT.second.getScalarSizeInBits();
2575 unsigned M1Max = *VLEN / EltSize;
2576 Index = Index % M1Max;
2582 else if (ST->hasVendorXRivosVisni() &&
isUInt<5>(Index) &&
2585 else if (Opcode == Instruction::InsertElement)
2593 ((Index == -1U) || (Index >= LT.second.getVectorMinNumElements() &&
2594 LT.second.isScalableVector()))) {
2596 Align VecAlign =
DL.getPrefTypeAlign(Val);
2597 Align SclAlign =
DL.getPrefTypeAlign(ScalarType);
2602 if (Opcode == Instruction::ExtractElement)
2638 BaseCost = Opcode == Instruction::InsertElement ? 3 : 4;
2640 return BaseCost + SlideCost;
2646 unsigned Index)
const {
2655 assert(Index < EC.getKnownMinValue() &&
"Unexpected reverse index");
2657 EC.getKnownMinValue() - 1 - Index,
nullptr,
2682 unsigned ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
2685 if (!LT.second.isVector()) {
2695 if (TLI->isOperationLegalOrPromote(ISDOpcode, LT.second))
2696 if (
const auto *Entry =
CostTableLookup(DivTbl, ISDOpcode, LT.second))
2697 return Entry->Cost * LT.first;
2706 if ((LT.second.getVectorElementType() == MVT::f16 ||
2707 LT.second.getVectorElementType() == MVT::bf16) &&
2708 TLI->getOperationAction(ISDOpcode, LT.second) ==
2710 MVT PromotedVT = TLI->getTypeToPromoteTo(ISDOpcode, LT.second);
2714 CastCost += LT.first * Args.size() *
2722 LT.second = PromotedVT;
2725 auto getConstantMatCost =
2735 return getConstantPoolLoadCost(Ty,
CostKind);
2741 ConstantMatCost += getConstantMatCost(0, Op1Info);
2743 ConstantMatCost += getConstantMatCost(1, Op2Info);
2746 switch (ISDOpcode) {
2749 Op = RISCV::VADD_VV;
2754 Op = RISCV::VSLL_VV;
2759 Op = (Ty->getScalarSizeInBits() == 1) ? RISCV::VMAND_MM : RISCV::VAND_VV;
2764 Op = RISCV::VMUL_VV;
2768 Op = RISCV::VDIV_VV;
2772 Op = RISCV::VREM_VV;
2776 Op = RISCV::VFADD_VV;
2779 Op = RISCV::VFMUL_VV;
2782 Op = RISCV::VFDIV_VV;
2785 Op = RISCV::VFSGNJN_VV;
2790 return CastCost + ConstantMatCost +
2799 if (Ty->isFPOrFPVectorTy())
2801 return CastCost + ConstantMatCost + LT.first *
InstrCost;
2824 if (Info.isSameBase() && V !=
Base) {
2825 if (
GEP->hasAllConstantIndices())
2831 unsigned Stride =
DL.getTypeStoreSize(AccessTy);
2832 if (Info.isUnitStride() &&
2838 GEP->getType()->getPointerAddressSpace()))
2841 {TTI::OK_AnyValue, TTI::OP_None},
2842 {TTI::OK_AnyValue, TTI::OP_None}, {});
2859 if (ST->enableDefaultUnroll())
2869 if (L->getHeader()->getParent()->hasOptSize())
2873 L->getExitingBlocks(ExitingBlocks);
2875 <<
"Blocks: " << L->getNumBlocks() <<
"\n"
2876 <<
"Exit blocks: " << ExitingBlocks.
size() <<
"\n");
2880 if (ExitingBlocks.
size() > 2)
2885 if (L->getNumBlocks() > 4)
2893 for (
auto *BB : L->getBlocks()) {
2894 for (
auto &
I : *BB) {
2898 if (IsVectorized && (
I.getType()->isVectorTy() ||
2900 return V->getType()->isVectorTy();
2941 bool HasMask =
false;
2944 bool IsWrite) -> int64_t {
2945 if (
auto *TarExtTy =
2947 return TarExtTy->getIntParameter(0);
2953 case Intrinsic::riscv_vle_mask:
2954 case Intrinsic::riscv_vse_mask:
2955 case Intrinsic::riscv_vlseg2_mask:
2956 case Intrinsic::riscv_vlseg3_mask:
2957 case Intrinsic::riscv_vlseg4_mask:
2958 case Intrinsic::riscv_vlseg5_mask:
2959 case Intrinsic::riscv_vlseg6_mask:
2960 case Intrinsic::riscv_vlseg7_mask:
2961 case Intrinsic::riscv_vlseg8_mask:
2962 case Intrinsic::riscv_vsseg2_mask:
2963 case Intrinsic::riscv_vsseg3_mask:
2964 case Intrinsic::riscv_vsseg4_mask:
2965 case Intrinsic::riscv_vsseg5_mask:
2966 case Intrinsic::riscv_vsseg6_mask:
2967 case Intrinsic::riscv_vsseg7_mask:
2968 case Intrinsic::riscv_vsseg8_mask:
2971 case Intrinsic::riscv_vle:
2972 case Intrinsic::riscv_vse:
2973 case Intrinsic::riscv_vlseg2:
2974 case Intrinsic::riscv_vlseg3:
2975 case Intrinsic::riscv_vlseg4:
2976 case Intrinsic::riscv_vlseg5:
2977 case Intrinsic::riscv_vlseg6:
2978 case Intrinsic::riscv_vlseg7:
2979 case Intrinsic::riscv_vlseg8:
2980 case Intrinsic::riscv_vsseg2:
2981 case Intrinsic::riscv_vsseg3:
2982 case Intrinsic::riscv_vsseg4:
2983 case Intrinsic::riscv_vsseg5:
2984 case Intrinsic::riscv_vsseg6:
2985 case Intrinsic::riscv_vsseg7:
2986 case Intrinsic::riscv_vsseg8: {
3003 Ty = TarExtTy->getTypeParameter(0U);
3008 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3009 unsigned VLIndex = RVVIInfo->VLOperand;
3010 unsigned PtrOperandNo = VLIndex - 1 - HasMask;
3018 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3021 unsigned ElemSize = Ty->getScalarSizeInBits();
3025 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3026 Alignment, Mask, EVL);
3029 case Intrinsic::riscv_vlse_mask:
3030 case Intrinsic::riscv_vsse_mask:
3031 case Intrinsic::riscv_vlsseg2_mask:
3032 case Intrinsic::riscv_vlsseg3_mask:
3033 case Intrinsic::riscv_vlsseg4_mask:
3034 case Intrinsic::riscv_vlsseg5_mask:
3035 case Intrinsic::riscv_vlsseg6_mask:
3036 case Intrinsic::riscv_vlsseg7_mask:
3037 case Intrinsic::riscv_vlsseg8_mask:
3038 case Intrinsic::riscv_vssseg2_mask:
3039 case Intrinsic::riscv_vssseg3_mask:
3040 case Intrinsic::riscv_vssseg4_mask:
3041 case Intrinsic::riscv_vssseg5_mask:
3042 case Intrinsic::riscv_vssseg6_mask:
3043 case Intrinsic::riscv_vssseg7_mask:
3044 case Intrinsic::riscv_vssseg8_mask:
3047 case Intrinsic::riscv_vlse:
3048 case Intrinsic::riscv_vsse:
3049 case Intrinsic::riscv_vlsseg2:
3050 case Intrinsic::riscv_vlsseg3:
3051 case Intrinsic::riscv_vlsseg4:
3052 case Intrinsic::riscv_vlsseg5:
3053 case Intrinsic::riscv_vlsseg6:
3054 case Intrinsic::riscv_vlsseg7:
3055 case Intrinsic::riscv_vlsseg8:
3056 case Intrinsic::riscv_vssseg2:
3057 case Intrinsic::riscv_vssseg3:
3058 case Intrinsic::riscv_vssseg4:
3059 case Intrinsic::riscv_vssseg5:
3060 case Intrinsic::riscv_vssseg6:
3061 case Intrinsic::riscv_vssseg7:
3062 case Intrinsic::riscv_vssseg8: {
3079 Ty = TarExtTy->getTypeParameter(0U);
3084 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3085 unsigned VLIndex = RVVIInfo->VLOperand;
3086 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3098 Alignment =
Align(1);
3105 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3108 unsigned ElemSize = Ty->getScalarSizeInBits();
3112 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3113 Alignment, Mask, EVL, Stride);
3116 case Intrinsic::riscv_vloxei_mask:
3117 case Intrinsic::riscv_vluxei_mask:
3118 case Intrinsic::riscv_vsoxei_mask:
3119 case Intrinsic::riscv_vsuxei_mask:
3120 case Intrinsic::riscv_vloxseg2_mask:
3121 case Intrinsic::riscv_vloxseg3_mask:
3122 case Intrinsic::riscv_vloxseg4_mask:
3123 case Intrinsic::riscv_vloxseg5_mask:
3124 case Intrinsic::riscv_vloxseg6_mask:
3125 case Intrinsic::riscv_vloxseg7_mask:
3126 case Intrinsic::riscv_vloxseg8_mask:
3127 case Intrinsic::riscv_vluxseg2_mask:
3128 case Intrinsic::riscv_vluxseg3_mask:
3129 case Intrinsic::riscv_vluxseg4_mask:
3130 case Intrinsic::riscv_vluxseg5_mask:
3131 case Intrinsic::riscv_vluxseg6_mask:
3132 case Intrinsic::riscv_vluxseg7_mask:
3133 case Intrinsic::riscv_vluxseg8_mask:
3134 case Intrinsic::riscv_vsoxseg2_mask:
3135 case Intrinsic::riscv_vsoxseg3_mask:
3136 case Intrinsic::riscv_vsoxseg4_mask:
3137 case Intrinsic::riscv_vsoxseg5_mask:
3138 case Intrinsic::riscv_vsoxseg6_mask:
3139 case Intrinsic::riscv_vsoxseg7_mask:
3140 case Intrinsic::riscv_vsoxseg8_mask:
3141 case Intrinsic::riscv_vsuxseg2_mask:
3142 case Intrinsic::riscv_vsuxseg3_mask:
3143 case Intrinsic::riscv_vsuxseg4_mask:
3144 case Intrinsic::riscv_vsuxseg5_mask:
3145 case Intrinsic::riscv_vsuxseg6_mask:
3146 case Intrinsic::riscv_vsuxseg7_mask:
3147 case Intrinsic::riscv_vsuxseg8_mask:
3150 case Intrinsic::riscv_vloxei:
3151 case Intrinsic::riscv_vluxei:
3152 case Intrinsic::riscv_vsoxei:
3153 case Intrinsic::riscv_vsuxei:
3154 case Intrinsic::riscv_vloxseg2:
3155 case Intrinsic::riscv_vloxseg3:
3156 case Intrinsic::riscv_vloxseg4:
3157 case Intrinsic::riscv_vloxseg5:
3158 case Intrinsic::riscv_vloxseg6:
3159 case Intrinsic::riscv_vloxseg7:
3160 case Intrinsic::riscv_vloxseg8:
3161 case Intrinsic::riscv_vluxseg2:
3162 case Intrinsic::riscv_vluxseg3:
3163 case Intrinsic::riscv_vluxseg4:
3164 case Intrinsic::riscv_vluxseg5:
3165 case Intrinsic::riscv_vluxseg6:
3166 case Intrinsic::riscv_vluxseg7:
3167 case Intrinsic::riscv_vluxseg8:
3168 case Intrinsic::riscv_vsoxseg2:
3169 case Intrinsic::riscv_vsoxseg3:
3170 case Intrinsic::riscv_vsoxseg4:
3171 case Intrinsic::riscv_vsoxseg5:
3172 case Intrinsic::riscv_vsoxseg6:
3173 case Intrinsic::riscv_vsoxseg7:
3174 case Intrinsic::riscv_vsoxseg8:
3175 case Intrinsic::riscv_vsuxseg2:
3176 case Intrinsic::riscv_vsuxseg3:
3177 case Intrinsic::riscv_vsuxseg4:
3178 case Intrinsic::riscv_vsuxseg5:
3179 case Intrinsic::riscv_vsuxseg6:
3180 case Intrinsic::riscv_vsuxseg7:
3181 case Intrinsic::riscv_vsuxseg8: {
3198 Ty = TarExtTy->getTypeParameter(0U);
3203 const auto *RVVIInfo = RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IID);
3204 unsigned VLIndex = RVVIInfo->VLOperand;
3205 unsigned PtrOperandNo = VLIndex - 2 - HasMask;
3218 unsigned SegNum = getSegNum(Inst, PtrOperandNo, IsWrite);
3221 unsigned ElemSize = Ty->getScalarSizeInBits();
3226 Info.InterestingOperands.emplace_back(Inst, PtrOperandNo, IsWrite, Ty,
3227 Align(1), Mask, EVL,
3236 if (Ty->isVectorTy()) {
3239 if ((EltTy->
isHalfTy() && !ST->hasVInstructionsF16()) ||
3245 if (
Size.isScalable() && ST->hasVInstructions())
3248 if (ST->useRVVForFixedLengthVectors())
3268 return std::max<unsigned>(1U, RegWidth.
getFixedValue() / ElemWidth);
3276 return ST->enableUnalignedVectorMem();
3282 if (ST->hasVendorXCVmem() && !ST->is64Bit())
3304 Align Alignment)
const {
3306 if (!VTy || VTy->isScalableTy())
3314 if (VTy->getElementType()->isIntegerTy(8))
3315 if (VTy->getElementCount().getFixedValue() > 256)
3316 return VTy->getPrimitiveSizeInBits() / ST->getRealMinVLen() <
3317 ST->getMaxLMULForFixedLengthVectors();
3322 Align Alignment)
const {
3324 if (!VTy || VTy->isScalableTy())
3338 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
3339 bool Considerable =
false;
3340 AllowPromotionWithoutCommonHeader =
false;
3343 Type *ConsideredSExtType =
3345 if (
I.getType() != ConsideredSExtType)
3349 for (
const User *U :
I.users()) {
3351 Considerable =
true;
3355 if (GEPInst->getNumOperands() > 2) {
3356 AllowPromotionWithoutCommonHeader =
true;
3361 return Considerable;
3366 case Instruction::Add:
3367 case Instruction::Sub:
3368 case Instruction::Mul:
3369 case Instruction::And:
3370 case Instruction::Or:
3371 case Instruction::Xor:
3372 case Instruction::FAdd:
3373 case Instruction::FSub:
3374 case Instruction::FMul:
3375 case Instruction::FDiv:
3376 case Instruction::ICmp:
3377 case Instruction::FCmp:
3379 case Instruction::Shl:
3380 case Instruction::LShr:
3381 case Instruction::AShr:
3382 case Instruction::UDiv:
3383 case Instruction::SDiv:
3384 case Instruction::URem:
3385 case Instruction::SRem:
3386 case Instruction::Select:
3387 return Operand == 1;
3394 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3404 switch (
II->getIntrinsicID()) {
3405 case Intrinsic::fma:
3406 case Intrinsic::vp_fma:
3407 case Intrinsic::fmuladd:
3408 case Intrinsic::vp_fmuladd:
3409 return Operand == 0 || Operand == 1;
3410 case Intrinsic::vp_shl:
3411 case Intrinsic::vp_lshr:
3412 case Intrinsic::vp_ashr:
3413 case Intrinsic::vp_udiv:
3414 case Intrinsic::vp_sdiv:
3415 case Intrinsic::vp_urem:
3416 case Intrinsic::vp_srem:
3417 case Intrinsic::ssub_sat:
3418 case Intrinsic::vp_ssub_sat:
3419 case Intrinsic::usub_sat:
3420 case Intrinsic::vp_usub_sat:
3421 case Intrinsic::vp_select:
3422 return Operand == 1;
3424 case Intrinsic::vp_add:
3425 case Intrinsic::vp_mul:
3426 case Intrinsic::vp_and:
3427 case Intrinsic::vp_or:
3428 case Intrinsic::vp_xor:
3429 case Intrinsic::vp_fadd:
3430 case Intrinsic::vp_fmul:
3431 case Intrinsic::vp_icmp:
3432 case Intrinsic::vp_fcmp:
3433 case Intrinsic::smin:
3434 case Intrinsic::vp_smin:
3435 case Intrinsic::umin:
3436 case Intrinsic::vp_umin:
3437 case Intrinsic::smax:
3438 case Intrinsic::vp_smax:
3439 case Intrinsic::umax:
3440 case Intrinsic::vp_umax:
3441 case Intrinsic::sadd_sat:
3442 case Intrinsic::vp_sadd_sat:
3443 case Intrinsic::uadd_sat:
3444 case Intrinsic::vp_uadd_sat:
3446 case Intrinsic::vp_sub:
3447 case Intrinsic::vp_fsub:
3448 case Intrinsic::vp_fdiv:
3449 return Operand == 0 || Operand == 1;
3462 if (
I->isBitwiseLogicOp()) {
3463 if (!
I->getType()->isVectorTy()) {
3464 if (ST->hasStdExtZbb() || ST->hasStdExtZbkb()) {
3465 for (
auto &
Op :
I->operands()) {
3473 }
else if (
I->getOpcode() == Instruction::And && ST->hasStdExtZvkb()) {
3474 for (
auto &
Op :
I->operands()) {
3486 Ops.push_back(&Not);
3487 Ops.push_back(&InsertElt);
3495 if (!
I->getType()->isVectorTy() || !ST->hasVInstructions())
3503 if (!ST->sinkSplatOperands())
3526 for (
Use &U :
Op->uses()) {
3533 Use *InsertEltUse = &
Op->getOperandUse(0);
3536 Ops.push_back(&InsertElt->getOperandUse(1));
3537 Ops.push_back(InsertEltUse);
3548 if (!ST->enableUnalignedScalarMem())
3551 if (!ST->hasStdExtZbb() && !ST->hasStdExtZbkb() && !IsZeroCmp)
3554 Options.AllowOverlappingLoads =
true;
3555 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
3557 if (ST->is64Bit()) {
3558 Options.LoadSizes = {8, 4, 2, 1};
3559 Options.AllowedTailExpansions = {3, 5, 6};
3561 Options.LoadSizes = {4, 2, 1};
3562 Options.AllowedTailExpansions = {3};
3565 if (IsZeroCmp && ST->hasVInstructions()) {
3566 unsigned VLenB = ST->getRealMinVLen() / 8;
3569 unsigned MinSize = ST->getXLen() / 8 + 1;
3570 unsigned MaxSize = VLenB * ST->getMaxLMULForFixedLengthVectors();
3584 if (
I->getOpcode() == Instruction::Or &&
3589 if (
I->getOpcode() == Instruction::Add ||
3590 I->getOpcode() == Instruction::Sub)
3608std::optional<Instruction *>
3614 if (
II.user_empty())
3619 const APInt *Scalar;
3624 return U->getType() == TargetVecTy && match(U, m_BitCast(m_Value()));
3628 unsigned TargetEltBW =
DL.getTypeSizeInBits(TargetVecTy->getElementType());
3629 unsigned SourceEltBW =
DL.getTypeSizeInBits(SourceVecTy->getElementType());
3630 if (TargetEltBW % SourceEltBW)
3632 unsigned TargetScale = TargetEltBW / SourceEltBW;
3633 if (VL % TargetScale)
3635 Type *VLTy =
II.getOperand(2)->getType();
3636 ElementCount SourceEC = SourceVecTy->getElementCount();
3637 unsigned NewEltBW = SourceEltBW * TargetScale;
3639 !
DL.fitsInLegalInteger(NewEltBW))
3642 if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(
DL, NewEltTy)))
3646 assert(SourceVecTy->canLosslesslyBitCastTo(RetTy) &&
3647 "Lossless bitcast between types expected");
3653 RetTy, Intrinsic::riscv_vmv_v_x,
3654 {PoisonValue::get(RetTy), ConstantInt::get(NewEltTy, NewScalar),
3655 ConstantInt::get(VLTy, VL / TargetScale)}),
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
static cl::opt< int > InstrCost("inline-instr-cost", cl::Hidden, cl::init(5), cl::desc("Cost of a single instruction when inlining"))
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
This file describes how to lower LLVM code to machine code.
Class for arbitrary precision integers.
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI bool isStringAttribute() const
Return true if the attribute is a string (target-dependent) attribute.
LLVM_ABI StringRef getKindAsString() const
Return the attribute's kind as a string.
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
std::optional< unsigned > getMaxVScale() const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
bool isLegalAddImmediate(int64_t imm) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isFPPredicate(Predicate P)
static bool isIntPredicate(Predicate P)
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
This class represents a range of values.
A parsed version of the target data layout string in and methods for querying it.
Convenience struct for specifying and reasoning about fast-math flags.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static FixedVectorType * getDoubleElementsVectorType(FixedVectorType *VTy)
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
The core instruction combiner logic.
const DataLayout & getDataLayout() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Represents a single loop in the control flow graph.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
bool bitsLE(MVT VT) const
Return true if this has no more bits than VT.
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
bool bitsGT(MVT VT) const
Return true if this has more bits than VT.
bool isFixedLengthVector() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Information for memory intrinsic cost model.
Align getAlignment() const
unsigned getAddressSpace() const
Type * getDataType() const
bool getVariableMask() const
Intrinsic::ID getID() const
const Instruction * getInst() const
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool shouldCopyAttributeWhenOutliningFrom(const Function *Caller, const Attribute &Attr) const override
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const override
InstructionCost getStridedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
unsigned getMinTripCountTailFoldingThreshold() const override
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const override
InstructionCost getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
InstructionCost getStoreImmCost(Type *VecTy, TTI::OperandValueInfo OpInfo, TTI::TargetCostKind CostKind) const
Return the cost of materializing an immediate for a value operand of a store instruction.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
bool hasActiveVectorLength() const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
Try to calculate op costs for min/max reduction operations.
bool canSplatOperand(Instruction *I, int Operand) const
Return true if the (vector) instruction I will be lowered to an instruction with a scalar splat opera...
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const override
unsigned getRegUsageForType(Type *Ty) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const override
bool isLegalMaskedCompressStore(Type *DataTy, Align Alignment) const override
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
InstructionCost getExpandCompressMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool preferAlternateOpcodeVectorization() const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< unsigned > getMaxVScale() const override
bool shouldExpandReduction(const IntrinsicInst *II) const override
std::optional< unsigned > getVScaleForTuning() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Get memory intrinsic cost based on arguments.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
Estimate the overhead of scalarizing an instruction.
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpdInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
Get intrinsic cost based on arguments.
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
static MVT getM1VT(MVT VT)
Given a vector (either fixed or scalable), return the scalable vector corresponding to a vector regis...
InstructionCost getVRGatherVVCost(MVT VT) const
Return the cost of a vrgather.vv instruction for the type VT.
InstructionCost getVRGatherVICost(MVT VT) const
Return the cost of a vrgather.vi (or vx) instruction for the type VT.
static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize, unsigned MinSize)
InstructionCost getLMULCost(MVT VT) const
Return the cost of LMUL for linear operations.
InstructionCost getVSlideVICost(MVT VT) const
Return the cost of a vslidedown.vi or vslideup.vi instruction for the type VT.
InstructionCost getVSlideVXCost(MVT VT) const
Return the cost of a vslidedown.vx or vslideup.vx instruction for the type VT.
static RISCVVType::VLMUL getLMUL(MVT VT)
This class represents an analyzed expression in the program.
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
The main scalar evolution driver.
static LLVM_ABI bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossings.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
Implements a dense probed hash-table based set with some number of buckets stored inline.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
std::pair< iterator, bool > insert(const ValueT &V)
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
int getIntMatCost(const APInt &Val, unsigned Size, const MCSubtargetInfo &STI, bool CompressionCost, bool FreeZeroes)
static constexpr unsigned RVVBitsPerBlock
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
OutputIt copy(R &&Range, OutputIt Out)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Information about a load/store intrinsic defined by the target.