23#include "llvm/IR/IntrinsicsAArch64.h"
35#define DEBUG_TYPE "aarch64tti"
41 "sve-prefer-fixed-over-scalable-if-equal",
cl::Hidden);
59 "Penalty of calling a function that requires a change to PSTATE.SM"));
63 cl::desc(
"Penalty of inlining a call that requires a change to PSTATE.SM"));
74 cl::desc(
"The cost of a histcnt instruction"));
78 cl::desc(
"The number of instructions to search for a redundant dmb"));
82 cl::desc(
"Threshold for forced unrolling of small loops in AArch64"));
85class TailFoldingOption {
100 bool NeedsDefault =
true;
104 void setNeedsDefault(
bool V) { NeedsDefault =
V; }
119 assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
120 "Initial bits should only include one of "
121 "(disabled|all|simple|default)");
122 Bits = NeedsDefault ? DefaultBits : InitialBits;
124 Bits &= ~DisableBits;
130 errs() <<
"invalid argument '" << Opt
131 <<
"' to -sve-tail-folding=; the option should be of the form\n"
132 " (disabled|all|default|simple)[+(reductions|recurrences"
133 "|reverse|noreductions|norecurrences|noreverse)]\n";
139 void operator=(
const std::string &Val) {
148 setNeedsDefault(
false);
151 StringRef(Val).split(TailFoldTypes,
'+', -1,
false);
153 unsigned StartIdx = 1;
154 if (TailFoldTypes[0] ==
"disabled")
155 setInitialBits(TailFoldingOpts::Disabled);
156 else if (TailFoldTypes[0] ==
"all")
157 setInitialBits(TailFoldingOpts::All);
158 else if (TailFoldTypes[0] ==
"default")
159 setNeedsDefault(
true);
160 else if (TailFoldTypes[0] ==
"simple")
161 setInitialBits(TailFoldingOpts::Simple);
164 setInitialBits(TailFoldingOpts::Disabled);
167 for (
unsigned I = StartIdx;
I < TailFoldTypes.
size();
I++) {
168 if (TailFoldTypes[
I] ==
"reductions")
169 setEnableBit(TailFoldingOpts::Reductions);
170 else if (TailFoldTypes[
I] ==
"recurrences")
171 setEnableBit(TailFoldingOpts::Recurrences);
172 else if (TailFoldTypes[
I] ==
"reverse")
173 setEnableBit(TailFoldingOpts::Reverse);
174 else if (TailFoldTypes[
I] ==
"noreductions")
175 setDisableBit(TailFoldingOpts::Reductions);
176 else if (TailFoldTypes[
I] ==
"norecurrences")
177 setDisableBit(TailFoldingOpts::Recurrences);
178 else if (TailFoldTypes[
I] ==
"noreverse")
179 setDisableBit(TailFoldingOpts::Reverse);
196 "Control the use of vectorisation using tail-folding for SVE where the"
197 " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
198 "\ndisabled (Initial) No loop types will vectorize using "
200 "\ndefault (Initial) Uses the default tail-folding settings for "
202 "\nall (Initial) All legal loop types will vectorize using "
204 "\nsimple (Initial) Use tail-folding for simple loops (not "
205 "reductions or recurrences)"
206 "\nreductions Use tail-folding for loops containing reductions"
207 "\nnoreductions Inverse of above"
208 "\nrecurrences Use tail-folding for loops containing fixed order "
210 "\nnorecurrences Inverse of above"
211 "\nreverse Use tail-folding for loops requiring reversed "
213 "\nnoreverse Inverse of above"),
258 TTI->isMultiversionedFunction(
F) ?
"fmv-features" :
"target-features";
259 StringRef FeatureStr =
F.getFnAttribute(AttributeStr).getValueAsString();
260 FeatureStr.
split(Features,
",");
276 return F.hasFnAttribute(
"fmv-features");
280 AArch64::FeatureExecuteOnly,
320 FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures;
321 FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures;
323 return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits;
341 auto FVTy = dyn_cast<FixedVectorType>(Ty);
343 FVTy->getScalarSizeInBits() * FVTy->getNumElements() > 128;
352 unsigned DefaultCallPenalty)
const {
377 if (
F ==
Call.getCaller())
383 return DefaultCallPenalty;
394 ST->isSVEorStreamingSVEAvailable() &&
395 !ST->disableMaximizeScalableBandwidth();
419 assert(Ty->isIntegerTy());
421 unsigned BitSize = Ty->getPrimitiveSizeInBits();
428 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
433 for (
unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
439 return std::max<InstructionCost>(1,
Cost);
446 assert(Ty->isIntegerTy());
448 unsigned BitSize = Ty->getPrimitiveSizeInBits();
454 unsigned ImmIdx = ~0U;
458 case Instruction::GetElementPtr:
463 case Instruction::Store:
466 case Instruction::Add:
467 case Instruction::Sub:
468 case Instruction::Mul:
469 case Instruction::UDiv:
470 case Instruction::SDiv:
471 case Instruction::URem:
472 case Instruction::SRem:
473 case Instruction::And:
474 case Instruction::Or:
475 case Instruction::Xor:
476 case Instruction::ICmp:
480 case Instruction::Shl:
481 case Instruction::LShr:
482 case Instruction::AShr:
486 case Instruction::Trunc:
487 case Instruction::ZExt:
488 case Instruction::SExt:
489 case Instruction::IntToPtr:
490 case Instruction::PtrToInt:
491 case Instruction::BitCast:
492 case Instruction::PHI:
493 case Instruction::Call:
494 case Instruction::Select:
495 case Instruction::Ret:
496 case Instruction::Load:
501 int NumConstants = (BitSize + 63) / 64;
514 assert(Ty->isIntegerTy());
516 unsigned BitSize = Ty->getPrimitiveSizeInBits();
525 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
531 case Intrinsic::sadd_with_overflow:
532 case Intrinsic::uadd_with_overflow:
533 case Intrinsic::ssub_with_overflow:
534 case Intrinsic::usub_with_overflow:
535 case Intrinsic::smul_with_overflow:
536 case Intrinsic::umul_with_overflow:
538 int NumConstants = (BitSize + 63) / 64;
545 case Intrinsic::experimental_stackmap:
546 if ((Idx < 2) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
549 case Intrinsic::experimental_patchpoint_void:
550 case Intrinsic::experimental_patchpoint:
551 if ((Idx < 4) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
554 case Intrinsic::experimental_gc_statepoint:
555 if ((Idx < 5) || (Imm.getBitWidth() <= 64 &&
isInt<64>(Imm.getSExtValue())))
565 if (TyWidth == 32 || TyWidth == 64)
589 unsigned TotalHistCnts = 1;
599 unsigned EC = VTy->getElementCount().getKnownMinValue();
604 unsigned LegalEltSize = EltSize <= 32 ? 32 : 64;
606 if (EC == 2 || (LegalEltSize == 32 && EC == 4))
610 TotalHistCnts = EC / NaturalVectorWidth;
630 switch (ICA.
getID()) {
631 case Intrinsic::experimental_vector_histogram_add: {
638 case Intrinsic::umin:
639 case Intrinsic::umax:
640 case Intrinsic::smin:
641 case Intrinsic::smax: {
642 static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
643 MVT::v8i16, MVT::v2i32, MVT::v4i32,
644 MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
648 if (LT.second == MVT::v2i64)
654 case Intrinsic::scmp:
655 case Intrinsic::ucmp: {
657 {Intrinsic::scmp, MVT::i32, 3},
658 {Intrinsic::scmp, MVT::i64, 3},
659 {Intrinsic::scmp, MVT::v8i8, 3},
660 {Intrinsic::scmp, MVT::v16i8, 3},
661 {Intrinsic::scmp, MVT::v4i16, 3},
662 {Intrinsic::scmp, MVT::v8i16, 3},
663 {Intrinsic::scmp, MVT::v2i32, 3},
664 {Intrinsic::scmp, MVT::v4i32, 3},
665 {Intrinsic::scmp, MVT::v1i64, 3},
666 {Intrinsic::scmp, MVT::v2i64, 3},
672 return Entry->Cost * LT.first;
675 case Intrinsic::sadd_sat:
676 case Intrinsic::ssub_sat:
677 case Intrinsic::uadd_sat:
678 case Intrinsic::usub_sat: {
679 static const auto ValidSatTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
680 MVT::v8i16, MVT::v2i32, MVT::v4i32,
686 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits() ? 1 : 4;
688 return LT.first * Instrs;
693 if (ST->isSVEAvailable() && VectorSize >= 128 &&
isPowerOf2_64(VectorSize))
694 return LT.first * Instrs;
698 case Intrinsic::abs: {
699 static const auto ValidAbsTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
700 MVT::v8i16, MVT::v2i32, MVT::v4i32,
701 MVT::v2i64, MVT::nxv16i8, MVT::nxv8i16,
702 MVT::nxv4i32, MVT::nxv2i64};
708 case Intrinsic::bswap: {
709 static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
710 MVT::v4i32, MVT::v2i64};
713 LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
718 case Intrinsic::fmuladd: {
723 (EltTy->
isHalfTy() && ST->hasFullFP16()))
727 case Intrinsic::stepvector: {
736 Cost += AddCost * (LT.first - 1);
740 case Intrinsic::vector_extract:
741 case Intrinsic::vector_insert: {
754 bool IsExtract = ICA.
getID() == Intrinsic::vector_extract;
755 EVT SubVecVT = IsExtract ? getTLI()->getValueType(
DL, RetTy)
763 getTLI()->getTypeConversion(
C, SubVecVT);
765 getTLI()->getTypeConversion(
C, VecVT);
773 case Intrinsic::bitreverse: {
775 {Intrinsic::bitreverse, MVT::i32, 1},
776 {Intrinsic::bitreverse, MVT::i64, 1},
777 {Intrinsic::bitreverse, MVT::v8i8, 1},
778 {Intrinsic::bitreverse, MVT::v16i8, 1},
779 {Intrinsic::bitreverse, MVT::v4i16, 2},
780 {Intrinsic::bitreverse, MVT::v8i16, 2},
781 {Intrinsic::bitreverse, MVT::v2i32, 2},
782 {Intrinsic::bitreverse, MVT::v4i32, 2},
783 {Intrinsic::bitreverse, MVT::v1i64, 2},
784 {Intrinsic::bitreverse, MVT::v2i64, 2},
792 if (TLI->getValueType(
DL, RetTy,
true) == MVT::i8 ||
793 TLI->getValueType(
DL, RetTy,
true) == MVT::i16)
794 return LegalisationCost.first * Entry->Cost + 1;
796 return LegalisationCost.first * Entry->Cost;
800 case Intrinsic::ctpop: {
801 if (!ST->hasNEON()) {
822 RetTy->getScalarSizeInBits()
825 return LT.first * Entry->Cost + ExtraCost;
829 case Intrinsic::sadd_with_overflow:
830 case Intrinsic::uadd_with_overflow:
831 case Intrinsic::ssub_with_overflow:
832 case Intrinsic::usub_with_overflow:
833 case Intrinsic::smul_with_overflow:
834 case Intrinsic::umul_with_overflow: {
836 {Intrinsic::sadd_with_overflow, MVT::i8, 3},
837 {Intrinsic::uadd_with_overflow, MVT::i8, 3},
838 {Intrinsic::sadd_with_overflow, MVT::i16, 3},
839 {Intrinsic::uadd_with_overflow, MVT::i16, 3},
840 {Intrinsic::sadd_with_overflow, MVT::i32, 1},
841 {Intrinsic::uadd_with_overflow, MVT::i32, 1},
842 {Intrinsic::sadd_with_overflow, MVT::i64, 1},
843 {Intrinsic::uadd_with_overflow, MVT::i64, 1},
844 {Intrinsic::ssub_with_overflow, MVT::i8, 3},
845 {Intrinsic::usub_with_overflow, MVT::i8, 3},
846 {Intrinsic::ssub_with_overflow, MVT::i16, 3},
847 {Intrinsic::usub_with_overflow, MVT::i16, 3},
848 {Intrinsic::ssub_with_overflow, MVT::i32, 1},
849 {Intrinsic::usub_with_overflow, MVT::i32, 1},
850 {Intrinsic::ssub_with_overflow, MVT::i64, 1},
851 {Intrinsic::usub_with_overflow, MVT::i64, 1},
852 {Intrinsic::smul_with_overflow, MVT::i8, 5},
853 {Intrinsic::umul_with_overflow, MVT::i8, 4},
854 {Intrinsic::smul_with_overflow, MVT::i16, 5},
855 {Intrinsic::umul_with_overflow, MVT::i16, 4},
856 {Intrinsic::smul_with_overflow, MVT::i32, 2},
857 {Intrinsic::umul_with_overflow, MVT::i32, 2},
858 {Intrinsic::smul_with_overflow, MVT::i64, 3},
859 {Intrinsic::umul_with_overflow, MVT::i64, 3},
861 EVT MTy = TLI->getValueType(
DL, RetTy->getContainedType(0),
true);
868 case Intrinsic::fptosi_sat:
869 case Intrinsic::fptoui_sat: {
872 bool IsSigned = ICA.
getID() == Intrinsic::fptosi_sat;
874 EVT MTy = TLI->getValueType(
DL, RetTy);
877 if ((LT.second == MVT::f32 || LT.second == MVT::f64 ||
878 LT.second == MVT::v2f32 || LT.second == MVT::v4f32 ||
879 LT.second == MVT::v2f64)) {
881 (LT.second == MVT::f64 && MTy == MVT::i32) ||
882 (LT.second == MVT::f32 && MTy == MVT::i64)))
891 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
898 if ((LT.second == MVT::f16 && MTy == MVT::i32) ||
899 (LT.second == MVT::f16 && MTy == MVT::i64) ||
900 ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) &&
914 if ((LT.second.getScalarType() == MVT::f32 ||
915 LT.second.getScalarType() == MVT::f64 ||
916 LT.second.getScalarType() == MVT::f16) &&
920 if (LT.second.isVector())
924 LegalTy, {LegalTy, LegalTy});
927 LegalTy, {LegalTy, LegalTy});
929 return LT.first *
Cost +
930 ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0
936 RetTy = RetTy->getScalarType();
937 if (LT.second.isVector()) {
955 return LT.first *
Cost;
957 case Intrinsic::fshl:
958 case Intrinsic::fshr: {
967 if (RetTy->isIntegerTy() && ICA.
getArgs()[0] == ICA.
getArgs()[1] &&
968 (RetTy->getPrimitiveSizeInBits() == 32 ||
969 RetTy->getPrimitiveSizeInBits() == 64)) {
982 {Intrinsic::fshl, MVT::v4i32, 2},
983 {Intrinsic::fshl, MVT::v2i64, 2}, {Intrinsic::fshl, MVT::v16i8, 2},
984 {Intrinsic::fshl, MVT::v8i16, 2}, {Intrinsic::fshl, MVT::v2i32, 2},
985 {Intrinsic::fshl, MVT::v8i8, 2}, {Intrinsic::fshl, MVT::v4i16, 2}};
991 return LegalisationCost.first * Entry->Cost;
995 if (!RetTy->isIntegerTy())
1000 bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
1001 RetTy->getScalarSizeInBits() < 64) ||
1002 (RetTy->getScalarSizeInBits() % 64 != 0);
1003 unsigned ExtraCost = HigherCost ? 1 : 0;
1004 if (RetTy->getScalarSizeInBits() == 32 ||
1005 RetTy->getScalarSizeInBits() == 64)
1008 else if (HigherCost)
1012 return TyL.first + ExtraCost;
1014 case Intrinsic::get_active_lane_mask: {
1016 EVT RetVT = getTLI()->getValueType(
DL, RetTy);
1018 if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT))
1021 if (RetTy->isScalableTy()) {
1022 if (TLI->getTypeAction(RetTy->getContext(), RetVT) !=
1032 if (ST->hasSVE2p1() || ST->hasSME2()) {
1047 return Cost + (SplitCost * (
Cost - 1));
1062 case Intrinsic::experimental_vector_match: {
1065 unsigned SearchSize = NeedleTy->getNumElements();
1066 if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
1079 case Intrinsic::experimental_cttz_elts: {
1081 if (!getTLI()->shouldExpandCttzElements(ArgVT)) {
1089 case Intrinsic::loop_dependence_raw_mask:
1090 case Intrinsic::loop_dependence_war_mask: {
1092 if (ST->hasSVE2() || ST->hasSME()) {
1093 EVT VecVT = getTLI()->getValueType(
DL, RetTy);
1094 unsigned EltSizeInBytes =
1104 case Intrinsic::experimental_vector_extract_last_active:
1105 if (ST->isSVEorStreamingSVEAvailable()) {
1111 case Intrinsic::pow: {
1112 EVT VT = getTLI()->getValueType(
DL, RetTy);
1114 if (getTLI()->getLibcallImpl(LC) != RTLIB::Unsupported)
1118 case Intrinsic::sqrt:
1119 case Intrinsic::fabs:
1120 case Intrinsic::ceil:
1121 case Intrinsic::floor:
1122 case Intrinsic::nearbyint:
1123 case Intrinsic::round:
1124 case Intrinsic::rint:
1125 case Intrinsic::roundeven:
1126 case Intrinsic::trunc:
1127 case Intrinsic::minnum:
1128 case Intrinsic::maxnum:
1129 case Intrinsic::minimum:
1130 case Intrinsic::maximum: {
1148 auto RequiredType =
II.getType();
1151 assert(PN &&
"Expected Phi Node!");
1154 if (!PN->hasOneUse())
1155 return std::nullopt;
1157 for (
Value *IncValPhi : PN->incoming_values()) {
1160 Reinterpret->getIntrinsicID() !=
1161 Intrinsic::aarch64_sve_convert_to_svbool ||
1162 RequiredType != Reinterpret->getArgOperand(0)->getType())
1163 return std::nullopt;
1171 for (
unsigned I = 0;
I < PN->getNumIncomingValues();
I++) {
1173 NPN->
addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(
I));
1246 return GoverningPredicateIdx != std::numeric_limits<unsigned>::max();
1251 return GoverningPredicateIdx;
1256 GoverningPredicateIdx = Index;
1274 return UndefIntrinsic;
1279 UndefIntrinsic = IID;
1301 return ResultLanes == InactiveLanesTakenFromOperand;
1306 return OperandIdxForInactiveLanes;
1310 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1311 ResultLanes = InactiveLanesTakenFromOperand;
1312 OperandIdxForInactiveLanes = Index;
1317 return ResultLanes == InactiveLanesAreNotDefined;
1321 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1322 ResultLanes = InactiveLanesAreNotDefined;
1327 return ResultLanes == InactiveLanesAreUnused;
1331 assert(ResultLanes == Uninitialized &&
"Cannot set property twice!");
1332 ResultLanes = InactiveLanesAreUnused;
1342 ResultIsZeroInitialized =
true;
1353 return OperandIdxWithNoActiveLanes != std::numeric_limits<unsigned>::max();
1358 return OperandIdxWithNoActiveLanes;
1363 OperandIdxWithNoActiveLanes = Index;
1368 unsigned GoverningPredicateIdx = std::numeric_limits<unsigned>::max();
1371 unsigned IROpcode = 0;
1373 enum PredicationStyle {
1375 InactiveLanesTakenFromOperand,
1376 InactiveLanesAreNotDefined,
1377 InactiveLanesAreUnused
1380 bool ResultIsZeroInitialized =
false;
1381 unsigned OperandIdxForInactiveLanes = std::numeric_limits<unsigned>::max();
1382 unsigned OperandIdxWithNoActiveLanes = std::numeric_limits<unsigned>::max();
1390 return !isa<ScalableVectorType>(V->getType());
1398 case Intrinsic::aarch64_sve_fcvt_bf16f32_v2:
1399 case Intrinsic::aarch64_sve_fcvt_f16f32:
1400 case Intrinsic::aarch64_sve_fcvt_f16f64:
1401 case Intrinsic::aarch64_sve_fcvt_f32f16:
1402 case Intrinsic::aarch64_sve_fcvt_f32f64:
1403 case Intrinsic::aarch64_sve_fcvt_f64f16:
1404 case Intrinsic::aarch64_sve_fcvt_f64f32:
1405 case Intrinsic::aarch64_sve_fcvtlt_f32f16:
1406 case Intrinsic::aarch64_sve_fcvtlt_f64f32:
1407 case Intrinsic::aarch64_sve_fcvtx_f32f64:
1408 case Intrinsic::aarch64_sve_fcvtzs:
1409 case Intrinsic::aarch64_sve_fcvtzs_i32f16:
1410 case Intrinsic::aarch64_sve_fcvtzs_i32f64:
1411 case Intrinsic::aarch64_sve_fcvtzs_i64f16:
1412 case Intrinsic::aarch64_sve_fcvtzs_i64f32:
1413 case Intrinsic::aarch64_sve_fcvtzu:
1414 case Intrinsic::aarch64_sve_fcvtzu_i32f16:
1415 case Intrinsic::aarch64_sve_fcvtzu_i32f64:
1416 case Intrinsic::aarch64_sve_fcvtzu_i64f16:
1417 case Intrinsic::aarch64_sve_fcvtzu_i64f32:
1418 case Intrinsic::aarch64_sve_scvtf:
1419 case Intrinsic::aarch64_sve_scvtf_f16i32:
1420 case Intrinsic::aarch64_sve_scvtf_f16i64:
1421 case Intrinsic::aarch64_sve_scvtf_f32i64:
1422 case Intrinsic::aarch64_sve_scvtf_f64i32:
1423 case Intrinsic::aarch64_sve_ucvtf:
1424 case Intrinsic::aarch64_sve_ucvtf_f16i32:
1425 case Intrinsic::aarch64_sve_ucvtf_f16i64:
1426 case Intrinsic::aarch64_sve_ucvtf_f32i64:
1427 case Intrinsic::aarch64_sve_ucvtf_f64i32:
1430 case Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2:
1431 case Intrinsic::aarch64_sve_fcvtnt_f16f32:
1432 case Intrinsic::aarch64_sve_fcvtnt_f32f64:
1433 case Intrinsic::aarch64_sve_fcvtxnt_f32f64:
1436 case Intrinsic::aarch64_sve_fabd:
1438 case Intrinsic::aarch64_sve_fadd:
1441 case Intrinsic::aarch64_sve_fdiv:
1444 case Intrinsic::aarch64_sve_fmax:
1446 case Intrinsic::aarch64_sve_fmaxnm:
1448 case Intrinsic::aarch64_sve_fmin:
1450 case Intrinsic::aarch64_sve_fminnm:
1452 case Intrinsic::aarch64_sve_fmla:
1454 case Intrinsic::aarch64_sve_fmls:
1456 case Intrinsic::aarch64_sve_fmul:
1459 case Intrinsic::aarch64_sve_fmulx:
1461 case Intrinsic::aarch64_sve_fnmla:
1463 case Intrinsic::aarch64_sve_fnmls:
1465 case Intrinsic::aarch64_sve_fsub:
1468 case Intrinsic::aarch64_sve_add:
1471 case Intrinsic::aarch64_sve_mla:
1473 case Intrinsic::aarch64_sve_mls:
1475 case Intrinsic::aarch64_sve_mul:
1478 case Intrinsic::aarch64_sve_sabd:
1480 case Intrinsic::aarch64_sve_sdiv:
1483 case Intrinsic::aarch64_sve_smax:
1485 case Intrinsic::aarch64_sve_smin:
1487 case Intrinsic::aarch64_sve_smulh:
1489 case Intrinsic::aarch64_sve_sub:
1492 case Intrinsic::aarch64_sve_uabd:
1494 case Intrinsic::aarch64_sve_udiv:
1497 case Intrinsic::aarch64_sve_umax:
1499 case Intrinsic::aarch64_sve_umin:
1501 case Intrinsic::aarch64_sve_umulh:
1503 case Intrinsic::aarch64_sve_asr:
1506 case Intrinsic::aarch64_sve_lsl:
1509 case Intrinsic::aarch64_sve_lsr:
1512 case Intrinsic::aarch64_sve_and:
1515 case Intrinsic::aarch64_sve_bic:
1517 case Intrinsic::aarch64_sve_eor:
1520 case Intrinsic::aarch64_sve_orr:
1523 case Intrinsic::aarch64_sve_shsub:
1525 case Intrinsic::aarch64_sve_shsubr:
1527 case Intrinsic::aarch64_sve_sqrshl:
1529 case Intrinsic::aarch64_sve_sqshl:
1531 case Intrinsic::aarch64_sve_sqsub:
1533 case Intrinsic::aarch64_sve_srshl:
1535 case Intrinsic::aarch64_sve_uhsub:
1537 case Intrinsic::aarch64_sve_uhsubr:
1539 case Intrinsic::aarch64_sve_uqrshl:
1541 case Intrinsic::aarch64_sve_uqshl:
1543 case Intrinsic::aarch64_sve_uqsub:
1545 case Intrinsic::aarch64_sve_urshl:
1548 case Intrinsic::aarch64_sve_add_u:
1551 case Intrinsic::aarch64_sve_and_u:
1554 case Intrinsic::aarch64_sve_asr_u:
1557 case Intrinsic::aarch64_sve_eor_u:
1560 case Intrinsic::aarch64_sve_fadd_u:
1563 case Intrinsic::aarch64_sve_fdiv_u:
1566 case Intrinsic::aarch64_sve_fmul_u:
1569 case Intrinsic::aarch64_sve_fsub_u:
1572 case Intrinsic::aarch64_sve_lsl_u:
1575 case Intrinsic::aarch64_sve_lsr_u:
1578 case Intrinsic::aarch64_sve_mul_u:
1581 case Intrinsic::aarch64_sve_orr_u:
1584 case Intrinsic::aarch64_sve_sdiv_u:
1587 case Intrinsic::aarch64_sve_sub_u:
1590 case Intrinsic::aarch64_sve_udiv_u:
1594 case Intrinsic::aarch64_sve_addqv:
1595 case Intrinsic::aarch64_sve_and_z:
1596 case Intrinsic::aarch64_sve_bic_z:
1597 case Intrinsic::aarch64_sve_brka_z:
1598 case Intrinsic::aarch64_sve_brkb_z:
1599 case Intrinsic::aarch64_sve_brkn_z:
1600 case Intrinsic::aarch64_sve_brkpa_z:
1601 case Intrinsic::aarch64_sve_brkpb_z:
1602 case Intrinsic::aarch64_sve_cntp:
1603 case Intrinsic::aarch64_sve_compact:
1604 case Intrinsic::aarch64_sve_eor_z:
1605 case Intrinsic::aarch64_sve_eorv:
1606 case Intrinsic::aarch64_sve_eorqv:
1607 case Intrinsic::aarch64_sve_nand_z:
1608 case Intrinsic::aarch64_sve_nor_z:
1609 case Intrinsic::aarch64_sve_orn_z:
1610 case Intrinsic::aarch64_sve_orr_z:
1611 case Intrinsic::aarch64_sve_orv:
1612 case Intrinsic::aarch64_sve_orqv:
1613 case Intrinsic::aarch64_sve_pnext:
1614 case Intrinsic::aarch64_sve_rdffr_z:
1615 case Intrinsic::aarch64_sve_saddv:
1616 case Intrinsic::aarch64_sve_uaddv:
1617 case Intrinsic::aarch64_sve_umaxv:
1618 case Intrinsic::aarch64_sve_umaxqv:
1619 case Intrinsic::aarch64_sve_cmpeq:
1620 case Intrinsic::aarch64_sve_cmpeq_wide:
1621 case Intrinsic::aarch64_sve_cmpge:
1622 case Intrinsic::aarch64_sve_cmpge_wide:
1623 case Intrinsic::aarch64_sve_cmpgt:
1624 case Intrinsic::aarch64_sve_cmpgt_wide:
1625 case Intrinsic::aarch64_sve_cmphi:
1626 case Intrinsic::aarch64_sve_cmphi_wide:
1627 case Intrinsic::aarch64_sve_cmphs:
1628 case Intrinsic::aarch64_sve_cmphs_wide:
1629 case Intrinsic::aarch64_sve_cmple_wide:
1630 case Intrinsic::aarch64_sve_cmplo_wide:
1631 case Intrinsic::aarch64_sve_cmpls_wide:
1632 case Intrinsic::aarch64_sve_cmplt_wide:
1633 case Intrinsic::aarch64_sve_cmpne:
1634 case Intrinsic::aarch64_sve_cmpne_wide:
1635 case Intrinsic::aarch64_sve_facge:
1636 case Intrinsic::aarch64_sve_facgt:
1637 case Intrinsic::aarch64_sve_fcmpeq:
1638 case Intrinsic::aarch64_sve_fcmpge:
1639 case Intrinsic::aarch64_sve_fcmpgt:
1640 case Intrinsic::aarch64_sve_fcmpne:
1641 case Intrinsic::aarch64_sve_fcmpuo:
1642 case Intrinsic::aarch64_sve_ld1:
1643 case Intrinsic::aarch64_sve_ld1_gather:
1644 case Intrinsic::aarch64_sve_ld1_gather_index:
1645 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
1646 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
1647 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
1648 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
1649 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
1650 case Intrinsic::aarch64_sve_ld1q_gather_index:
1651 case Intrinsic::aarch64_sve_ld1q_gather_scalar_offset:
1652 case Intrinsic::aarch64_sve_ld1q_gather_vector_offset:
1653 case Intrinsic::aarch64_sve_ld1ro:
1654 case Intrinsic::aarch64_sve_ld1rq:
1655 case Intrinsic::aarch64_sve_ld1udq:
1656 case Intrinsic::aarch64_sve_ld1uwq:
1657 case Intrinsic::aarch64_sve_ld2_sret:
1658 case Intrinsic::aarch64_sve_ld2q_sret:
1659 case Intrinsic::aarch64_sve_ld3_sret:
1660 case Intrinsic::aarch64_sve_ld3q_sret:
1661 case Intrinsic::aarch64_sve_ld4_sret:
1662 case Intrinsic::aarch64_sve_ld4q_sret:
1663 case Intrinsic::aarch64_sve_ldff1:
1664 case Intrinsic::aarch64_sve_ldff1_gather:
1665 case Intrinsic::aarch64_sve_ldff1_gather_index:
1666 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
1667 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
1668 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
1669 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
1670 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
1671 case Intrinsic::aarch64_sve_ldnf1:
1672 case Intrinsic::aarch64_sve_ldnt1:
1673 case Intrinsic::aarch64_sve_ldnt1_gather:
1674 case Intrinsic::aarch64_sve_ldnt1_gather_index:
1675 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
1676 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
1679 case Intrinsic::aarch64_sve_prf:
1680 case Intrinsic::aarch64_sve_prfb_gather_index:
1681 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
1682 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
1683 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
1684 case Intrinsic::aarch64_sve_prfd_gather_index:
1685 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
1686 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
1687 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
1688 case Intrinsic::aarch64_sve_prfh_gather_index:
1689 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
1690 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
1691 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
1692 case Intrinsic::aarch64_sve_prfw_gather_index:
1693 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
1694 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
1695 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
1698 case Intrinsic::aarch64_sve_st1_scatter:
1699 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
1700 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
1701 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
1702 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
1703 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
1704 case Intrinsic::aarch64_sve_st1dq:
1705 case Intrinsic::aarch64_sve_st1q_scatter_index:
1706 case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
1707 case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
1708 case Intrinsic::aarch64_sve_st1wq:
1709 case Intrinsic::aarch64_sve_stnt1:
1710 case Intrinsic::aarch64_sve_stnt1_scatter:
1711 case Intrinsic::aarch64_sve_stnt1_scatter_index:
1712 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
1713 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
1715 case Intrinsic::aarch64_sve_st2:
1716 case Intrinsic::aarch64_sve_st2q:
1718 case Intrinsic::aarch64_sve_st3:
1719 case Intrinsic::aarch64_sve_st3q:
1721 case Intrinsic::aarch64_sve_st4:
1722 case Intrinsic::aarch64_sve_st4q:
1730 Value *UncastedPred;
1736 Pred = UncastedPred;
1742 if (OrigPredTy->getMinNumElements() <=
1744 ->getMinNumElements())
1745 Pred = UncastedPred;
1749 return C &&
C->isAllOnesValue();
1756 if (Dup && Dup->getIntrinsicID() == Intrinsic::aarch64_sve_dup &&
1757 Dup->getOperand(1) == Pg &&
isa<Constant>(Dup->getOperand(2)))
1765static std::optional<Instruction *>
1772 Value *Op1 =
II.getOperand(1);
1773 Value *Op2 =
II.getOperand(2);
1799 return std::nullopt;
1807 if (SimpleII == Inactive)
1817static std::optional<Instruction *>
1821 return std::nullopt;
1850 II.setCalledFunction(NewDecl);
1860 return std::nullopt;
1872static std::optional<Instruction *>
1876 return std::nullopt;
1878 auto IntrinsicID = BinOp->getIntrinsicID();
1879 switch (IntrinsicID) {
1880 case Intrinsic::aarch64_sve_and_z:
1881 case Intrinsic::aarch64_sve_bic_z:
1882 case Intrinsic::aarch64_sve_eor_z:
1883 case Intrinsic::aarch64_sve_nand_z:
1884 case Intrinsic::aarch64_sve_nor_z:
1885 case Intrinsic::aarch64_sve_orn_z:
1886 case Intrinsic::aarch64_sve_orr_z:
1889 return std::nullopt;
1892 auto BinOpPred = BinOp->getOperand(0);
1893 auto BinOpOp1 = BinOp->getOperand(1);
1894 auto BinOpOp2 = BinOp->getOperand(2);
1898 PredIntr->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool)
1899 return std::nullopt;
1901 auto PredOp = PredIntr->getOperand(0);
1903 if (PredOpTy !=
II.getType())
1904 return std::nullopt;
1908 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
1909 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1910 if (BinOpOp1 == BinOpOp2)
1911 NarrowedBinOpArgs.
push_back(NarrowBinOpOp1);
1914 Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
1916 auto NarrowedBinOp =
1921static std::optional<Instruction *>
1928 return BinOpCombine;
1933 return std::nullopt;
1936 Value *Cursor =
II.getOperand(0), *EarliestReplacement =
nullptr;
1945 if (CursorVTy->getElementCount().getKnownMinValue() <
1946 IVTy->getElementCount().getKnownMinValue())
1950 if (Cursor->getType() == IVTy)
1951 EarliestReplacement = Cursor;
1956 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() ==
1957 Intrinsic::aarch64_sve_convert_to_svbool ||
1958 IntrinsicCursor->getIntrinsicID() ==
1959 Intrinsic::aarch64_sve_convert_from_svbool))
1962 CandidatesForRemoval.
insert(CandidatesForRemoval.
begin(), IntrinsicCursor);
1963 Cursor = IntrinsicCursor->getOperand(0);
1968 if (!EarliestReplacement)
1969 return std::nullopt;
1977 auto *OpPredicate =
II.getOperand(0);
1994 II.getArgOperand(2));
2000 return std::nullopt;
2004 II.getArgOperand(0),
II.getArgOperand(2),
uint64_t(0));
2013 II.getArgOperand(0));
2023 return std::nullopt;
2028 if (!SplatValue || !SplatValue->isZero())
2029 return std::nullopt;
2034 DupQLane->getIntrinsicID() != Intrinsic::aarch64_sve_dupq_lane)
2035 return std::nullopt;
2039 if (!DupQLaneIdx || !DupQLaneIdx->isZero())
2040 return std::nullopt;
2043 if (!VecIns || VecIns->getIntrinsicID() != Intrinsic::vector_insert)
2044 return std::nullopt;
2049 return std::nullopt;
2052 return std::nullopt;
2056 return std::nullopt;
2060 if (!VecTy || !OutTy || VecTy->getNumElements() != OutTy->getMinNumElements())
2061 return std::nullopt;
2063 unsigned NumElts = VecTy->getNumElements();
2064 unsigned PredicateBits = 0;
2067 for (
unsigned I = 0;
I < NumElts; ++
I) {
2070 return std::nullopt;
2072 PredicateBits |= 1 << (
I * (16 / NumElts));
2076 if (PredicateBits == 0) {
2078 PFalse->takeName(&
II);
2084 for (
unsigned I = 0;
I < 16; ++
I)
2085 if ((PredicateBits & (1 <<
I)) != 0)
2088 unsigned PredSize = Mask & -Mask;
2093 for (
unsigned I = 0;
I < 16;
I += PredSize)
2094 if ((PredicateBits & (1 <<
I)) == 0)
2095 return std::nullopt;
2100 {PredType}, {PTruePat});
2102 Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
2103 auto *ConvertFromSVBool =
2105 {
II.getType()}, {ConvertToSVBool});
2113 Value *Pg =
II.getArgOperand(0);
2114 Value *Vec =
II.getArgOperand(1);
2115 auto IntrinsicID =
II.getIntrinsicID();
2116 bool IsAfter = IntrinsicID == Intrinsic::aarch64_sve_lasta;
2128 auto OpC = OldBinOp->getOpcode();
2134 OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(),
II.getIterator());
2140 if (IsAfter &&
C &&
C->isNullValue()) {
2144 Extract->insertBefore(
II.getIterator());
2145 Extract->takeName(&
II);
2151 return std::nullopt;
2153 if (IntrPG->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
2154 return std::nullopt;
2156 const auto PTruePattern =
2162 return std::nullopt;
2164 unsigned Idx = MinNumElts - 1;
2174 if (Idx >= PgVTy->getMinNumElements())
2175 return std::nullopt;
2180 Extract->insertBefore(
II.getIterator());
2181 Extract->takeName(&
II);
2194 Value *Pg =
II.getArgOperand(0);
2196 Value *Vec =
II.getArgOperand(2);
2199 if (!Ty->isIntegerTy())
2200 return std::nullopt;
2205 return std::nullopt;
2222 II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
2235 {
II.getType()}, {AllPat});
2242static std::optional<Instruction *>
2246 if (
Pattern == AArch64SVEPredPattern::all) {
2255 return MinNumElts && NumElts >= MinNumElts
2257 II, ConstantInt::get(
II.getType(), MinNumElts)))
2261static std::optional<Instruction *>
2264 if (!ST->isStreaming())
2265 return std::nullopt;
2277 Value *PgVal =
II.getArgOperand(0);
2278 Value *OpVal =
II.getArgOperand(1);
2282 if (PgVal == OpVal &&
2283 (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_first ||
2284 II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_last)) {
2299 return std::nullopt;
2303 if (Pg->
getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool &&
2304 OpIID == Intrinsic::aarch64_sve_convert_to_svbool &&
2318 if ((Pg ==
Op) && (
II.getIntrinsicID() == Intrinsic::aarch64_sve_ptest_any) &&
2319 ((OpIID == Intrinsic::aarch64_sve_brka_z) ||
2320 (OpIID == Intrinsic::aarch64_sve_brkb_z) ||
2321 (OpIID == Intrinsic::aarch64_sve_brkpa_z) ||
2322 (OpIID == Intrinsic::aarch64_sve_brkpb_z) ||
2323 (OpIID == Intrinsic::aarch64_sve_rdffr_z) ||
2324 (OpIID == Intrinsic::aarch64_sve_and_z) ||
2325 (OpIID == Intrinsic::aarch64_sve_bic_z) ||
2326 (OpIID == Intrinsic::aarch64_sve_eor_z) ||
2327 (OpIID == Intrinsic::aarch64_sve_nand_z) ||
2328 (OpIID == Intrinsic::aarch64_sve_nor_z) ||
2329 (OpIID == Intrinsic::aarch64_sve_orn_z) ||
2330 (OpIID == Intrinsic::aarch64_sve_orr_z))) {
2340 return std::nullopt;
2343template <Intrinsic::ID MulOpc, Intrinsic::ID FuseOpc>
2344static std::optional<Instruction *>
2346 bool MergeIntoAddendOp) {
2348 Value *MulOp0, *MulOp1, *AddendOp, *
Mul;
2349 if (MergeIntoAddendOp) {
2350 AddendOp =
II.getOperand(1);
2351 Mul =
II.getOperand(2);
2353 AddendOp =
II.getOperand(2);
2354 Mul =
II.getOperand(1);
2359 return std::nullopt;
2361 if (!
Mul->hasOneUse())
2362 return std::nullopt;
2365 if (
II.getType()->isFPOrFPVectorTy()) {
2370 return std::nullopt;
2372 return std::nullopt;
2377 if (MergeIntoAddendOp)
2387static std::optional<Instruction *>
2389 Value *Pred =
II.getOperand(0);
2390 Value *PtrOp =
II.getOperand(1);
2391 Type *VecTy =
II.getType();
2395 Load->copyMetadata(
II);
2406static std::optional<Instruction *>
2408 Value *VecOp =
II.getOperand(0);
2409 Value *Pred =
II.getOperand(1);
2410 Value *PtrOp =
II.getOperand(2);
2414 Store->copyMetadata(
II);
2426 case Intrinsic::aarch64_sve_fmul_u:
2427 return Instruction::BinaryOps::FMul;
2428 case Intrinsic::aarch64_sve_fadd_u:
2429 return Instruction::BinaryOps::FAdd;
2430 case Intrinsic::aarch64_sve_fsub_u:
2431 return Instruction::BinaryOps::FSub;
2433 return Instruction::BinaryOpsEnd;
2437static std::optional<Instruction *>
2440 if (
II.isStrictFP())
2441 return std::nullopt;
2443 auto *OpPredicate =
II.getOperand(0);
2445 if (BinOpCode == Instruction::BinaryOpsEnd ||
2447 return std::nullopt;
2449 BinOpCode,
II.getOperand(1),
II.getOperand(2),
II.getFastMathFlags());
2456 Intrinsic::aarch64_sve_mla>(
2460 Intrinsic::aarch64_sve_mad>(
2463 return std::nullopt;
2466static std::optional<Instruction *>
2470 Intrinsic::aarch64_sve_fmla>(IC,
II,
2475 Intrinsic::aarch64_sve_fmad>(IC,
II,
2480 Intrinsic::aarch64_sve_fmla>(IC,
II,
2483 return std::nullopt;
2486static std::optional<Instruction *>
2490 Intrinsic::aarch64_sve_fmla>(IC,
II,
2495 Intrinsic::aarch64_sve_fmad>(IC,
II,
2500 Intrinsic::aarch64_sve_fmla_u>(
2506static std::optional<Instruction *>
2510 Intrinsic::aarch64_sve_fmls>(IC,
II,
2515 Intrinsic::aarch64_sve_fnmsb>(
2520 Intrinsic::aarch64_sve_fmls>(IC,
II,
2523 return std::nullopt;
2526static std::optional<Instruction *>
2530 Intrinsic::aarch64_sve_fmls>(IC,
II,
2535 Intrinsic::aarch64_sve_fnmsb>(
2540 Intrinsic::aarch64_sve_fmls_u>(
2549 Intrinsic::aarch64_sve_mls>(
2552 return std::nullopt;
2557 Value *UnpackArg =
II.getArgOperand(0);
2559 bool IsSigned =
II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
2560 II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpklo;
2573 return std::nullopt;
2577 auto *OpVal =
II.getOperand(0);
2578 auto *OpIndices =
II.getOperand(1);
2585 SplatValue->getValue().uge(VTy->getElementCount().getKnownMinValue()))
2586 return std::nullopt;
2601 Type *RetTy =
II.getType();
2602 constexpr Intrinsic::ID FromSVB = Intrinsic::aarch64_sve_convert_from_svbool;
2603 constexpr Intrinsic::ID ToSVB = Intrinsic::aarch64_sve_convert_to_svbool;
2607 if ((
match(
II.getArgOperand(0),
2614 if (TyA ==
B->getType() &&
2619 TyA->getMinNumElements());
2625 return std::nullopt;
2633 if (
match(
II.getArgOperand(0),
2638 II, (
II.getIntrinsicID() == Intrinsic::aarch64_sve_zip1 ?
A :
B));
2640 return std::nullopt;
2643static std::optional<Instruction *>
2645 Value *Mask =
II.getOperand(0);
2646 Value *BasePtr =
II.getOperand(1);
2647 Value *Index =
II.getOperand(2);
2658 BasePtr->getPointerAlignment(
II.getDataLayout());
2661 BasePtr, IndexBase);
2668 return std::nullopt;
2671static std::optional<Instruction *>
2673 Value *Val =
II.getOperand(0);
2674 Value *Mask =
II.getOperand(1);
2675 Value *BasePtr =
II.getOperand(2);
2676 Value *Index =
II.getOperand(3);
2686 BasePtr->getPointerAlignment(
II.getDataLayout());
2689 BasePtr, IndexBase);
2695 return std::nullopt;
2701 Value *Pred =
II.getOperand(0);
2702 Value *Vec =
II.getOperand(1);
2703 Value *DivVec =
II.getOperand(2);
2707 if (!SplatConstantInt)
2708 return std::nullopt;
2712 if (DivisorValue == -1)
2713 return std::nullopt;
2714 if (DivisorValue == 1)
2720 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2727 Intrinsic::aarch64_sve_asrd, {
II.getType()}, {Pred, Vec, DivisorLog2});
2729 Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
2733 return std::nullopt;
2737 size_t VecSize = Vec.
size();
2742 size_t HalfVecSize = VecSize / 2;
2746 if (*
LHS !=
nullptr && *
RHS !=
nullptr) {
2754 if (*
LHS ==
nullptr && *
RHS !=
nullptr)
2772 return std::nullopt;
2779 Elts[Idx->getValue().getZExtValue()] = InsertElt->getOperand(1);
2780 CurrentInsertElt = InsertElt->getOperand(0);
2786 return std::nullopt;
2790 for (
size_t I = 0;
I < Elts.
size();
I++) {
2791 if (Elts[
I] ==
nullptr)
2796 if (InsertEltChain ==
nullptr)
2797 return std::nullopt;
2803 unsigned PatternWidth = IIScalableTy->getScalarSizeInBits() * Elts.
size();
2804 unsigned PatternElementCount = IIScalableTy->getScalarSizeInBits() *
2805 IIScalableTy->getMinNumElements() /
2810 auto *WideShuffleMaskTy =
2821 auto NarrowBitcast =
2834 return std::nullopt;
2839 Value *Pred =
II.getOperand(0);
2840 Value *Vec =
II.getOperand(1);
2841 Value *Shift =
II.getOperand(2);
2844 Value *AbsPred, *MergedValue;
2850 return std::nullopt;
2858 return std::nullopt;
2863 return std::nullopt;
2866 {
II.getType()}, {Pred, Vec, Shift});
2873 Value *Vec =
II.getOperand(0);
2878 return std::nullopt;
2884 auto *NI =
II.getNextNode();
2887 return !
I->mayReadOrWriteMemory() && !
I->mayHaveSideEffects();
2889 while (LookaheadThreshold-- && CanSkipOver(NI)) {
2890 auto *NIBB = NI->getParent();
2891 NI = NI->getNextNode();
2893 if (
auto *SuccBB = NIBB->getUniqueSuccessor())
2894 NI = &*SuccBB->getFirstNonPHIOrDbgOrLifetime();
2900 if (NextII &&
II.isIdenticalTo(NextII))
2903 return std::nullopt;
2911 {II.getType(), II.getOperand(0)->getType()},
2912 {II.getOperand(0), II.getOperand(1)}));
2919 return std::nullopt;
2925 Value *Passthru =
II.getOperand(0);
2933 auto *Mask = ConstantInt::get(Ty, MaskValue);
2939 return std::nullopt;
2942static std::optional<Instruction *>
2949 return std::nullopt;
2952std::optional<Instruction *>
2963 case Intrinsic::aarch64_dmb:
2965 case Intrinsic::aarch64_neon_fmaxnm:
2966 case Intrinsic::aarch64_neon_fminnm:
2968 case Intrinsic::aarch64_sve_convert_from_svbool:
2970 case Intrinsic::aarch64_sve_dup:
2972 case Intrinsic::aarch64_sve_dup_x:
2974 case Intrinsic::aarch64_sve_cmpne:
2975 case Intrinsic::aarch64_sve_cmpne_wide:
2977 case Intrinsic::aarch64_sve_rdffr:
2979 case Intrinsic::aarch64_sve_lasta:
2980 case Intrinsic::aarch64_sve_lastb:
2982 case Intrinsic::aarch64_sve_clasta_n:
2983 case Intrinsic::aarch64_sve_clastb_n:
2985 case Intrinsic::aarch64_sve_cntd:
2987 case Intrinsic::aarch64_sve_cntw:
2989 case Intrinsic::aarch64_sve_cnth:
2991 case Intrinsic::aarch64_sve_cntb:
2993 case Intrinsic::aarch64_sme_cntsd:
2995 case Intrinsic::aarch64_sve_ptest_any:
2996 case Intrinsic::aarch64_sve_ptest_first:
2997 case Intrinsic::aarch64_sve_ptest_last:
2999 case Intrinsic::aarch64_sve_fadd:
3001 case Intrinsic::aarch64_sve_fadd_u:
3003 case Intrinsic::aarch64_sve_fmul_u:
3005 case Intrinsic::aarch64_sve_fsub:
3007 case Intrinsic::aarch64_sve_fsub_u:
3009 case Intrinsic::aarch64_sve_add:
3011 case Intrinsic::aarch64_sve_add_u:
3013 Intrinsic::aarch64_sve_mla_u>(
3015 case Intrinsic::aarch64_sve_sub:
3017 case Intrinsic::aarch64_sve_sub_u:
3019 Intrinsic::aarch64_sve_mls_u>(
3021 case Intrinsic::aarch64_sve_tbl:
3023 case Intrinsic::aarch64_sve_uunpkhi:
3024 case Intrinsic::aarch64_sve_uunpklo:
3025 case Intrinsic::aarch64_sve_sunpkhi:
3026 case Intrinsic::aarch64_sve_sunpklo:
3028 case Intrinsic::aarch64_sve_uzp1:
3030 case Intrinsic::aarch64_sve_zip1:
3031 case Intrinsic::aarch64_sve_zip2:
3033 case Intrinsic::aarch64_sve_ld1_gather_index:
3035 case Intrinsic::aarch64_sve_st1_scatter_index:
3037 case Intrinsic::aarch64_sve_ld1:
3039 case Intrinsic::aarch64_sve_st1:
3041 case Intrinsic::aarch64_sve_sdiv:
3043 case Intrinsic::aarch64_sve_sel:
3045 case Intrinsic::aarch64_sve_srshl:
3047 case Intrinsic::aarch64_sve_dupq_lane:
3049 case Intrinsic::aarch64_sve_insr:
3051 case Intrinsic::aarch64_sve_whilelo:
3053 case Intrinsic::aarch64_sve_ptrue:
3055 case Intrinsic::aarch64_sve_uxtb:
3057 case Intrinsic::aarch64_sve_uxth:
3059 case Intrinsic::aarch64_sve_uxtw:
3061 case Intrinsic::aarch64_sme_in_streaming_mode:
3065 return std::nullopt;
3072 SimplifyAndSetOp)
const {
3073 switch (
II.getIntrinsicID()) {
3076 case Intrinsic::aarch64_neon_fcvtxn:
3077 case Intrinsic::aarch64_neon_rshrn:
3078 case Intrinsic::aarch64_neon_sqrshrn:
3079 case Intrinsic::aarch64_neon_sqrshrun:
3080 case Intrinsic::aarch64_neon_sqshrn:
3081 case Intrinsic::aarch64_neon_sqshrun:
3082 case Intrinsic::aarch64_neon_sqxtn:
3083 case Intrinsic::aarch64_neon_sqxtun:
3084 case Intrinsic::aarch64_neon_uqrshrn:
3085 case Intrinsic::aarch64_neon_uqshrn:
3086 case Intrinsic::aarch64_neon_uqxtn:
3087 SimplifyAndSetOp(&
II, 0, OrigDemandedElts, UndefElts);
3091 return std::nullopt;
3095 return ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3105 if (ST->useSVEForFixedLengthVectors() &&
3108 std::max(ST->getMinSVEVectorSizeInBits(), 128u));
3109 else if (ST->isNeonAvailable())
3114 if (ST->isSVEAvailable() || (ST->isSVEorStreamingSVEAvailable() &&
3123bool AArch64TTIImpl::isSingleExtWideningInstruction(
3125 Type *SrcOverrideTy)
const {
3140 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3143 Type *SrcTy = SrcOverrideTy;
3145 case Instruction::Add:
3146 case Instruction::Sub: {
3155 if (Opcode == Instruction::Sub)
3179 assert(SrcTy &&
"Expected some SrcTy");
3181 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
3187 DstTyL.first * DstTyL.second.getVectorMinNumElements();
3189 SrcTyL.first * SrcTyL.second.getVectorMinNumElements();
3193 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
3196Type *AArch64TTIImpl::isBinExtWideningInstruction(
unsigned Opcode,
Type *DstTy,
3198 Type *SrcOverrideTy)
const {
3199 if (Opcode != Instruction::Add && Opcode != Instruction::Sub &&
3200 Opcode != Instruction::Mul)
3210 (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
3213 auto getScalarSizeWithOverride = [&](
const Value *
V) {
3219 ->getScalarSizeInBits();
3222 unsigned MaxEltSize = 0;
3225 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3226 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3227 MaxEltSize = std::max(EltSize0, EltSize1);
3230 unsigned EltSize0 = getScalarSizeWithOverride(Args[0]);
3231 unsigned EltSize1 = getScalarSizeWithOverride(Args[1]);
3234 if (EltSize0 >= DstEltSize / 2 || EltSize1 >= DstEltSize / 2)
3236 MaxEltSize = DstEltSize / 2;
3237 }
else if (Opcode == Instruction::Mul &&
3250 getScalarSizeWithOverride(
isa<ZExtInst>(Args[0]) ? Args[0] : Args[1]);
3254 if (MaxEltSize * 2 > DstEltSize)
3272 if (!Src->isVectorTy() || !TLI->isTypeLegal(TLI->getValueType(
DL, Src)) ||
3273 (Src->isScalableTy() && !ST->hasSVE2()))
3283 if (AddUser && AddUser->getOpcode() == Instruction::Add)
3287 if (!Shr || Shr->getOpcode() != Instruction::LShr)
3291 if (!Trunc || Trunc->getOpcode() != Instruction::Trunc ||
3292 Src->getScalarSizeInBits() !=
3316 int ISD = TLI->InstructionOpcodeToISD(Opcode);
3320 if (
I &&
I->hasOneUser()) {
3323 if (
Type *ExtTy = isBinExtWideningInstruction(
3324 SingleUser->getOpcode(), Dst, Operands,
3325 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3338 if (isSingleExtWideningInstruction(
3339 SingleUser->getOpcode(), Dst, Operands,
3340 Src !=
I->getOperand(0)->getType() ? Src :
nullptr)) {
3344 if (SingleUser->getOpcode() == Instruction::Add) {
3345 if (
I == SingleUser->getOperand(1) ||
3347 cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
3362 EVT SrcTy = TLI->getValueType(
DL, Src);
3363 EVT DstTy = TLI->getValueType(
DL, Dst);
3365 if (!SrcTy.isSimple() || !DstTy.
isSimple())
3370 if (!ST->hasSVE2() && !ST->isStreamingSVEAvailable() &&
3402 const unsigned int SVE_EXT_COST = 1;
3403 const unsigned int SVE_FCVT_COST = 1;
3404 const unsigned int SVE_UNPACK_ONCE = 4;
3405 const unsigned int SVE_UNPACK_TWICE = 16;
3534 SVE_EXT_COST + SVE_FCVT_COST},
3539 SVE_EXT_COST + SVE_FCVT_COST},
3546 SVE_EXT_COST + SVE_FCVT_COST},
3550 SVE_EXT_COST + SVE_FCVT_COST},
3556 SVE_EXT_COST + SVE_FCVT_COST},
3559 SVE_EXT_COST + SVE_FCVT_COST},
3564 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3566 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3576 SVE_EXT_COST + SVE_FCVT_COST},
3581 SVE_EXT_COST + SVE_FCVT_COST},
3594 SVE_EXT_COST + SVE_FCVT_COST},
3598 SVE_EXT_COST + SVE_FCVT_COST},
3610 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3612 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3614 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3616 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3620 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3622 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3638 SVE_EXT_COST + SVE_FCVT_COST},
3643 SVE_EXT_COST + SVE_FCVT_COST},
3654 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3656 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3658 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3660 SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3662 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3664 SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3668 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3670 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3672 SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3674 SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3873 EVT WiderTy = SrcTy.
bitsGT(DstTy) ? SrcTy : DstTy;
3876 ST->useSVEForFixedLengthVectors(WiderTy)) {
3877 std::pair<InstructionCost, MVT> LT =
3879 unsigned NumElements =
3918 if (ST->hasFullFP16())
3930 Src->getScalarType(), CCH,
CostKind) +
3938 ST->isSVEorStreamingSVEAvailable() &&
3939 TLI->getTypeAction(Src->getContext(), SrcTy) ==
3941 TLI->getTypeAction(Dst->getContext(), DstTy) ==
3950 Opcode, LegalTy, Src, CCH,
CostKind,
I);
3953 return Part1 + Part2;
3960 ST->isSVEorStreamingSVEAvailable() && TLI->isTypeLegal(DstTy))
3972 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
3985 CostKind, Index,
nullptr,
nullptr);
3989 auto DstVT = TLI->getValueType(
DL, Dst);
3990 auto SrcVT = TLI->getValueType(
DL, Src);
3995 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
4001 if (DstVT.getFixedSizeInBits() < SrcVT.getFixedSizeInBits())
4011 case Instruction::SExt:
4016 case Instruction::ZExt:
4017 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
4030 return Opcode == Instruction::PHI ? 0 : 1;
4039 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4048 if (!LT.second.isVector())
4053 if (LT.second.isFixedLengthVector()) {
4054 unsigned Width = LT.second.getVectorNumElements();
4055 Index = Index % Width;
4070 if (ST->hasFastLD1Single())
4082 : ST->getVectorInsertExtractBaseCost() + 1;
4106 auto ExtractCanFuseWithFmul = [&]() {
4113 auto IsAllowedScalarTy = [&](
const Type *
T) {
4114 return T->isFloatTy() ||
T->isDoubleTy() ||
4115 (
T->isHalfTy() && ST->hasFullFP16());
4119 auto IsUserFMulScalarTy = [](
const Value *EEUser) {
4122 return BO && BO->getOpcode() == BinaryOperator::FMul &&
4123 !BO->getType()->isVectorTy();
4128 auto IsExtractLaneEquivalentToZero = [&](
unsigned Idx,
unsigned EltSz) {
4132 return Idx == 0 || (RegWidth != 0 && (Idx * EltSz) % RegWidth == 0);
4141 DenseMap<User *, unsigned> UserToExtractIdx;
4142 for (
auto *U :
Scalar->users()) {
4143 if (!IsUserFMulScalarTy(U))
4147 UserToExtractIdx[
U];
4149 if (UserToExtractIdx.
empty())
4151 for (
auto &[S, U, L] : ScalarUserAndIdx) {
4152 for (
auto *U : S->users()) {
4153 if (UserToExtractIdx.
contains(U)) {
4155 auto *Op0 =
FMul->getOperand(0);
4156 auto *Op1 =
FMul->getOperand(1);
4157 if ((Op0 == S && Op1 == S) || Op0 != S || Op1 != S) {
4158 UserToExtractIdx[
U] =
L;
4164 for (
auto &[U, L] : UserToExtractIdx) {
4176 return !EE->users().empty() &&
all_of(EE->users(), [&](
const User *U) {
4177 if (!IsUserFMulScalarTy(U))
4182 const auto *BO = cast<BinaryOperator>(U);
4183 const auto *OtherEE = dyn_cast<ExtractElementInst>(
4184 BO->getOperand(0) == EE ? BO->getOperand(1) : BO->getOperand(0));
4186 const auto *IdxOp = dyn_cast<ConstantInt>(OtherEE->getIndexOperand());
4189 return IsExtractLaneEquivalentToZero(
4190 cast<ConstantInt>(OtherEE->getIndexOperand())
4193 OtherEE->getType()->getScalarSizeInBits());
4201 if (Opcode == Instruction::ExtractElement && (
I || Scalar) &&
4202 ExtractCanFuseWithFmul())
4207 :
ST->getVectorInsertExtractBaseCost();
4216 if (Opcode == Instruction::InsertElement && Index == 0 && Op0 &&
4219 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr,
4225 Value *Scalar,
ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
4227 return getVectorInstrCostHelper(Opcode, Val,
CostKind, Index,
nullptr, Scalar,
4228 ScalarUserAndIdx, VIC);
4235 return getVectorInstrCostHelper(
I.getOpcode(), Val,
CostKind, Index, &
I,
4242 unsigned Index)
const {
4254 : ST->getVectorInsertExtractBaseCost() + 1;
4263 if (Ty->getElementType()->isFloatingPointTy())
4266 unsigned VecInstCost =
4268 return DemandedElts.
popcount() * (Insert + Extract) * VecInstCost;
4275 if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
4276 return std::nullopt;
4277 if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
4278 return std::nullopt;
4279 if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4280 ST->isNonStreamingSVEorSME2Available())
4281 return std::nullopt;
4288 Cost += InstCost(PromotedTy);
4311 Op2Info, Args, CxtI);
4315 int ISD = TLI->InstructionOpcodeToISD(Opcode);
4322 Ty,
CostKind, Op1Info, Op2Info,
true,
4325 [&](
Type *PromotedTy) {
4329 return *PromotedCost;
4335 if (
Type *ExtTy = isBinExtWideningInstruction(Opcode, Ty, Args)) {
4402 auto VT = TLI->getValueType(
DL, Ty);
4403 if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
4407 : (3 * AsrCost + AddCost);
4409 return MulCost + AsrCost + 2 * AddCost;
4411 }
else if (VT.isVector()) {
4421 if (Ty->isScalableTy() && ST->hasSVE())
4422 Cost += 2 * AsrCost;
4427 ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
4431 }
else if (LT.second == MVT::v2i64) {
4432 return VT.getVectorNumElements() *
4439 if (Ty->isScalableTy() && ST->hasSVE())
4440 return MulCost + 2 * AddCost + 2 * AsrCost;
4441 return 2 * MulCost + AddCost + AsrCost + UsraCost;
4446 LT.second.isFixedLengthVector()) {
4456 return ExtractCost + InsertCost +
4464 auto VT = TLI->getValueType(
DL, Ty);
4480 bool HasMULH = VT == MVT::i64 || LT.second == MVT::nxv2i64 ||
4481 LT.second == MVT::nxv4i32 || LT.second == MVT::nxv8i16 ||
4482 LT.second == MVT::nxv16i8;
4483 bool Is128bit = LT.second.is128BitVector();
4495 (HasMULH ? 0 : ShrCost) +
4496 AddCost * 2 + ShrCost;
4497 return DivCost + (
ISD ==
ISD::UREM ? MulCost + AddCost : 0);
4504 if (!VT.isVector() && VT.getSizeInBits() > 64)
4508 Opcode, Ty,
CostKind, Op1Info, Op2Info);
4510 if (TLI->isOperationLegalOrCustom(
ISD, LT.second) && ST->hasSVE()) {
4514 Ty->getPrimitiveSizeInBits().getFixedValue() < 128) {
4524 if (
nullptr != Entry)
4529 if (LT.second.getScalarType() == MVT::i8)
4531 else if (LT.second.getScalarType() == MVT::i16)
4543 Opcode, Ty->getScalarType(),
CostKind, Op1Info, Op2Info);
4544 return (4 + DivCost) * VTy->getNumElements();
4550 -1,
nullptr,
nullptr);
4564 if (LT.second == MVT::v2i64 && ST->hasSVE())
4577 if (LT.second != MVT::v2i64)
4599 if ((Ty->isFloatTy() || Ty->isDoubleTy() ||
4600 (Ty->isHalfTy() && ST->hasFullFP16())) &&
4609 if (!Ty->getScalarType()->isFP128Ty())
4616 if (!Ty->getScalarType()->isFP128Ty())
4617 return 2 * LT.first;
4624 if (!Ty->isVectorTy())
4640 int MaxMergeDistance = 64;
4644 return NumVectorInstToHideOverhead;
4654 unsigned Opcode1,
unsigned Opcode2)
const {
4657 if (!
Sched.hasInstrSchedModel())
4661 Sched.getSchedClassDesc(
TII->get(Opcode1).getSchedClass());
4663 Sched.getSchedClassDesc(
TII->get(Opcode2).getSchedClass());
4669 "Cannot handle variant scheduling classes without an MI");
4685 const int AmortizationCost = 20;
4693 VecPred = CurrentPred;
4701 static const auto ValidMinMaxTys = {
4702 MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
4703 MVT::v4i32, MVT::v2i64, MVT::v2f32, MVT::v4f32, MVT::v2f64};
4704 static const auto ValidFP16MinMaxTys = {MVT::v4f16, MVT::v8f16};
4708 (ST->hasFullFP16() &&
4714 {Instruction::Select, MVT::v2i1, MVT::v2f32, 2},
4715 {Instruction::Select, MVT::v2i1, MVT::v2f64, 2},
4716 {Instruction::Select, MVT::v4i1, MVT::v4f32, 2},
4717 {Instruction::Select, MVT::v4i1, MVT::v4f16, 2},
4718 {Instruction::Select, MVT::v8i1, MVT::v8f16, 2},
4719 {Instruction::Select, MVT::v16i1, MVT::v16i16, 16},
4720 {Instruction::Select, MVT::v8i1, MVT::v8i32, 8},
4721 {Instruction::Select, MVT::v16i1, MVT::v16i32, 16},
4722 {Instruction::Select, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost},
4723 {Instruction::Select, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost},
4724 {Instruction::Select, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost}};
4726 EVT SelCondTy = TLI->getValueType(
DL, CondTy);
4727 EVT SelValTy = TLI->getValueType(
DL, ValTy);
4736 if (Opcode == Instruction::FCmp) {
4738 ValTy,
CostKind, Op1Info, Op2Info,
false,
4740 false, [&](
Type *PromotedTy) {
4752 return *PromotedCost;
4756 if (LT.second.getScalarType() != MVT::f64 &&
4757 LT.second.getScalarType() != MVT::f32 &&
4758 LT.second.getScalarType() != MVT::f16)
4763 unsigned Factor = 1;
4764 if (!CondTy->isVectorTy() &&
4778 AArch64::FCMEQv4f32))
4790 TLI->isTypeLegal(TLI->getValueType(
DL, ValTy)) &&
4809 Op1Info, Op2Info,
I);
4815 if (ST->requiresStrictAlign()) {
4820 Options.AllowOverlappingLoads =
true;
4821 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
4826 Options.LoadSizes = {8, 4, 2, 1};
4827 Options.AllowedTailExpansions = {3, 5, 6};
4832 return ST->hasSVE();
4838 switch (MICA.
getID()) {
4839 case Intrinsic::masked_scatter:
4840 case Intrinsic::masked_gather:
4842 case Intrinsic::masked_load:
4843 case Intrinsic::masked_store:
4857 if (!LT.first.isValid())
4862 if (VT->getElementType()->isIntegerTy(1))
4879 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
4880 "Should be called on only load or stores.");
4882 case Instruction::Load:
4885 return ST->getGatherOverhead();
4887 case Instruction::Store:
4890 return ST->getScatterOverhead();
4901 unsigned Opcode = (MICA.
getID() == Intrinsic::masked_gather ||
4902 MICA.
getID() == Intrinsic::vp_gather)
4904 : Instruction::Store;
4914 if (!LT.first.isValid())
4918 if (!LT.second.isVector() ||
4920 VT->getElementType()->isIntegerTy(1))
4930 ElementCount LegalVF = LT.second.getVectorElementCount();
4933 {TTI::OK_AnyValue, TTI::OP_None},
I);
4949 EVT VT = TLI->getValueType(
DL, Ty,
true);
4951 if (VT == MVT::Other)
4956 if (!LT.first.isValid())
4966 (VTy->getElementType()->isIntegerTy(1) &&
4967 !VTy->getElementCount().isKnownMultipleOf(
4978 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
4979 LT.second.is128BitVector() && Alignment <
Align(16)) {
4985 const int AmortizationCost = 6;
4987 return LT.first * 2 * AmortizationCost;
4991 if (Ty->isPtrOrPtrVectorTy())
4996 if (Ty->getScalarSizeInBits() != LT.second.getScalarSizeInBits()) {
4998 if (VT == MVT::v4i8)
5005 if (!
isPowerOf2_32(EltSize) || EltSize < 8 || EltSize > 64 ||
5020 while (!TypeWorklist.
empty()) {
5042 bool UseMaskForCond,
bool UseMaskForGaps)
const {
5043 assert(Factor >= 2 &&
"Invalid interleave factor");
5058 if (!VecTy->
isScalableTy() && (UseMaskForCond || UseMaskForGaps))
5061 if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
5062 unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
5065 VecVTy->getElementCount().divideCoefficientBy(Factor));
5071 if (MinElts % Factor == 0 &&
5072 TLI->isLegalInterleavedAccessType(SubVecTy,
DL, UseScalable))
5073 return Factor * TLI->getNumInterleavedAccesses(SubVecTy,
DL, UseScalable);
5078 UseMaskForCond, UseMaskForGaps);
5085 for (
auto *
I : Tys) {
5086 if (!
I->isVectorTy())
5097 return ST->getMaxInterleaveFactor();
5107 enum { MaxStridedLoads = 7 };
5109 int StridedLoads = 0;
5112 for (
const auto BB : L->blocks()) {
5113 for (
auto &
I : *BB) {
5119 if (L->isLoopInvariant(PtrValue))
5124 if (!LSCEVAddRec || !LSCEVAddRec->
isAffine())
5133 if (StridedLoads > MaxStridedLoads / 2)
5134 return StridedLoads;
5137 return StridedLoads;
5140 int StridedLoads = countStridedLoads(L, SE);
5142 <<
" strided loads\n");
5158 unsigned *FinalSize) {
5162 for (
auto *BB : L->getBlocks()) {
5163 for (
auto &
I : *BB) {
5169 if (!Cost.isValid())
5173 if (LoopCost > Budget)
5195 if (MaxTC > 0 && MaxTC <= 32)
5206 if (Blocks.
size() != 2)
5228 if (!L->isInnermost() || L->getNumBlocks() > 8)
5232 if (!L->getExitBlock())
5238 bool HasParellelizableReductions =
5239 L->getNumBlocks() == 1 &&
5240 any_of(L->getHeader()->phis(),
5242 return canParallelizeReductionWhenUnrolling(Phi, L, &SE);
5245 if (HasParellelizableReductions &&
5267 if (HasParellelizableReductions) {
5278 if (Header == Latch) {
5281 unsigned Width = 10;
5287 unsigned MaxInstsPerLine = 16;
5289 unsigned BestUC = 1;
5290 unsigned SizeWithBestUC = BestUC *
Size;
5292 unsigned SizeWithUC = UC *
Size;
5293 if (SizeWithUC > 48)
5295 if ((SizeWithUC % MaxInstsPerLine) == 0 ||
5296 (SizeWithBestUC % MaxInstsPerLine) < (SizeWithUC % MaxInstsPerLine)) {
5298 SizeWithBestUC = BestUC *
Size;
5308 for (
auto *BB : L->blocks()) {
5309 for (
auto &
I : *BB) {
5319 for (
auto *U :
I.users())
5321 LoadedValuesPlus.
insert(U);
5328 return LoadedValuesPlus.
contains(
SI->getOperand(0));
5341 if (!Term || !Term->isConditional() || Preds.
size() == 1 ||
5355 auto *I = dyn_cast<Instruction>(V);
5356 return I && DependsOnLoopLoad(I, Depth + 1);
5363 DependsOnLoopLoad(
I, 0)) {
5379 if (L->getLoopDepth() > 1)
5390 for (
auto *BB : L->getBlocks()) {
5391 for (
auto &
I : *BB) {
5395 if (IsVectorized &&
I.getType()->isVectorTy())
5412 if (ST->isAppleMLike())
5414 else if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
5436 !ST->getSchedModel().isOutOfOrder()) {
5459 bool CanCreate)
const {
5463 case Intrinsic::aarch64_neon_st2:
5464 case Intrinsic::aarch64_neon_st3:
5465 case Intrinsic::aarch64_neon_st4: {
5468 if (!CanCreate || !ST)
5470 unsigned NumElts = Inst->
arg_size() - 1;
5471 if (ST->getNumElements() != NumElts)
5473 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5479 for (
unsigned i = 0, e = NumElts; i != e; ++i) {
5481 Res = Builder.CreateInsertValue(Res, L, i);
5485 case Intrinsic::aarch64_neon_ld2:
5486 case Intrinsic::aarch64_neon_ld3:
5487 case Intrinsic::aarch64_neon_ld4:
5488 if (Inst->
getType() == ExpectedType)
5499 case Intrinsic::aarch64_neon_ld2:
5500 case Intrinsic::aarch64_neon_ld3:
5501 case Intrinsic::aarch64_neon_ld4:
5502 Info.ReadMem =
true;
5503 Info.WriteMem =
false;
5506 case Intrinsic::aarch64_neon_st2:
5507 case Intrinsic::aarch64_neon_st3:
5508 case Intrinsic::aarch64_neon_st4:
5509 Info.ReadMem =
false;
5510 Info.WriteMem =
true;
5518 case Intrinsic::aarch64_neon_ld2:
5519 case Intrinsic::aarch64_neon_st2:
5520 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
5522 case Intrinsic::aarch64_neon_ld3:
5523 case Intrinsic::aarch64_neon_st3:
5524 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
5526 case Intrinsic::aarch64_neon_ld4:
5527 case Intrinsic::aarch64_neon_st4:
5528 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
5540 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const {
5541 bool Considerable =
false;
5542 AllowPromotionWithoutCommonHeader =
false;
5545 Type *ConsideredSExtType =
5547 if (
I.getType() != ConsideredSExtType)
5551 for (
const User *U :
I.users()) {
5553 Considerable =
true;
5557 if (GEPInst->getNumOperands() > 2) {
5558 AllowPromotionWithoutCommonHeader =
true;
5563 return Considerable;
5612 if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
5622 return LegalizationCost + 2;
5632 LegalizationCost *= LT.first - 1;
5635 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5644 return LegalizationCost + 2;
5652 std::optional<FastMathFlags> FMF,
5668 return BaseCost + FixedVTy->getNumElements();
5671 if (Opcode != Instruction::FAdd)
5685 MVT MTy = LT.second;
5686 int ISD = TLI->InstructionOpcodeToISD(Opcode);
5734 MTy.
isVector() && (EltTy->isFloatTy() || EltTy->isDoubleTy() ||
5735 (EltTy->isHalfTy() && ST->hasFullFP16()))) {
5737 if (ValTy->getElementCount().getFixedValue() >= 2 && NElts >= 2 &&
5747 return (LT.first - 1) +
Log2_32(NElts);
5752 return (LT.first - 1) + Entry->Cost;
5764 if (LT.first != 1) {
5770 ExtraCost *= LT.first - 1;
5773 auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
5774 return Cost + ExtraCost;
5782 unsigned Opcode,
bool IsUnsigned,
Type *ResTy,
VectorType *VecTy,
5784 EVT VecVT = TLI->getValueType(
DL, VecTy);
5785 EVT ResVT = TLI->getValueType(
DL, ResTy);
5795 if (((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5797 ((LT.second == MVT::v4i16 || LT.second == MVT::v8i16) &&
5799 ((LT.second == MVT::v2i32 || LT.second == MVT::v4i32) &&
5801 return (LT.first - 1) * 2 + 2;
5812 EVT VecVT = TLI->getValueType(
DL, VecTy);
5813 EVT ResVT = TLI->getValueType(
DL, ResTy);
5816 RedOpcode == Instruction::Add) {
5822 if ((LT.second == MVT::v8i8 || LT.second == MVT::v16i8) &&
5824 return LT.first + 2;
5859 EVT PromotedVT = LT.second.getScalarType() == MVT::i1
5860 ? TLI->getPromotedVTForPredicate(
EVT(LT.second))
5874 if (LT.second.getScalarType() == MVT::i1) {
5883 assert(Entry &&
"Illegal Type for Splice");
5884 LegalizationCost += Entry->Cost;
5885 return LegalizationCost * LT.first;
5889 unsigned Opcode,
Type *InputTypeA,
Type *InputTypeB,
Type *AccumType,
5898 if (VF.
isFixed() && !ST->isSVEorStreamingSVEAvailable() &&
5899 (!ST->isNeonAvailable() || !ST->hasDotProd()))
5902 if ((Opcode != Instruction::Add && Opcode != Instruction::Sub &&
5903 Opcode != Instruction::FAdd) ||
5910 assert(FMF &&
"Missing FastMathFlags for floating-point partial reduction");
5911 if (!FMF->allowReassoc() || !FMF->allowContract())
5915 "FastMathFlags only apply to floating-point partial reductions");
5919 (!BinOp || (OpBExtend !=
TTI::PR_None && InputTypeB)) &&
5920 "Unexpected values for OpBExtend or InputTypeB");
5924 if (BinOp && ((*BinOp != Instruction::Mul && *BinOp != Instruction::FMul) ||
5925 InputTypeA != InputTypeB))
5928 bool IsUSDot = OpBExtend !=
TTI::PR_None && OpAExtend != OpBExtend;
5929 if (IsUSDot && !ST->hasMatMulInt8())
5941 auto TC = TLI->getTypeConversion(AccumVectorType->
getContext(),
5950 if (TLI->getTypeAction(AccumVectorType->
getContext(), TC.second) !=
5956 std::pair<InstructionCost, MVT> AccumLT =
5958 std::pair<InstructionCost, MVT> InputLT =
5965 if (Opcode == Instruction::Sub)
5976 if (ST->isSVEorStreamingSVEAvailable() && !IsUSDot) {
5978 if (AccumLT.second.getScalarType() == MVT::i64 &&
5979 InputLT.second.getScalarType() == MVT::i16)
5982 if (AccumLT.second.getScalarType() == MVT::i32 &&
5983 InputLT.second.getScalarType() == MVT::i16 &&
5984 (ST->hasSVE2p1() || ST->hasSME2()))
5987 if (AccumLT.second.getScalarType() == MVT::i64 &&
5988 InputLT.second.getScalarType() == MVT::i8)
5998 if (ST->isSVEorStreamingSVEAvailable() ||
5999 (AccumLT.second.isFixedLengthVector() && ST->isNeonAvailable() &&
6000 ST->hasDotProd())) {
6001 if (AccumLT.second.getScalarType() == MVT::i32 &&
6002 InputLT.second.getScalarType() == MVT::i8)
6007 if (Opcode == Instruction::FAdd && (ST->hasSME2() || ST->hasSVE2p1())) {
6008 if (AccumLT.second.getScalarType() == MVT::f32 &&
6009 InputLT.second.getScalarType() == MVT::f16 &&
6010 AccumLT.second.getVectorMinNumElements() == 4 &&
6011 InputLT.second.getVectorMinNumElements() == 8)
6030 "Expected the Mask to match the return size if given");
6032 "Expected the same scalar types");
6038 LT.second.getScalarSizeInBits() * Mask.size() > 128 &&
6039 SrcTy->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
6040 Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
6048 return std::max<InstructionCost>(1, LT.first / 4);
6056 Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2) ||
6058 Mask, 3, SrcTy->getElementCount().getKnownMinValue() * 2)))
6061 unsigned TpNumElts = Mask.size();
6062 unsigned LTNumElts = LT.second.getVectorNumElements();
6063 unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
6065 LT.second.getVectorElementCount());
6067 std::map<std::tuple<unsigned, unsigned, SmallVector<int>>,
InstructionCost>
6069 for (
unsigned N = 0;
N < NumVecs;
N++) {
6073 unsigned Source1 = -1U, Source2 = -1U;
6074 unsigned NumSources = 0;
6075 for (
unsigned E = 0; E < LTNumElts; E++) {
6076 int MaskElt = (
N * LTNumElts + E < TpNumElts) ? Mask[
N * LTNumElts + E]
6085 unsigned Source = MaskElt / LTNumElts;
6086 if (NumSources == 0) {
6089 }
else if (NumSources == 1 && Source != Source1) {
6092 }
else if (NumSources >= 2 && Source != Source1 && Source != Source2) {
6098 if (Source == Source1)
6100 else if (Source == Source2)
6101 NMask.
push_back(MaskElt % LTNumElts + LTNumElts);
6110 PreviousCosts.insert({std::make_tuple(Source1, Source2, NMask), 0});
6121 NTp, NTp, NMask,
CostKind, 0,
nullptr, Args,
6124 Result.first->second = NCost;
6138 if (IsExtractSubvector && LT.second.isFixedLengthVector()) {
6139 if (LT.second.getFixedSizeInBits() >= 128 &&
6141 LT.second.getVectorNumElements() / 2) {
6144 if (Index == (
int)LT.second.getVectorNumElements() / 2)
6158 if (!Mask.empty() && LT.second.isFixedLengthVector() &&
6161 return M.value() < 0 || M.value() == (int)M.index();
6167 !Mask.empty() && SrcTy->getPrimitiveSizeInBits().isNonZero() &&
6168 SrcTy->getPrimitiveSizeInBits().isKnownMultipleOf(
6177 if ((ST->hasSVE2p1() || ST->hasSME2p1()) &&
6178 ST->isSVEorStreamingSVEAvailable() &&
6183 if (ST->isSVEorStreamingSVEAvailable() &&
6197 if (IsLoad && LT.second.isVector() &&
6199 LT.second.getVectorElementCount()))
6205 if (Mask.size() == 4 &&
6207 (SrcTy->getScalarSizeInBits() == 16 ||
6208 SrcTy->getScalarSizeInBits() == 32) &&
6209 all_of(Mask, [](
int E) {
return E < 8; }))
6215 if (LT.second.isFixedLengthVector() &&
6216 LT.second.getVectorNumElements() == Mask.size() &&
6222 (
isZIPMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6223 isTRNMask(Mask, LT.second.getVectorNumElements(), Unused, Unused) ||
6224 isUZPMask(Mask, LT.second.getVectorNumElements(), Unused) ||
6225 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6226 LT.second.getVectorNumElements(), 16) ||
6227 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6228 LT.second.getVectorNumElements(), 32) ||
6229 isREVMask(Mask, LT.second.getScalarSizeInBits(),
6230 LT.second.getVectorNumElements(), 64) ||
6233 [&Mask](
int M) {
return M < 0 || M == Mask[0]; })))
6362 return LT.first * Entry->Cost;
6371 LT.second.getSizeInBits() <= 128 && SubTp) {
6373 if (SubLT.second.isVector()) {
6374 int NumElts = LT.second.getVectorNumElements();
6375 int NumSubElts = SubLT.second.getVectorNumElements();
6376 if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0)
6382 if (IsExtractSubvector)
6399 if (
getPtrStride(*PSE, AccessTy, Ptr, TheLoop, DT, Strides,
6418 return ST->useFixedOverScalableIfEqualCost();
6422 return ST->getEpilogueVectorizationMinVF();
6457 unsigned NumInsns = 0;
6459 NumInsns += BB->sizeWithoutDebug();
6469 int64_t Scale,
unsigned AddrSpace)
const {
6497 if (
I->getOpcode() == Instruction::Or &&
6502 if (
I->getOpcode() == Instruction::Add ||
6503 I->getOpcode() == Instruction::Sub)
6528 return all_equal(Shuf->getShuffleMask());
6535 bool AllowSplat =
false) {
6540 auto areTypesHalfed = [](
Value *FullV,
Value *HalfV) {
6541 auto *FullTy = FullV->
getType();
6542 auto *HalfTy = HalfV->getType();
6544 2 * HalfTy->getPrimitiveSizeInBits().getFixedValue();
6547 auto extractHalf = [](
Value *FullV,
Value *HalfV) {
6550 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
6554 Value *S1Op1 =
nullptr, *S2Op1 =
nullptr;
6568 if ((S1Op1 && (!areTypesHalfed(S1Op1, Op1) || !extractHalf(S1Op1, Op1))) ||
6569 (S2Op1 && (!areTypesHalfed(S2Op1, Op2) || !extractHalf(S2Op1, Op2))))
6583 if ((M1Start != 0 && M1Start != (NumElements / 2)) ||
6584 (M2Start != 0 && M2Start != (NumElements / 2)))
6586 if (S1Op1 && S2Op1 && M1Start != M2Start)
6596 return Ext->getType()->getScalarSizeInBits() ==
6597 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
6611 Value *VectorOperand =
nullptr;
6628 if (!
GEP ||
GEP->getNumOperands() != 2)
6632 Value *Offsets =
GEP->getOperand(1);
6635 if (
Base->getType()->isVectorTy() || !Offsets->getType()->isVectorTy())
6641 if (OffsetsInst->getType()->getScalarSizeInBits() > 32 &&
6642 OffsetsInst->getOperand(0)->getType()->getScalarSizeInBits() <= 32)
6643 Ops.push_back(&
GEP->getOperandUse(1));
6679 switch (
II->getIntrinsicID()) {
6680 case Intrinsic::aarch64_neon_smull:
6681 case Intrinsic::aarch64_neon_umull:
6684 Ops.push_back(&
II->getOperandUse(0));
6685 Ops.push_back(&
II->getOperandUse(1));
6690 case Intrinsic::fma:
6691 case Intrinsic::fmuladd:
6698 Ops.push_back(&
II->getOperandUse(0));
6700 Ops.push_back(&
II->getOperandUse(1));
6703 case Intrinsic::aarch64_neon_sqdmull:
6704 case Intrinsic::aarch64_neon_sqdmulh:
6705 case Intrinsic::aarch64_neon_sqrdmulh:
6708 Ops.push_back(&
II->getOperandUse(0));
6710 Ops.push_back(&
II->getOperandUse(1));
6711 return !
Ops.empty();
6712 case Intrinsic::aarch64_neon_fmlal:
6713 case Intrinsic::aarch64_neon_fmlal2:
6714 case Intrinsic::aarch64_neon_fmlsl:
6715 case Intrinsic::aarch64_neon_fmlsl2:
6718 Ops.push_back(&
II->getOperandUse(1));
6720 Ops.push_back(&
II->getOperandUse(2));
6721 return !
Ops.empty();
6722 case Intrinsic::aarch64_sve_ptest_first:
6723 case Intrinsic::aarch64_sve_ptest_last:
6725 if (IIOp->getIntrinsicID() == Intrinsic::aarch64_sve_ptrue)
6726 Ops.push_back(&
II->getOperandUse(0));
6727 return !
Ops.empty();
6728 case Intrinsic::aarch64_sme_write_horiz:
6729 case Intrinsic::aarch64_sme_write_vert:
6730 case Intrinsic::aarch64_sme_writeq_horiz:
6731 case Intrinsic::aarch64_sme_writeq_vert: {
6733 if (!Idx || Idx->getOpcode() != Instruction::Add)
6735 Ops.push_back(&
II->getOperandUse(1));
6738 case Intrinsic::aarch64_sme_read_horiz:
6739 case Intrinsic::aarch64_sme_read_vert:
6740 case Intrinsic::aarch64_sme_readq_horiz:
6741 case Intrinsic::aarch64_sme_readq_vert:
6742 case Intrinsic::aarch64_sme_ld1b_vert:
6743 case Intrinsic::aarch64_sme_ld1h_vert:
6744 case Intrinsic::aarch64_sme_ld1w_vert:
6745 case Intrinsic::aarch64_sme_ld1d_vert:
6746 case Intrinsic::aarch64_sme_ld1q_vert:
6747 case Intrinsic::aarch64_sme_st1b_vert:
6748 case Intrinsic::aarch64_sme_st1h_vert:
6749 case Intrinsic::aarch64_sme_st1w_vert:
6750 case Intrinsic::aarch64_sme_st1d_vert:
6751 case Intrinsic::aarch64_sme_st1q_vert:
6752 case Intrinsic::aarch64_sme_ld1b_horiz:
6753 case Intrinsic::aarch64_sme_ld1h_horiz:
6754 case Intrinsic::aarch64_sme_ld1w_horiz:
6755 case Intrinsic::aarch64_sme_ld1d_horiz:
6756 case Intrinsic::aarch64_sme_ld1q_horiz:
6757 case Intrinsic::aarch64_sme_st1b_horiz:
6758 case Intrinsic::aarch64_sme_st1h_horiz:
6759 case Intrinsic::aarch64_sme_st1w_horiz:
6760 case Intrinsic::aarch64_sme_st1d_horiz:
6761 case Intrinsic::aarch64_sme_st1q_horiz: {
6763 if (!Idx || Idx->getOpcode() != Instruction::Add)
6765 Ops.push_back(&
II->getOperandUse(3));
6768 case Intrinsic::aarch64_neon_pmull:
6771 Ops.push_back(&
II->getOperandUse(0));
6772 Ops.push_back(&
II->getOperandUse(1));
6774 case Intrinsic::aarch64_neon_pmull64:
6776 II->getArgOperand(1)))
6778 Ops.push_back(&
II->getArgOperandUse(0));
6779 Ops.push_back(&
II->getArgOperandUse(1));
6781 case Intrinsic::masked_gather:
6784 Ops.push_back(&
II->getArgOperandUse(0));
6786 case Intrinsic::masked_scatter:
6789 Ops.push_back(&
II->getArgOperandUse(1));
6796 auto ShouldSinkCondition = [](
Value *
Cond,
6801 if (
II->getIntrinsicID() != Intrinsic::vector_reduce_or ||
6805 Ops.push_back(&
II->getOperandUse(0));
6809 switch (
I->getOpcode()) {
6810 case Instruction::GetElementPtr:
6811 case Instruction::Add:
6812 case Instruction::Sub:
6814 for (
unsigned Op = 0;
Op <
I->getNumOperands(); ++
Op) {
6816 Ops.push_back(&
I->getOperandUse(
Op));
6821 case Instruction::Select: {
6822 if (!ShouldSinkCondition(
I->getOperand(0),
Ops))
6825 Ops.push_back(&
I->getOperandUse(0));
6828 case Instruction::Br: {
6835 Ops.push_back(&
I->getOperandUse(0));
6838 case Instruction::FMul:
6843 Ops.push_back(&
I->getOperandUse(0));
6845 Ops.push_back(&
I->getOperandUse(1));
6853 if (!
I->getType()->isVectorTy())
6854 return !
Ops.empty();
6856 switch (
I->getOpcode()) {
6857 case Instruction::Sub:
6858 case Instruction::Add: {
6867 Ops.push_back(&Ext1->getOperandUse(0));
6868 Ops.push_back(&Ext2->getOperandUse(0));
6871 Ops.push_back(&
I->getOperandUse(0));
6872 Ops.push_back(&
I->getOperandUse(1));
6876 case Instruction::Or: {
6879 if (ST->hasNEON()) {
6893 if (
I->getParent() != MainAnd->
getParent() ||
6898 if (
I->getParent() != IA->getParent() ||
6899 I->getParent() != IB->getParent())
6904 Ops.push_back(&
I->getOperandUse(0));
6905 Ops.push_back(&
I->getOperandUse(1));
6914 case Instruction::Mul: {
6915 auto ShouldSinkSplatForIndexedVariant = [](
Value *V) {
6918 if (Ty->isScalableTy())
6922 return Ty->getScalarSizeInBits() == 16 || Ty->getScalarSizeInBits() == 32;
6925 int NumZExts = 0, NumSExts = 0;
6926 for (
auto &
Op :
I->operands()) {
6933 auto *ExtOp = Ext->getOperand(0);
6934 if (
isSplatShuffle(ExtOp) && ShouldSinkSplatForIndexedVariant(ExtOp))
6935 Ops.push_back(&Ext->getOperandUse(0));
6943 if (Ext->getOperand(0)->getType()->getScalarSizeInBits() * 2 <
6944 I->getType()->getScalarSizeInBits())
6981 if (!ElementConstant || !ElementConstant->
isZero())
6984 unsigned Opcode = OperandInstr->
getOpcode();
6985 if (Opcode == Instruction::SExt)
6987 else if (Opcode == Instruction::ZExt)
6992 unsigned Bitwidth =
I->getType()->getScalarSizeInBits();
7002 Ops.push_back(&Insert->getOperandUse(1));
7008 if (!
Ops.empty() && (NumSExts == 2 || NumZExts == 2))
7012 if (!ShouldSinkSplatForIndexedVariant(
I))
7017 Ops.push_back(&
I->getOperandUse(0));
7019 Ops.push_back(&
I->getOperandUse(1));
7021 return !
Ops.empty();
7023 case Instruction::FMul: {
7025 if (
I->getType()->isScalableTy())
7026 return !
Ops.empty();
7030 return !
Ops.empty();
7034 Ops.push_back(&
I->getOperandUse(0));
7036 Ops.push_back(&
I->getOperandUse(1));
7037 return !
Ops.empty();
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides a helper that implements much of the TTI interface in terms of the target-independ...
static Error reportError(StringRef Message)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Cost tables and simple lookup functions.
This file defines the DenseMap class.
static Value * getCondition(Instruction *I)
const HexagonInstrInfo * TII
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file defines the LoopVectorizationLegality class.
static const Function * getCalledFunction(const Value *V)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
const SmallVectorImpl< MachineOperand > & Cond
static uint64_t getBits(uint64_t Val, int Start, int End)
static SymbolRef::Type getType(const Symbol *Sym)
This file describes how to lower LLVM code to machine code.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
unsigned getVectorInsertExtractBaseCost() const
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const override
unsigned getMaxInterleaveFactor(ElementCount VF) const override
InstructionCost getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
InstructionCost getGatherScatterOpCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const override
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr, TTI::TargetCostKind CostKind) const override
bool isExtPartOfAvgExpr(const Instruction *ExtUser, Type *Dst, Type *Src) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getIntImmCost(int64_t Val) const
Calculate the cost of materializing a 64-bit value.
std::optional< InstructionCost > getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE, std::function< InstructionCost(Type *)> InstCost) const
FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the architecture features are not...
bool prefersVectorizedAddressing() const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput) const override
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const override
bool isElementTypeLegalForScalableVector(Type *Ty) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
APInt getPriorityMask(const Function &F) const override
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const override
bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, const TargetTransformInfo::LSRCost &C2) const override
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
bool useNeonVector(const Type *Ty) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const override
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const override
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const override
bool areInlineCompatible(const Function *Caller, const Function *Callee) const override
unsigned getMaxNumElements(ElementCount VF) const
Try to return an estimate cost factor that can be used as a multiplier when scalarizing an operation ...
bool shouldTreatInstructionLikeSelect(const Instruction *I) const override
bool isMultiversionedFunction(const Function &F) const override
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const override
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const override
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const override
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const override
bool isLegalMaskedGatherScatter(Type *DataType) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const override
See if I should be considered for address type promotion.
APInt getFeatureMask(const Function &F) const override
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const override
bool enableScalableVectorization() const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const override
bool hasKnownLowerThroughputFromSchedulingModel(unsigned Opcode1, unsigned Opcode2) const
Check whether Opcode1 has less throughput according to the scheduling model than Opcode2.
unsigned getEpilogueVectorizationMinVF() const override
InstructionCost getSpliceCost(VectorType *Tp, int Index, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, TTI::TargetCostKind CostKind) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const override
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const override
Class for arbitrary precision integers.
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
unsigned popcount() const
Count the number of bits set.
unsigned countLeadingOnes() const
void negate()
Negate this APInt in place.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
unsigned logBase2() const
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false) const override
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const override
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask, VectorType *SrcTy, int &Index, VectorType *&SubTy) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const override
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const override
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const override
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Op2Info={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const override
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE) const override
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP) const override
InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const override
InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const override
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const override
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const override
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const override
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr) const override
bool isTypeLegal(Type *Ty) const override
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Value * getArgOperand(unsigned i) const
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
static bool isIntPredicate(Predicate P)
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
This is the shared class of boolean and integer constants.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
const APInt & getValue() const
Return the constant as an APInt value reference.
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
static constexpr ElementCount getScalable(ScalarTy MinVal)
static constexpr ElementCount getFixed(ScalarTy MinVal)
This provides a helper for copying FMF from an instruction or setting specified flags.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
Container class for subtarget features.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
CallInst * CreateInsertVector(Type *DstType, Value *SrcVec, Value *SubVec, Value *Idx, const Twine &Name="")
Create a call to the vector.insert intrinsic.
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Type * getDoubleTy()
Fetch the type representing a 64-bit floating point value.
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
LLVM_ABI CallInst * CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Type * getHalfTy()
Fetch the type representing a 16-bit floating point value.
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Value * CreateBinOpFMF(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, FMFSource FMFSource, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool' for the isVolatile parameter.
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
LLVM_ABI CallInst * CreateMaskedStore(Value *Val, Value *Ptr, Align Alignment, Value *Mask)
Create a call to Masked Store intrinsic.
Type * getFloatTy()
Fetch the type representing a 32-bit floating point value.
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI Value * CreateElementCount(Type *Ty, ElementCount EC)
Create an expression which evaluates to the number of elements in EC at runtime.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
The core instruction combiner logic.
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
static InstructionCost getInvalid(CostType Val=0)
CostType getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full range of operator support required for arithmetic and comparisons.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
bool hasGroups() const
Returns true if we have any interleave groups.
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
const SmallVectorImpl< const Value * > & getArgs() const
Intrinsic::ID getID() const
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
iterator_range< block_iterator > blocks() const
RecurrenceSet & getFixedOrderRecurrences()
Return the fixed-order recurrences found in the loop.
DominatorTree * getDominatorTree() const
PredicatedScalarEvolution * getPredicatedScalarEvolution() const
const ReductionList & getReductionVars() const
Returns the reduction variables found in the loop.
Represents a single loop in the control flow graph.
const FeatureBitset & getFeatureBits() const
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
Information for memory intrinsic cost model.
Align getAlignment() const
Type * getDataType() const
Intrinsic::ID getID() const
const Instruction * getInst() const
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Type * getRecurrenceType() const
Returns the type of the recurrence.
RecurKind getRecurrenceKind() const
This node represents a polynomial recurrence on the trip count of the specified loop.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
This class represents an analyzed expression in the program.
SMEAttrs is a utility class to parse the SME ACLE attributes on functions.
bool hasNonStreamingInterfaceAndBody() const
bool hasStreamingCompatibleInterface() const
bool hasStreamingInterfaceOrBody() const
bool isSMEABIRoutine() const
bool hasStreamingBody() const
void set(unsigned M, bool Enable=true)
SMECallAttrs is a utility class to hold the SMEAttrs for a callsite.
bool requiresPreservingZT0() const
bool requiresSMChange() const
bool requiresLazySave() const
bool requiresPreservingAllZAState() const
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
static ScalableVectorType * getDoubleElementsVectorType(ScalableVectorType *VTy)
The main scalar evolution driver.
LLVM_ABI const SCEV * getBackedgeTakenCount(const Loop *L, ExitCountKind Kind=Exact)
If the specified loop has a predictable backedge-taken count, return it, otherwise return a SCEVCouldNotCompute object.
LLVM_ABI unsigned getSmallConstantTripMultiple(const Loop *L, const SCEV *ExitCount)
Returns the largest constant divisor of the trip count as a normal unsigned value,...
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI unsigned getSmallConstantMaxTripCount(const Loop *L, SmallVectorImpl< const SCEVPredicate * > *Predicates=nullptr)
Returns the upper bound of the loop trip count as a normal unsigned value.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
const SCEV * getSymbolicMaxBackedgeTakenCount(const Loop *L)
When successful, this returns a SCEV that is greater than or equal to (i.e.
This instruction constructs a fixed permutation of two input vectors.
static LLVM_ABI bool isDeInterleaveMaskOfFactor(ArrayRef< int > Mask, unsigned Factor, unsigned &Index)
Check if the mask is a DE-interleave mask of the given factor Factor like: <Index,...
static LLVM_ABI bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
static LLVM_ABI bool isInterleaveMask(ArrayRef< int > Mask, unsigned Factor, unsigned NumInputElts, SmallVectorImpl< unsigned > &StartIndexes)
Return true if the mask interleaves one or more input vectors together.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
static StackOffset getScalable(int64_t Scalable)
static StackOffset getFixed(int64_t Fixed)
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Class to represent struct types.
TargetInstrInfo - Interface to description of machine instruction set.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInfo.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
bool isVectorTy() const
True if this is an instance of VectorType.
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the same way as for normal integer types.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size; return false otherwise.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
LLVM_ABI APInt getCpuSupportsMask(ArrayRef< StringRef > Features)
static constexpr unsigned SVEBitsPerBlock
LLVM_ABI APInt getFMVPriority(ArrayRef< StringRef > Features)
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
ISD namespace - This namespace contains an enum which represents all of the SelectionDAG node types a...
@ ADD
Simple integer binary arithmetic operators.
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ SIGN_EXTEND
Conversion operators.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SHL
Shift and rotation operations.
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the destination VT.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_nonnegative > m_NonNegative()
Match an integer or vector of non-negative values.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
FunctionAddr VTableAddr Value
std::optional< unsigned > isDUPQMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPQMask - matches a splat of equivalent lanes within segments of a given number of elements.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
const CostTblEntryT< CostType > * CostTableLookup(ArrayRef< CostTblEntryT< CostType > > Tbl, int ISD, MVT Ty)
Find in cost table.
LLVM_ABI bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name)
Returns true if Name is applied to TheLoop and enabled.
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
TailFoldingOpts
An enum to describe what types of loops we should attempt to tail-fold: Disabled: None Reductions: Lo...
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
bool isDUPFirstSegmentMask(ArrayRef< int > Mask, unsigned Segments, unsigned SegmentSize)
isDUPFirstSegmentMask - matches a splat of the first 128b segment.
TypeConversionCostTblEntryT< unsigned > TypeConversionCostTblEntry
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
LLVM_ABI std::optional< const MDOperand * > findStringMetadataForLoop(const Loop *TheLoop, StringRef Name)
Find string metadata for loop.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
unsigned M1(unsigned Val)
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
unsigned getPerfectShuffleCost(llvm::ArrayRef< int > M)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI Value * simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a BinaryOperator, fold the result or return null.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ Or
Bitwise or logical OR of integers.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ Sub
Subtraction of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
CostTblEntryT< unsigned > CostTblEntry
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
unsigned getNumElementsFromSVEPredPattern(unsigned Pattern)
Return the number of active elements for VL1 to VL256 predicate pattern, zero for all other patterns.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const TypeConversionCostTblEntryT< CostType > * ConvertCostTableLookup(ArrayRef< TypeConversionCostTblEntryT< CostType > > Tbl, int ISD, MVT Dst, MVT Src)
Find in type conversion cost table.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
unsigned getMatchingIROpode() const
bool inactiveLanesAreUnused() const
bool inactiveLanesAreNotDefined() const
bool hasMatchingUndefIntrinsic() const
static SVEIntrinsicInfo defaultMergingUnaryNarrowingTopOp()
static SVEIntrinsicInfo defaultZeroingOp()
bool hasGoverningPredicate() const
SVEIntrinsicInfo & setOperandIdxInactiveLanesTakenFrom(unsigned Index)
static SVEIntrinsicInfo defaultMergingOp(Intrinsic::ID IID=Intrinsic::not_intrinsic)
SVEIntrinsicInfo & setOperandIdxWithNoActiveLanes(unsigned Index)
unsigned getOperandIdxWithNoActiveLanes() const
SVEIntrinsicInfo & setInactiveLanesAreUnused()
SVEIntrinsicInfo & setInactiveLanesAreNotDefined()
SVEIntrinsicInfo & setGoverningPredicateOperandIdx(unsigned Index)
bool inactiveLanesTakenFromOperand() const
static SVEIntrinsicInfo defaultUndefOp()
bool hasOperandWithNoActiveLanes() const
Intrinsic::ID getMatchingUndefIntrinsic() const
SVEIntrinsicInfo & setResultIsZeroInitialized()
static SVEIntrinsicInfo defaultMergingUnaryOp()
SVEIntrinsicInfo & setMatchingUndefIntrinsic(Intrinsic::ID IID)
unsigned getGoverningPredicateOperandIdx() const
bool hasMatchingIROpode() const
bool resultIsZeroInitialized() const
SVEIntrinsicInfo & setMatchingIROpcode(unsigned Opcode)
unsigned getOperandIdxInactiveLanesTakenFrom() const
static SVEIntrinsicInfo defaultVoidOp(unsigned GPIndex)
This struct is a compact representation of a valid (non-zero power of two) alignment.
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
uint64_t getScalarSizeInBits() const
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool isFixedLengthVector() const
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Machine model for scheduling, bundling, and heuristics.
static LLVM_ABI double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Information about a load/store intrinsic defined by the target.
InterleavedAccessInfo * IAI
LoopVectorizationLegality * LVL
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...