28#include "llvm/IR/IntrinsicsAMDGPU.h"
36#define DEBUG_TYPE "AMDGPUtti"
40struct AMDGPUImageDMaskIntrinsic {
44#define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
45#include "AMDGPUGenSearchableTables.inc"
75 Type *VTy = V.getType();
84 APFloat FloatValue(ConstFloat->getValueAPF());
85 bool LosesInfo =
true;
94 APInt IntValue(ConstInt->getValue());
113 Type *VTy = V.getType();
139 Func(Args, OverloadTys);
154 bool RemoveOldIntr = &OldIntr != &InstToReplace;
163static std::optional<Instruction *>
168 if (
const auto *LZMappingInfo =
170 if (
auto *ConstantLod =
172 if (ConstantLod->isZero() || ConstantLod->isNegative()) {
177 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
178 Args.erase(Args.begin() + ImageDimIntr->LodIndex);
185 if (
const auto *MIPMappingInfo =
187 if (
auto *ConstantMip =
189 if (ConstantMip->isZero()) {
194 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
195 Args.erase(Args.begin() + ImageDimIntr->MipIndex);
202 if (
const auto *BiasMappingInfo =
204 if (
auto *ConstantBias =
206 if (ConstantBias->isZero()) {
211 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
212 Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
213 ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
220 if (
const auto *OffsetMappingInfo =
222 if (
auto *ConstantOffset =
224 if (ConstantOffset->isZero()) {
227 OffsetMappingInfo->NoOffset, ImageDimIntr->
Dim);
229 II,
II, NewImageDimIntr->
Intr, IC, [&](
auto &Args,
auto &ArgTys) {
230 Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
237 if (ST->hasD16Images()) {
247 if (
II.hasOneUse()) {
250 if (
User->getOpcode() == Instruction::FPTrunc &&
254 [&](
auto &Args,
auto &ArgTys) {
257 ArgTys[0] = User->getType();
266 bool AllHalfExtracts =
true;
268 for (
User *U :
II.users()) {
270 if (!Ext || !Ext->hasOneUse()) {
271 AllHalfExtracts =
false;
276 if (!Tr || !Tr->getType()->isHalfTy()) {
277 AllHalfExtracts =
false;
284 if (!ExtractTruncPairs.
empty() && AllHalfExtracts) {
295 OverloadTys[0] = HalfVecTy;
298 M, ImageDimIntr->
Intr, OverloadTys);
300 II.mutateType(HalfVecTy);
301 II.setCalledFunction(HalfDecl);
304 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
305 Value *Idx = Ext->getIndexOperand();
307 Builder.SetInsertPoint(Tr);
309 Value *HalfExtract = Builder.CreateExtractElement(&
II, Idx);
312 Tr->replaceAllUsesWith(HalfExtract);
315 for (
auto &[Ext, Tr] : ExtractTruncPairs) {
326 if (!ST->hasA16() && !ST->hasG16())
333 bool FloatCoord =
false;
335 bool OnlyDerivatives =
false;
338 OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
339 Value *Coord =
II.getOperand(OperandIndex);
342 if (OperandIndex < ImageDimIntr->CoordStart ||
347 OnlyDerivatives =
true;
356 if (!OnlyDerivatives && !ST->hasA16())
357 OnlyDerivatives =
true;
360 if (!OnlyDerivatives && ImageDimIntr->
NumBiasArgs != 0) {
363 "Only image instructions with a sampler can have a bias");
365 OnlyDerivatives =
true;
368 if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->
GradientStart ==
376 II,
II,
II.getIntrinsicID(), IC, [&](
auto &Args,
auto &ArgTys) {
377 ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
378 if (!OnlyDerivatives) {
379 ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
382 if (ImageDimIntr->NumBiasArgs != 0)
383 ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
389 OperandIndex < EndIndex; OperandIndex++) {
391 convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
396 Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
397 Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
426 Value *Src =
nullptr;
429 if (Src->getType()->isHalfTy())
446 unsigned VWidth = VTy->getNumElements();
449 for (
int i = VWidth - 1; i > 0; --i) {
471 unsigned VWidth = VTy->getNumElements();
477 SVI->getShuffleMask(ShuffleMask);
479 for (
int I = VWidth - 1;
I > 0; --
I) {
480 if (ShuffleMask.empty()) {
531 unsigned LaneArgIdx)
const {
532 unsigned MaskBits = ST->getWavefrontSizeLog2();
546 Value *LaneArg =
II.getArgOperand(LaneArgIdx);
549 if (MaskedConst != LaneArg) {
550 II.getOperandUse(LaneArgIdx).set(MaskedConst);
562 CallInst *NewCall =
B.CreateCall(&NewCallee,
Ops, OpBundles);
578 if (ST.isWave32() &&
match(V, W32Pred))
580 if (ST.isWave64() &&
match(V, W64Pred))
589 const auto IID =
II.getIntrinsicID();
590 assert(IID == Intrinsic::amdgcn_readlane ||
591 IID == Intrinsic::amdgcn_readfirstlane ||
592 IID == Intrinsic::amdgcn_permlane64);
602 const bool IsReadLane = (IID == Intrinsic::amdgcn_readlane);
606 Value *LaneID =
nullptr;
608 LaneID =
II.getOperand(1);
622 const auto DoIt = [&](
unsigned OpIdx,
626 Ops.push_back(LaneID);
642 return DoIt(0,
II.getCalledFunction());
646 Type *SrcTy = Src->getType();
652 return DoIt(0, Remangled);
660 return DoIt(1,
II.getCalledFunction());
662 return DoIt(0,
II.getCalledFunction());
673 unsigned Depth = 0) {
683 return CI->getZExtValue();
692 std::optional<unsigned>
LHS =
696 std::optional<unsigned>
RHS =
705 return CI ? std::optional<unsigned>(CI->getZExtValue()) : std::nullopt;
713 unsigned WaveSize = ST.getWavefrontSize();
715 for (
unsigned Lane :
seq(WaveSize)) {
717 if (!Val || *Val >= WaveSize)
726template <
unsigned Period>
728 static_assert(
isPowerOf2_32(Period),
"Period must be a power of two");
729 for (
unsigned I = Period,
E = Ids.
size();
I <
E; ++
I)
730 if (Ids[
I] != Ids[
I % Period] + (
I & ~(Period - 1)))
738 for (
unsigned I = 0;
I <
N; ++
I)
754 return Ids[3] << 6 | Ids[2] << 4 | Ids[1] << 2 | Ids[0];
761 for (
unsigned J = 0; J <
N; ++J)
762 if (Ids[J] != (
N - 1) - J)
774 for (
unsigned J = 1; J < 16; ++J)
775 if (Ids[J] != (Ids[0] + J) % 16)
793 unsigned Mask = Ids[0];
796 for (
unsigned J = 0; J < 16; ++J)
797 if (Ids[J] != (Mask ^ J))
807 unsigned Selector = 0;
808 for (
unsigned J = 0; J < 8; ++J)
809 Selector |= Ids[J] << (J * 3);
818 for (
unsigned J = 0; J < 16; ++J)
819 Sel |=
static_cast<uint64_t>(Ids[J] & 0xF) << (J * 4);
826 if (Ids.
size() != 64)
828 for (
unsigned J = 0; J < 64; ++J)
829 if (Ids[J] != (J ^ 32))
840 for (
unsigned J = 0; J < 16; ++J) {
841 if (Ids[J] < 16 || Ids[J] >= 32)
843 if (Ids[J + 16] != Ids[J] - 16)
854static std::optional<unsigned>
863 unsigned AndMask = 0, OrMask = 0, XorMask = 0;
864 for (
unsigned B = 0;
B < 5; ++
B) {
865 unsigned Bit0 = (Ids[0] >>
B) & 1;
866 unsigned Bit1 = (Ids[1u <<
B] >>
B) & 1;
869 XorMask |= Bit0 <<
B;
877 for (
unsigned I :
seq(32u)) {
878 unsigned Expected = ((
I & AndMask) | OrMask) ^ XorMask;
893static std::optional<unsigned>
904 for (
unsigned I = 0;
I < 32; ++
I)
905 if (Ids[
I] != (
I +
N) % 32)
917 return B.CreateIntrinsic(Intrinsic::amdgcn_update_dpp, {Ty},
919 B.getInt32(0xF),
B.getInt32(0xF),
B.getTrue()});
924 return B.CreateIntrinsic(Intrinsic::amdgcn_mov_dpp8, {Val->
getType()},
925 {Val,
B.getInt32(Selector)});
932 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane16, {Ty},
934 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
942 return B.CreateIntrinsic(Intrinsic::amdgcn_permlanex16, {Ty},
944 B.getInt32(
Hi),
B.getFalse(),
B.getFalse()});
952 assert(
DL.getTypeSizeInBits(OrigTy) == 32 &&
953 "ds_swizzle only supports 32-bit operands");
957 Src =
B.CreatePtrToInt(Src, I32Ty);
958 else if (OrigTy != I32Ty)
959 Src =
B.CreateBitCast(Src, I32Ty);
960 Value *Result =
B.CreateIntrinsic(Intrinsic::amdgcn_ds_swizzle, {},
963 return B.CreateIntToPtr(Result, OrigTy);
965 return B.CreateBitCast(Result, OrigTy);
971 return B.CreateIntrinsic(Intrinsic::amdgcn_permlane64, {Val->
getType()},
1001 if (ST.hasDPPRowShare()) {
1006 if (ST.hasDPP() && ST.hasGFX10Insts()) {
1016 if (ST.hasPermlane16Insts()) {
1036 if (ST.hasDsSwizzleRotateMode()) {
1049static std::optional<Instruction *>
1053 if (
DL.getTypeSizeInBits(
II.getType()) != 32)
1054 return std::nullopt;
1056 if (!ST.isWaveSizeKnown())
1057 return std::nullopt;
1059 unsigned WaveSize = ST.getWavefrontSize();
1060 bool IsBpermute =
II.getIntrinsicID() == Intrinsic::amdgcn_ds_bpermute;
1061 Value *Src =
II.getArgOperand(IsBpermute ? 1 : 0);
1062 Value *Index =
II.getArgOperand(IsBpermute ? 0 : 1);
1067 for (
unsigned Lane :
seq(WaveSize)) {
1069 if (!Val || (*Val & 3) || (*Val >> 2) >= WaveSize)
1070 return std::nullopt;
1071 Ids[Lane] = *Val >> 2;
1075 return std::nullopt;
1080 return std::nullopt;
1084std::optional<Instruction *>
1088 case Intrinsic::amdgcn_implicitarg_ptr: {
1089 if (
II.getFunction()->hasFnAttribute(
"amdgpu-no-implicitarg-ptr"))
1091 uint64_t ImplicitArgBytes = ST->getImplicitArgNumBytes(*
II.getFunction());
1094 II.getAttributes().getRetDereferenceableOrNullBytes();
1095 if (CurrentOrNullBytes != 0) {
1098 uint64_t NewBytes = std::max(CurrentOrNullBytes, ImplicitArgBytes);
1101 II.removeRetAttr(Attribute::DereferenceableOrNull);
1105 uint64_t CurrentBytes =
II.getAttributes().getRetDereferenceableBytes();
1106 uint64_t NewBytes = std::max(CurrentBytes, ImplicitArgBytes);
1107 if (NewBytes != CurrentBytes) {
1113 return std::nullopt;
1115 case Intrinsic::amdgcn_rcp: {
1116 Value *Src =
II.getArgOperand(0);
1127 if (
II.isStrictFP())
1131 const APFloat &ArgVal =
C->getValueAPF();
1149 auto IID = SrcCI->getIntrinsicID();
1154 if (IID == Intrinsic::amdgcn_sqrt || IID == Intrinsic::sqrt) {
1164 SrcCI->getModule(), Intrinsic::amdgcn_rsq, {SrcCI->getType()});
1167 II.setFastMathFlags(InnerFMF);
1169 II.setCalledFunction(NewDecl);
1175 case Intrinsic::amdgcn_sqrt:
1176 case Intrinsic::amdgcn_rsq:
1177 case Intrinsic::amdgcn_tanh: {
1178 Value *Src =
II.getArgOperand(0);
1190 if (IID == Intrinsic::amdgcn_sqrt && Src->getType()->isHalfTy()) {
1192 II.getModule(), Intrinsic::sqrt, {II.getType()});
1193 II.setCalledFunction(NewDecl);
1199 case Intrinsic::amdgcn_log:
1200 case Intrinsic::amdgcn_exp2: {
1201 const bool IsLog = IID == Intrinsic::amdgcn_log;
1202 const bool IsExp = IID == Intrinsic::amdgcn_exp2;
1203 Value *Src =
II.getArgOperand(0);
1213 if (
C->isInfinity()) {
1216 if (!
C->isNegative())
1220 if (IsExp &&
C->isNegative())
1224 if (
II.isStrictFP())
1228 Constant *Quieted = ConstantFP::get(Ty,
C->getValue().makeQuiet());
1233 if (
C->isZero() || (
C->getValue().isDenormal() && Ty->isFloatTy())) {
1235 : ConstantFP::get(Ty, 1.0);
1239 if (IsLog &&
C->isNegative())
1247 case Intrinsic::amdgcn_frexp_mant:
1248 case Intrinsic::amdgcn_frexp_exp: {
1249 Value *Src =
II.getArgOperand(0);
1255 if (IID == Intrinsic::amdgcn_frexp_mant) {
1257 II, ConstantFP::get(
II.getContext(), Significand));
1277 case Intrinsic::amdgcn_class: {
1278 Value *Src0 =
II.getArgOperand(0);
1279 Value *Src1 =
II.getArgOperand(1);
1283 II.getModule(), Intrinsic::is_fpclass, Src0->
getType()));
1286 II.setArgOperand(1, ConstantInt::get(Src1->
getType(),
1307 case Intrinsic::amdgcn_cvt_pkrtz: {
1308 auto foldFPTruncToF16RTZ = [](
Value *Arg) ->
Value * {
1321 return ConstantFP::get(HalfTy, Val);
1324 Value *Src =
nullptr;
1326 if (Src->getType()->isHalfTy())
1333 if (
Value *Src0 = foldFPTruncToF16RTZ(
II.getArgOperand(0))) {
1334 if (
Value *Src1 = foldFPTruncToF16RTZ(
II.getArgOperand(1))) {
1344 case Intrinsic::amdgcn_cvt_pknorm_i16:
1345 case Intrinsic::amdgcn_cvt_pknorm_u16:
1346 case Intrinsic::amdgcn_cvt_pk_i16:
1347 case Intrinsic::amdgcn_cvt_pk_u16: {
1348 Value *Src0 =
II.getArgOperand(0);
1349 Value *Src1 =
II.getArgOperand(1);
1361 case Intrinsic::amdgcn_cvt_off_f32_i4: {
1362 Value* Arg =
II.getArgOperand(0);
1376 constexpr size_t ResValsSize = 16;
1377 static constexpr float ResVals[ResValsSize] = {
1378 0.0, 0.0625, 0.125, 0.1875, 0.25, 0.3125, 0.375, 0.4375,
1379 -0.5, -0.4375, -0.375, -0.3125, -0.25, -0.1875, -0.125, -0.0625};
1381 ConstantFP::get(Ty, ResVals[CArg->
getZExtValue() & (ResValsSize - 1)]);
1384 case Intrinsic::amdgcn_ubfe:
1385 case Intrinsic::amdgcn_sbfe: {
1387 Value *Src =
II.getArgOperand(0);
1394 unsigned IntSize = Ty->getIntegerBitWidth();
1399 if ((Width & (IntSize - 1)) == 0) {
1404 if (Width >= IntSize) {
1406 II, 2, ConstantInt::get(CWidth->
getType(), Width & (IntSize - 1)));
1417 ConstantInt::get(COffset->
getType(),
Offset & (IntSize - 1)));
1421 bool Signed = IID == Intrinsic::amdgcn_sbfe;
1423 if (!CWidth || !COffset)
1433 if (
Offset + Width < IntSize) {
1437 RightShift->takeName(&
II);
1444 RightShift->takeName(&
II);
1447 case Intrinsic::amdgcn_exp:
1448 case Intrinsic::amdgcn_exp_row:
1449 case Intrinsic::amdgcn_exp_compr: {
1455 bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
1457 for (
int I = 0;
I < (IsCompr ? 2 : 4); ++
I) {
1458 if ((!IsCompr && (EnBits & (1 <<
I)) == 0) ||
1459 (IsCompr && ((EnBits & (0x3 << (2 *
I))) == 0))) {
1460 Value *Src =
II.getArgOperand(
I + 2);
1474 case Intrinsic::amdgcn_fmed3: {
1475 Value *Src0 =
II.getArgOperand(0);
1476 Value *Src1 =
II.getArgOperand(1);
1477 Value *Src2 =
II.getArgOperand(2);
1479 for (
Value *Src : {Src0, Src1, Src2}) {
1484 if (
II.isStrictFP())
1521 const APFloat *ConstSrc0 =
nullptr;
1522 const APFloat *ConstSrc1 =
nullptr;
1523 const APFloat *ConstSrc2 =
nullptr;
1528 const bool IsPosInfinity = ConstSrc0 && ConstSrc0->
isPosInfinity();
1548 const bool IsPosInfinity = ConstSrc1 && ConstSrc1->
isPosInfinity();
1571 auto *Quieted = ConstantFP::get(
II.getType(), ConstSrc2->
makeQuiet());
1591 CI->copyFastMathFlags(&
II);
1617 II.setArgOperand(0, Src0);
1618 II.setArgOperand(1, Src1);
1619 II.setArgOperand(2, Src2);
1629 ConstantFP::get(
II.getType(), Result));
1634 if (!ST->hasMed3_16())
1643 IID, {
X->getType()}, {
X,
Y, Z}, &
II,
II.getName());
1651 case Intrinsic::amdgcn_icmp:
1652 case Intrinsic::amdgcn_fcmp: {
1656 bool IsInteger = IID == Intrinsic::amdgcn_icmp;
1663 Value *Src0 =
II.getArgOperand(0);
1664 Value *Src1 =
II.getArgOperand(1);
1684 II.getType(), Args);
1685 NewCall->
addFnAttr(Attribute::Convergent);
1693 II.setArgOperand(0, Src1);
1694 II.setArgOperand(1, Src0);
1696 2, ConstantInt::get(CC->
getType(),
static_cast<int>(SwapPred)));
1743 ? Intrinsic::amdgcn_fcmp
1744 : Intrinsic::amdgcn_icmp;
1749 unsigned Width = CmpType->getBitWidth();
1750 unsigned NewWidth = Width;
1758 else if (Width <= 32)
1760 else if (Width <= 64)
1765 if (Width != NewWidth) {
1775 }
else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
1778 Value *Args[] = {SrcLHS, SrcRHS,
1779 ConstantInt::get(CC->
getType(), SrcPred)};
1781 NewIID, {
II.getType(), SrcLHS->
getType()}, Args);
1788 case Intrinsic::amdgcn_mbcnt_hi:
1793 case Intrinsic::amdgcn_mbcnt_lo: {
1806 if (std::optional<ConstantRange> ExistingRange =
II.getRange()) {
1807 ComputedRange = ComputedRange.
intersectWith(*ExistingRange);
1808 if (ComputedRange == *ExistingRange)
1812 II.addRangeRetAttr(ComputedRange);
1815 case Intrinsic::amdgcn_ballot: {
1816 Value *Arg =
II.getArgOperand(0);
1821 if (Src->isZero()) {
1826 if (ST->isWave32() &&
II.getType()->getIntegerBitWidth() == 64) {
1833 {IC.Builder.getInt32Ty()},
1834 {II.getArgOperand(0)}),
1841 case Intrinsic::amdgcn_wavefrontsize: {
1842 if (ST->isWaveSizeKnown())
1844 II, ConstantInt::get(
II.getType(), ST->getWavefrontSize()));
1847 case Intrinsic::amdgcn_wqm_vote: {
1854 case Intrinsic::amdgcn_kill: {
1856 if (!
C || !
C->getZExtValue())
1862 case Intrinsic::amdgcn_s_sendmsg:
1863 case Intrinsic::amdgcn_s_sendmsghalt: {
1869 Value *M0Val =
II.getArgOperand(1);
1875 decodeMsg(MsgImm->getZExtValue(), MsgId, OpId, StreamId, *ST);
1877 if (!msgDoesNotUseM0(MsgId, *ST))
1881 II.dropUBImplyingAttrsAndMetadata();
1885 case Intrinsic::amdgcn_update_dpp: {
1886 Value *Old =
II.getArgOperand(0);
1891 if (BC->isNullValue() || RM->getZExtValue() != 0xF ||
1898 case Intrinsic::amdgcn_permlane16:
1899 case Intrinsic::amdgcn_permlane16_var:
1900 case Intrinsic::amdgcn_permlanex16:
1901 case Intrinsic::amdgcn_permlanex16_var: {
1903 Value *VDstIn =
II.getArgOperand(0);
1908 unsigned int FiIdx = (IID == Intrinsic::amdgcn_permlane16 ||
1909 IID == Intrinsic::amdgcn_permlanex16)
1916 unsigned int BcIdx = FiIdx + 1;
1925 case Intrinsic::amdgcn_wave_shuffle:
1927 case Intrinsic::amdgcn_permlane64:
1928 case Intrinsic::amdgcn_readfirstlane:
1929 case Intrinsic::amdgcn_readlane:
1930 case Intrinsic::amdgcn_ds_bpermute: {
1932 unsigned SrcIdx = IID == Intrinsic::amdgcn_ds_bpermute ? 1 : 0;
1933 const Use &Src =
II.getArgOperandUse(SrcIdx);
1937 if (IID == Intrinsic::amdgcn_readlane &&
1944 if (IID == Intrinsic::amdgcn_ds_bpermute) {
1945 const Use &Lane =
II.getArgOperandUse(0);
1949 II.getModule(), Intrinsic::amdgcn_readlane,
II.getType());
1950 II.setCalledFunction(NewDecl);
1951 II.setOperand(0, Src);
1952 II.setOperand(1, NewLane);
1957 if (IID == Intrinsic::amdgcn_ds_bpermute)
1963 return std::nullopt;
1965 case Intrinsic::amdgcn_writelane: {
1969 return std::nullopt;
1971 case Intrinsic::amdgcn_trig_preop: {
1974 if (!
II.getType()->isDoubleTy())
1977 Value *Src =
II.getArgOperand(0);
1978 Value *Segment =
II.getArgOperand(1);
1987 if (StrippedSign != Src)
1990 if (
II.isStrictFP())
2012 unsigned Shift = SegmentVal * 53;
2017 static const uint32_t TwoByPi[] = {
2018 0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
2019 0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
2020 0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
2021 0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
2022 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
2023 0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
2027 unsigned Idx = Shift >> 5;
2028 if (Idx + 2 >= std::size(TwoByPi)) {
2033 unsigned BShift = Shift & 0x1f;
2037 Thi = (Thi << BShift) | (Tlo >> (64 - BShift));
2041 int Scale = -53 - Shift;
2048 case Intrinsic::amdgcn_fmul_legacy: {
2049 Value *Op0 =
II.getArgOperand(0);
2050 Value *Op1 =
II.getArgOperand(1);
2052 for (
Value *Src : {Op0, Op1}) {
2073 case Intrinsic::amdgcn_fma_legacy: {
2074 Value *Op0 =
II.getArgOperand(0);
2075 Value *Op1 =
II.getArgOperand(1);
2076 Value *Op2 =
II.getArgOperand(2);
2078 for (
Value *Src : {Op0, Op1, Op2}) {
2100 II.getModule(), Intrinsic::fma,
II.getType()));
2105 case Intrinsic::amdgcn_is_shared:
2106 case Intrinsic::amdgcn_is_private: {
2107 Value *Src =
II.getArgOperand(0);
2117 case Intrinsic::amdgcn_make_buffer_rsrc: {
2118 Value *Src =
II.getArgOperand(0);
2121 return std::nullopt;
2123 case Intrinsic::amdgcn_raw_buffer_store_format:
2124 case Intrinsic::amdgcn_struct_buffer_store_format:
2125 case Intrinsic::amdgcn_raw_tbuffer_store:
2126 case Intrinsic::amdgcn_struct_tbuffer_store:
2127 case Intrinsic::amdgcn_image_store_1d:
2128 case Intrinsic::amdgcn_image_store_1darray:
2129 case Intrinsic::amdgcn_image_store_2d:
2130 case Intrinsic::amdgcn_image_store_2darray:
2131 case Intrinsic::amdgcn_image_store_2darraymsaa:
2132 case Intrinsic::amdgcn_image_store_2dmsaa:
2133 case Intrinsic::amdgcn_image_store_3d:
2134 case Intrinsic::amdgcn_image_store_cube:
2135 case Intrinsic::amdgcn_image_store_mip_1d:
2136 case Intrinsic::amdgcn_image_store_mip_1darray:
2137 case Intrinsic::amdgcn_image_store_mip_2d:
2138 case Intrinsic::amdgcn_image_store_mip_2darray:
2139 case Intrinsic::amdgcn_image_store_mip_3d:
2140 case Intrinsic::amdgcn_image_store_mip_cube: {
2145 if (ST->hasDefaultComponentBroadcast())
2147 else if (ST->hasDefaultComponentZero())
2152 int DMaskIdx = getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID()) ? 1 : -1;
2160 case Intrinsic::amdgcn_prng_b32: {
2161 auto *Src =
II.getArgOperand(0);
2165 return std::nullopt;
2167 case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4:
2168 case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: {
2169 Value *Src0 =
II.getArgOperand(0);
2170 Value *Src1 =
II.getArgOperand(1);
2176 auto getFormatNumRegs = [](
unsigned FormatVal) {
2177 switch (FormatVal) {
2191 bool MadeChange =
false;
2192 unsigned Src0NumElts = getFormatNumRegs(CBSZ);
2193 unsigned Src1NumElts = getFormatNumRegs(BLGP);
2197 if (Src0Ty->getNumElements() > Src0NumElts) {
2204 if (Src1Ty->getNumElements() > Src1NumElts) {
2212 return std::nullopt;
2223 case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4:
2224 case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4:
2225 case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: {
2226 Value *Src0 =
II.getArgOperand(1);
2227 Value *Src1 =
II.getArgOperand(3);
2233 bool MadeChange =
false;
2239 if (Src0Ty->getNumElements() > Src0NumElts) {
2246 if (Src1Ty->getNumElements() > Src1NumElts) {
2254 return std::nullopt;
2271 return std::nullopt;
2284 int DMaskIdx,
bool IsLoad) {
2287 :
II.getOperand(0)->getType());
2288 unsigned VWidth = IIVTy->getNumElements();
2291 Type *EltTy = IIVTy->getElementType();
2303 const unsigned UnusedComponentsAtFront = DemandedElts.
countr_zero();
2308 DemandedElts = (1 << ActiveBits) - 1;
2310 if (UnusedComponentsAtFront > 0) {
2311 static const unsigned InvalidOffsetIdx = 0xf;
2314 switch (
II.getIntrinsicID()) {
2315 case Intrinsic::amdgcn_raw_buffer_load:
2316 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2319 case Intrinsic::amdgcn_s_buffer_load:
2323 if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
2324 OffsetIdx = InvalidOffsetIdx;
2328 case Intrinsic::amdgcn_struct_buffer_load:
2329 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2334 OffsetIdx = InvalidOffsetIdx;
2338 if (OffsetIdx != InvalidOffsetIdx) {
2340 DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
2341 auto *
Offset = Args[OffsetIdx];
2342 unsigned SingleComponentSizeInBits =
2344 unsigned OffsetAdd =
2345 UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
2346 auto *OffsetAddVal = ConstantInt::get(
Offset->getType(), OffsetAdd);
2363 unsigned NewDMaskVal = 0;
2364 unsigned OrigLdStIdx = 0;
2365 for (
unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
2366 const unsigned Bit = 1 << SrcIdx;
2367 if (!!(DMaskVal & Bit)) {
2368 if (!!DemandedElts[OrigLdStIdx])
2374 if (DMaskVal != NewDMaskVal)
2375 Args[DMaskIdx] = ConstantInt::get(DMask->
getType(), NewDMaskVal);
2378 unsigned NewNumElts = DemandedElts.
popcount();
2382 if (NewNumElts >= VWidth && DemandedElts.
isMask()) {
2384 II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
2396 OverloadTys[0] = NewTy;
2400 for (
unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
2401 if (DemandedElts[OrigStoreIdx])
2404 if (NewNumElts == 1)
2414 AttributeList OldAttrList =
II.getAttributes();
2418 if (NewNumElts == 1) {
2424 unsigned NewLoadIdx = 0;
2425 for (
unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
2426 if (!!DemandedElts[OrigLoadIdx])
2442 APInt &UndefElts)
const {
2447 const unsigned FirstElt = DemandedElts.
countr_zero();
2449 const unsigned MaskLen = LastElt - FirstElt + 1;
2451 unsigned OldNumElts = VT->getNumElements();
2452 if (MaskLen == OldNumElts && MaskLen != 1)
2455 Type *EltTy = VT->getElementType();
2463 Value *Src =
II.getArgOperand(0);
2468 II.getOperandBundlesAsDefs(OpBundles);
2485 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2486 if (DemandedElts[FirstElt +
I])
2487 ExtractMask[
I] = FirstElt +
I;
2496 for (
unsigned I = 0;
I != MaskLen; ++
I) {
2497 if (DemandedElts[FirstElt +
I])
2498 InsertMask[FirstElt +
I] =
I;
2510 SimplifyAndSetOp)
const {
2511 switch (
II.getIntrinsicID()) {
2512 case Intrinsic::amdgcn_readfirstlane:
2513 SimplifyAndSetOp(&
II, 0, DemandedElts, UndefElts);
2515 case Intrinsic::amdgcn_raw_buffer_load:
2516 case Intrinsic::amdgcn_raw_ptr_buffer_load:
2517 case Intrinsic::amdgcn_raw_buffer_load_format:
2518 case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
2519 case Intrinsic::amdgcn_raw_tbuffer_load:
2520 case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
2521 case Intrinsic::amdgcn_s_buffer_load:
2522 case Intrinsic::amdgcn_struct_buffer_load:
2523 case Intrinsic::amdgcn_struct_ptr_buffer_load:
2524 case Intrinsic::amdgcn_struct_buffer_load_format:
2525 case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
2526 case Intrinsic::amdgcn_struct_tbuffer_load:
2527 case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
2530 if (getAMDGPUImageDMaskIntrinsic(
II.getIntrinsicID())) {
2536 return std::nullopt;
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static Value * createPermlane16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlane16 with the precomputed lane-select halves.
static std::optional< unsigned > matchRowSharePattern(ArrayRef< uint8_t > Ids)
Match a row-share pattern: all 16 lanes of each row read the same source lane.
static bool matchMirrorPattern(ArrayRef< uint8_t > Ids)
Match an N-lane reversal (mirror) pattern.
static bool tryBuildShuffleMap(Value *Index, const GCNSubtarget &ST, SmallVectorImpl< uint8_t > &Ids, const DataLayout &DL)
Build the per-lane shuffle map by evaluating Index for every lane in the wave.
static std::optional< unsigned > matchQuadPermPattern(ArrayRef< uint8_t > Ids)
Match a 4-lane (quad) permutation, encoded as the v_mov_b32_dpp QUAD_PERM control word: bits[1:0]=Ids...
static std::optional< unsigned > matchDsSwizzleRotatePattern(ArrayRef< uint8_t > Ids)
Match a GFX9+ DS_SWIZZLE rotate-mode permutation: a cyclic left-rotation of all 32 lanes within each ...
static std::optional< unsigned > matchHalfRowPermPattern(ArrayRef< uint8_t > Ids)
Match an 8-lane arbitrary permutation, encoded as the v_mov_b32_dpp8 24-bit selector (three bits per ...
static std::optional< unsigned > matchRowXMaskPattern(ArrayRef< uint8_t > Ids)
Match an XOR mask pattern within each 16-lane row: Ids[J] == Mask ^ J, with Mask in [1,...
static constexpr auto matchHalfRowMirrorPattern
static Value * createPermlaneX16(IRBuilderBase &B, Value *Val, uint32_t Lo, uint32_t Hi)
Emit v_permlanex16 with the precomputed lane-select halves.
static bool isRowPattern(ArrayRef< uint8_t > Ids)
Match an N-lane row pattern: each lane in [0, N) reads from a source lane in the same N-lane row,...
static bool canContractSqrtToRsq(const FPMathOperator *SqrtOp)
Return true if it's legal to contract llvm.amdgcn.rcp(llvm.sqrt)
static bool isTriviallyUniform(const Use &U)
Return true if we can easily prove that use U is uniform.
static CallInst * rewriteCall(IRBuilderBase &B, CallInst &Old, Function &NewCallee, ArrayRef< Value * > Ops)
static Value * convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder)
static constexpr auto isFullRowPattern
static constexpr auto isQuadPattern
static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV, Instruction *I)
static uint64_t computePermlane16Masks(ArrayRef< uint8_t > Ids)
Pack a 16-lane permutation into a single 64-bit value: four bits per output lane, lane J in bits [J*4...
static bool matchHalfWaveSwapPattern(ArrayRef< uint8_t > Ids)
Match a half-wave swap: lane J reads from lane J ^ 32.
static bool hasPeriodicLayout(ArrayRef< uint8_t > Ids)
Lanes are partitioned into groups of Period; each group is a translated copy of the first: Ids[I] = I...
static std::optional< Instruction * > tryOptimizeShufflePattern(InstCombiner &IC, IntrinsicInst &II, const GCNSubtarget &ST)
Try to fold a wave_shuffle/ds_bpermute whose lane index is a constant function of the lane ID into a ...
static constexpr auto isHalfRowPattern
static APInt defaultComponentBroadcast(Value *V)
static std::optional< unsigned > matchDsSwizzleBitmaskPattern(ArrayRef< uint8_t > Ids)
Match a DS_SWIZZLE bitmask-mode permutation: dst_lane = ((src_lane & AND) | OR) ^ XOR with each mask ...
static Value * createDsSwizzle(IRBuilderBase &B, Value *Val, unsigned Offset, const DataLayout &DL)
Emit ds_swizzle with the given immediate, bitcasting/converting between pointer/float types and i32 a...
static std::optional< Instruction * > modifyIntrinsicCall(IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr, InstCombiner &IC, std::function< void(SmallVectorImpl< Value * > &, SmallVectorImpl< Type * > &)> Func)
Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with modified arguments (based on ...
static Value * matchShuffleToHWIntrinsic(IRBuilderBase &B, Value *Src, ArrayRef< uint8_t > Ids, const GCNSubtarget &ST, const DataLayout &DL)
Given a shuffle map, try to emit the best hardware intrinsic.
static std::optional< unsigned > matchRowRotatePattern(ArrayRef< uint8_t > Ids)
Match a 16-lane cyclic rotation; returns the rotation amount in [1, 15].
static bool isCrossRowPattern(ArrayRef< uint8_t > Ids)
Match a cross-row permutation suitable for v_permlanex16: every lane in the low 16-lane half reads fr...
static bool isThreadID(const GCNSubtarget &ST, Value *V)
static Value * createUpdateDpp(IRBuilderBase &B, Value *Val, unsigned Ctrl)
Emit v_mov_b32_dpp with the given control word, row/bank masks 0xF, and bound_ctrl=1 so out-of-bounds...
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
static Value * simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, int DMaskIdx=-1, bool IsLoad=true)
Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
static std::optional< Instruction * > simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST, const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr, IntrinsicInst &II, InstCombiner &IC)
static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat)
static Value * createMovDpp8(IRBuilderBase &B, Value *Val, unsigned Selector)
Emit v_mov_b32_dpp8 with the given 24-bit lane selector.
static Value * matchFPExtFromF16(Value *Arg)
Match an fpext from half to float, or a constant we can convert.
static constexpr auto matchFullRowMirrorPattern
static std::optional< unsigned > evalLaneExpr(Value *V, unsigned Lane, const GCNSubtarget &ST, const DataLayout &DL, unsigned Depth=0)
Evaluate V as a function of the lane ID and return its value on Lane, or std::nullopt if V is not a c...
static Value * createPermlane64(IRBuilderBase &B, Value *Val)
Emit v_permlane64 (swap of the two 32-lane halves of a wave64).
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Utilities for dealing with flags related to floating point properties and mode controls.
AMD GCN specific subclass of TargetSubtarget.
This file provides the interface for the instcombine pass implementation.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
Provides some synthesis utilities to produce sequences of values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
static constexpr roundingMode rmTowardZero
static constexpr roundingMode rmNearestTiesToEven
static const fltSemantics & IEEEhalf()
static APFloat getQNaN(const fltSemantics &Sem, bool Negative=false, const APInt *payload=nullptr)
Factory for QNaN values.
opStatus divide(const APFloat &RHS, roundingMode RM)
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
bool isPosInfinity() const
const fltSemantics & getSemantics() const
APFloat makeQuiet() const
Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
APInt bitcastToAPInt() const
bool isNegInfinity() const
static APFloat getZero(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Zero.
cmpResult compare(const APFloat &RHS) const
Class for arbitrary precision integers.
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
uint64_t getZExtValue() const
Get zero extended value.
unsigned popcount() const
Count the number of bits set.
LLVM_ABI uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const
unsigned getActiveBits() const
Compute the number of active bits in the value.
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
unsigned countr_zero() const
Count the number of trailing zero bits.
bool isMask(unsigned numBits) const
Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
size_t size() const
Get the array size.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
bool isTypeLegal(Type *Ty) const override
void addFnAttr(Attribute::AttrKind Kind)
Adds the attribute to the function.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
void setAttributes(AttributeList A)
Set the attributes for this call.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
AttributeList getAttributes() const
Return the attributes for this call.
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
bool isFPPredicate() const
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
ConstantFP - Floating Point Values [float, double].
const APFloat & getValueAPF() const
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getNaN(Type *Ty, bool Negative=false, uint64_t Payload=0)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
const APInt & getValue() const
Return the constant as an APInt value reference.
This class represents a range of values.
LLVM_ABI ConstantRange add(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an addition of a value in this ran...
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI ConstantRange intersectWith(const ConstantRange &CR, PreferredRangeType Type=Smallest) const
Return the range that results from the intersection of this range with another range.
This is an important base class in LLVM.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Tagged union holding either a T or a Error.
This class represents an extension of floating point types.
Utility class for floating point operations which can have information about relaxed accuracy require...
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
bool allowContract() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
bool simplifyDemandedLaneMaskArg(InstCombiner &IC, IntrinsicInst &II, unsigned LaneAgIdx) const
Simplify a lane index operand.
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override
Instruction * hoistLaneIntrinsicThroughOperand(InstCombiner &IC, IntrinsicInst &II) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const override
KnownIEEEMode fpenvIEEEMode(const Instruction &I) const
Return KnownIEEEMode::On if we know if the use context can assume "amdgpu-ieee"="true" and KnownIEEEM...
Value * simplifyAMDGCNLaneIntrinsicDemanded(InstCombiner &IC, IntrinsicInst &II, const APInt &DemandedElts, APInt &UndefElts) const
bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, const Value *Op1, InstCombiner &IC) const
Common base class shared among various IRBuilders.
CallInst * CreateExtractVector(Type *DstType, Value *SrcVec, Value *Idx, const Twine &Name="")
Create a call to the vector.extract intrinsic.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Value * CreateMaxNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the maxnum intrinsic.
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Value * CreateMaximumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the maximumnum intrinsic.
Value * CreateMinNum(Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create call to the minnum intrinsic.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Value * CreateFAddFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
Value * CreateMinimumNum(Value *LHS, Value *RHS, const Twine &Name="")
Create call to the minimumnum intrinsic.
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Value * CreateFMulFMF(Value *L, Value *R, FMFSource FMFSource, const Twine &Name="", MDNode *FPMD=nullptr)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
The core instruction combiner logic.
const DataLayout & getDataLayout() const
virtual Instruction * eraseInstFromFunction(Instruction &I)=0
Combiner aware instruction erasure.
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
DominatorTree & getDominatorTree() const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
virtual bool SimplifyDemandedBits(Instruction *I, unsigned OpNo, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0)=0
static Value * stripSignOnlyFPOps(Value *Val)
Ignore all operations which only change the sign of a value, returning the underlying magnitude value...
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
A wrapper class for inspecting calls to intrinsic functions.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
A Module instance is used to store all the information related to an LLVM module.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
const ParentTy * getParent() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const ImageDimIntrinsicInfo * getImageDimIntrinsicByBaseOpcode(unsigned BaseOpcode, unsigned Dim)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool isArgPassedInSGPR(const Argument *A)
bool isIntrinsicAlwaysUniform(unsigned IntrID)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignore it.
bool match(Val *V, const Pattern &P)
cstfp_pred_ty< is_any_zero_fp > m_AnyZeroFP()
Match a floating-point negative zero or positive zero.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
auto m_Value()
Match an arbitrary value and ignore it.
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cstfp_pred_ty< is_finitenonzero > m_FiniteNonZero()
Match a finite non-zero FP constant.
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
auto m_ConstantFP()
Match an arbitrary ConstantFP and ignore it.
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
auto dyn_cast_or_null(const Y &Val)
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
constexpr unsigned MaxAnalysisRecursionDepth
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
@ NearestTiesToEven
roundTiesToEven.
LLVM_ABI bool isKnownNeverInfOrNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point value can never contain a NaN or infinity.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
constexpr uint64_t Make_64(uint32_t High, uint32_t Low)
Make a 64-bit integer from a high / low pair of 32-bit integers.
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
bool isConstant() const
Returns true if we know the value of all bits.
const APInt & getConstant() const
Returns the value when all bits have a known value.
SimplifyQuery getWithInstruction(const Instruction *I) const
LLVM_ABI bool isUndefValue(Value *V) const
If CanUseUndef is true, returns whether V is undef.