#define GET_REGINFO_TARGET_DESC
#include "AMDGPUGenRegisterInfo.inc"

static cl::opt<bool> EnableSpillSGPRToVGPR(
    "amdgpu-spill-sgpr-to-vgpr",
    cl::desc("Enable spilling SGPRs to VGPRs"),
    cl::ReallyHidden, cl::init(true));
std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;

static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
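// SubRegFromChannelTableWidthMap maps a width in 32-bit channels to a row of
// SubRegFromChannelTable; widths with no corresponding row map to 0, which
// getSubRegFromChannel() rejects with its "Not implemented" assert.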
static void emitUnsupportedError(const Function &Fn, const MachineInstr &MI,
                                 const Twine &ErrMsg) {
                         MI->getOperand(0).isKill(), Index, RS) {}
      MovOpc = AMDGPU::S_MOV_B32;
      NotOpc = AMDGPU::S_NOT_B32;
      MovOpc = AMDGPU::S_MOV_B64;
      NotOpc = AMDGPU::S_NOT_B64;
           SuperReg != AMDGPU::EXEC && "exec should never spill");
    assert(RS && "Cannot spill SGPR to memory without RegScavenger");
    TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
        IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
      if (RS->isRegUsed(AMDGPU::SCC))
        emitUnsupportedError(MF.getFunction(), *MI,
                             "unhandled SGPR spill to memory");
      I->getOperand(2).setIsDead();
      I->getOperand(2).setIsDead();
      if (RS->isRegUsed(AMDGPU::SCC))
        emitUnsupportedError(MF.getFunction(), *MI,
                             "unhandled SGPR spill to memory");
                            ST.getAMDGPUDwarfFlavour(),
  assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
         getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
         (getSubRegIndexLaneMask(AMDGPU::lo16) |
          getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
             getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
         "getNumCoveredRegs() will not work with generated subreg masks!");

  RegPressureIgnoredUnits.resize(getNumRegUnits());
  RegPressureIgnoredUnits.set(
  for (auto Reg : AMDGPU::VGPR_16RegClass) {
    RegPressureIgnoredUnits.set(static_cast<unsigned>(*regunits(Reg).begin()));
  static auto InitializeRegSplitPartsOnce = [this]() {
    for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
      unsigned Size = getSubRegIdxSize(Idx);
      std::vector<int16_t> &Vec = RegSplitParts[Size / 16 - 1];
      unsigned Pos = getSubRegIdxOffset(Idx);
      unsigned MaxNumParts = 1024 / Size;
      Vec.resize(MaxNumParts);
  static auto InitializeSubRegFromChannelTableOnce = [this]() {
    for (auto &Row : SubRegFromChannelTable)
      Row.fill(AMDGPU::NoSubRegister);
    for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
      unsigned Width = getSubRegIdxSize(Idx) / 32;
      unsigned Offset = getSubRegIdxOffset(Idx) / 32;
      unsigned TableIdx = Width - 1;
      assert(TableIdx < SubRegFromChannelTable.size());
      SubRegFromChannelTable[TableIdx][Offset] = Idx;

  llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
  llvm::call_once(InitializeSubRegFromChannelTableFlag,
                  InitializeSubRegFromChannelTableOnce);
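  // Both tables are built lazily and exactly once per process via call_once:
  // RegSplitParts[N-1] records, per offset, the subregister index spanning N
  // 16-bit halves, and SubRegFromChannelTable maps a (width, start channel)
  // pair in 32-bit units back to a subregister index.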
410 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
411 : CSR_AMDGPU_SaveList;
414 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
415 : CSR_AMDGPU_SI_Gfx_SaveList;
417 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
420 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
421 return &NoCalleeSavedReg;
437 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
438 : CSR_AMDGPU_RegMask;
441 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
442 : CSR_AMDGPU_SI_Gfx_RegMask;
447 return AMDGPU_AllVGPRs_RegMask;
454 return CSR_AMDGPU_NoRegs_RegMask;
458 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
469 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
470 return &AMDGPU::AV_32RegClass;
471 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
472 return &AMDGPU::AV_64RegClass;
473 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
474 RC == &AMDGPU::AReg_64_Align2RegClass)
475 return &AMDGPU::AV_64_Align2RegClass;
476 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
477 return &AMDGPU::AV_96RegClass;
478 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
479 RC == &AMDGPU::AReg_96_Align2RegClass)
480 return &AMDGPU::AV_96_Align2RegClass;
481 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
482 return &AMDGPU::AV_128RegClass;
483 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
484 RC == &AMDGPU::AReg_128_Align2RegClass)
485 return &AMDGPU::AV_128_Align2RegClass;
486 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
487 return &AMDGPU::AV_160RegClass;
488 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
489 RC == &AMDGPU::AReg_160_Align2RegClass)
490 return &AMDGPU::AV_160_Align2RegClass;
491 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
492 return &AMDGPU::AV_192RegClass;
493 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
494 RC == &AMDGPU::AReg_192_Align2RegClass)
495 return &AMDGPU::AV_192_Align2RegClass;
496 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
497 return &AMDGPU::AV_256RegClass;
498 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
499 RC == &AMDGPU::AReg_256_Align2RegClass)
500 return &AMDGPU::AV_256_Align2RegClass;
501 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
502 return &AMDGPU::AV_512RegClass;
503 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
504 RC == &AMDGPU::AReg_512_Align2RegClass)
505 return &AMDGPU::AV_512_Align2RegClass;
506 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
507 return &AMDGPU::AV_1024RegClass;
508 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
509 RC == &AMDGPU::AReg_1024_Align2RegClass)
510 return &AMDGPU::AV_1024_Align2RegClass;
540 return AMDGPU_AllVGPRs_RegMask;
544 return AMDGPU_AllAGPRs_RegMask;
548 return AMDGPU_AllVectorRegs_RegMask;
552 return AMDGPU_AllAllocatableSRegs_RegMask;
  assert(NumRegIndex && "Not implemented");
  assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
  return SubRegFromChannelTable[NumRegIndex - 1][Channel];
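// For example, a two-channel access starting at channel 2 resolves to the
// generated sub2_sub3 index (assuming the usual subN index naming).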
                                                  const unsigned Align,
  MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
  return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
  reserveRegisterTuples(Reserved, AMDGPU::EXEC);
  reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
  reserveRegisterTuples(Reserved, AMDGPU::M0);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_LO);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_FLAT_SCRATCH_BASE_HI);
  reserveRegisterTuples(Reserved, AMDGPU::ASYNCcnt);
  reserveRegisterTuples(Reserved, AMDGPU::TENSORcnt);
  reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
  reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
  reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
  reserveRegisterTuples(Reserved, AMDGPU::TBA);
  reserveRegisterTuples(Reserved, AMDGPU::TMA);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
  reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
  reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
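  // reserveRegisterTuples() marks the given register and every register that
  // aliases it (all containing tuples) as reserved, so none of those aliases
  // can be allocated.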
  unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
  unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)

  if (ScratchRSrcReg != AMDGPU::NoRegister) {
    reserveRegisterTuples(Reserved, ScratchRSrcReg);

  if (LongBranchReservedReg)
    reserveRegisterTuples(Reserved, LongBranchReservedReg);

    reserveRegisterTuples(Reserved, StackPtrReg);
    assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));

    reserveRegisterTuples(Reserved, FrameReg);
    assert(!isSubRegister(ScratchRSrcReg, FrameReg));

    reserveRegisterTuples(Reserved, BasePtrReg);
    assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));

    reserveRegisterTuples(Reserved, ExecCopyReg);

  auto [MaxNumVGPRs, MaxNumAGPRs] = ST.getMaxNumVectorRegs(MF.getFunction());
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumVGPRs)

  if (!ST.hasMAIInsts())
    unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
    if (Index + NumRegs > MaxNumAGPRs)

  if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {

  if (!NonWWMRegMask.empty()) {
    for (unsigned RegI = AMDGPU::VGPR0, RegE = AMDGPU::VGPR0 + MaxNumVGPRs;
         RegI < RegE; ++RegI) {
      if (NonWWMRegMask.test(RegI))
        reserveRegisterTuples(Reserved, RegI);

    reserveRegisterTuples(Reserved, Reg);
    reserveRegisterTuples(Reserved, Reg);
    reserveRegisterTuples(Reserved, Reg);
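  // Any register index at or beyond the subtarget's SGPR/VGPR/AGPR budget is
  // reserved above so the allocator never hands it out, even though it exists
  // in the generated register classes.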
761 if (Info->isBottomOfStack())
769 if (Info->isEntryFunction()) {
  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                          AMDGPU::OpName::offset);
  return MI->getOperand(OffIdx).getImm();
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e32: {
    int OtherIdx = Idx == 1 ? 2 : 1;
  case AMDGPU::V_ADD_CO_U32_e64: {
    int OtherIdx = Idx == 2 ? 3 : 2;

  assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                            AMDGPU::OpName::vaddr) ||
          (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                             AMDGPU::OpName::saddr))) &&
         "Should never see frame index on non-address operand");
  return Src1.isImm() ||
         (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(), Src1.getReg()));
  return Src0.isImm() ||
         (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(), Src0.getReg()));
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32: {
    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
  case AMDGPU::V_ADD_U32_e64:
    return !ST.hasFlatScratchEnabled();
  case AMDGPU::V_ADD_CO_U32_e32:
    if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
    return MI->getOperand(3).isDead();
  case AMDGPU::V_ADD_CO_U32_e64:
    return MI->getOperand(1).isDead();

  return !TII->isLegalMUBUFImmOffset(FullOffset);
  if (Ins != MBB->end())
    DL = Ins->getDebugLoc();

      ST.hasFlatScratchEnabled() ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
      ST.hasFlatScratchEnabled() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
                                 : &AMDGPU::VGPR_32RegClass);
                                           ? &AMDGPU::SReg_32_XM0RegClass
                                           : &AMDGPU::VGPR_32RegClass);

  if (ST.hasFlatScratchEnabled()) {

  TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
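  // With flat scratch the frame base can be kept in an SGPR and used as the
  // instruction's scalar saddr; without it, the base is materialized into a
  // VGPR for the MUBUF vaddr path.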
  switch (MI.getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32: {
    if (!ImmOp->isImm()) {
      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);
      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
      MI.getOperand(2).ChangeToRegister(BaseReg, false);

  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64: {
    int Src0Idx = MI.getNumExplicitDefs();
    if (!ImmOp->isImm()) {
      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);

    if (TotalOffset == 0) {
      MI.setDesc(TII->get(AMDGPU::COPY));
      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
        MI.removeOperand(I);

      MI.getOperand(1).ChangeToRegister(BaseReg, false);

    ImmOp->setImm(TotalOffset);
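  // When the folded offset is zero the add collapses into a plain COPY of the
  // base register; otherwise only the immediate operand is rewritten in place.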
  bool IsFlat = TII->isFLATScratch(MI);
  bool SeenFI = false;
      TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
                                      : AMDGPU::OpName::vaddr);

  assert(FIOp && FIOp->isFI() && "frame index must be address operand");
         "offset should be legal");
  assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
  switch (MI->getOpcode()) {
  case AMDGPU::V_ADD_U32_e32:
  case AMDGPU::V_ADD_CO_U32_e32:
  case AMDGPU::V_ADD_U32_e64:
  case AMDGPU::V_ADD_CO_U32_e64:

  return TII->isLegalMUBUFImmOffset(NewOffset);

  return &AMDGPU::VGPR_32RegClass;

  return RC == &AMDGPU::SCC_CLASSRegClass ? &AMDGPU::SReg_32RegClass : RC;

  unsigned Op = MI.getOpcode();
1133 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE:
1134 case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE:
        (uint64_t)TII->getNamedOperand(MI, AMDGPU::OpName::mask)->getImm());
1140 case AMDGPU::SI_SPILL_S1024_SAVE:
1141 case AMDGPU::SI_SPILL_S1024_RESTORE:
1142 case AMDGPU::SI_SPILL_V1024_SAVE:
1143 case AMDGPU::SI_SPILL_V1024_RESTORE:
1144 case AMDGPU::SI_SPILL_A1024_SAVE:
1145 case AMDGPU::SI_SPILL_A1024_RESTORE:
1146 case AMDGPU::SI_SPILL_AV1024_SAVE:
1147 case AMDGPU::SI_SPILL_AV1024_RESTORE:
1149 case AMDGPU::SI_SPILL_S512_SAVE:
1150 case AMDGPU::SI_SPILL_S512_RESTORE:
1151 case AMDGPU::SI_SPILL_V512_SAVE:
1152 case AMDGPU::SI_SPILL_V512_RESTORE:
1153 case AMDGPU::SI_SPILL_A512_SAVE:
1154 case AMDGPU::SI_SPILL_A512_RESTORE:
1155 case AMDGPU::SI_SPILL_AV512_SAVE:
1156 case AMDGPU::SI_SPILL_AV512_RESTORE:
1158 case AMDGPU::SI_SPILL_S384_SAVE:
1159 case AMDGPU::SI_SPILL_S384_RESTORE:
1160 case AMDGPU::SI_SPILL_V384_SAVE:
1161 case AMDGPU::SI_SPILL_V384_RESTORE:
1162 case AMDGPU::SI_SPILL_A384_SAVE:
1163 case AMDGPU::SI_SPILL_A384_RESTORE:
1164 case AMDGPU::SI_SPILL_AV384_SAVE:
1165 case AMDGPU::SI_SPILL_AV384_RESTORE:
1167 case AMDGPU::SI_SPILL_S352_SAVE:
1168 case AMDGPU::SI_SPILL_S352_RESTORE:
1169 case AMDGPU::SI_SPILL_V352_SAVE:
1170 case AMDGPU::SI_SPILL_V352_RESTORE:
1171 case AMDGPU::SI_SPILL_A352_SAVE:
1172 case AMDGPU::SI_SPILL_A352_RESTORE:
1173 case AMDGPU::SI_SPILL_AV352_SAVE:
1174 case AMDGPU::SI_SPILL_AV352_RESTORE:
1176 case AMDGPU::SI_SPILL_S320_SAVE:
1177 case AMDGPU::SI_SPILL_S320_RESTORE:
1178 case AMDGPU::SI_SPILL_V320_SAVE:
1179 case AMDGPU::SI_SPILL_V320_RESTORE:
1180 case AMDGPU::SI_SPILL_A320_SAVE:
1181 case AMDGPU::SI_SPILL_A320_RESTORE:
1182 case AMDGPU::SI_SPILL_AV320_SAVE:
1183 case AMDGPU::SI_SPILL_AV320_RESTORE:
1185 case AMDGPU::SI_SPILL_S288_SAVE:
1186 case AMDGPU::SI_SPILL_S288_RESTORE:
1187 case AMDGPU::SI_SPILL_V288_SAVE:
1188 case AMDGPU::SI_SPILL_V288_RESTORE:
1189 case AMDGPU::SI_SPILL_A288_SAVE:
1190 case AMDGPU::SI_SPILL_A288_RESTORE:
1191 case AMDGPU::SI_SPILL_AV288_SAVE:
1192 case AMDGPU::SI_SPILL_AV288_RESTORE:
1194 case AMDGPU::SI_SPILL_S256_SAVE:
1195 case AMDGPU::SI_SPILL_S256_RESTORE:
1196 case AMDGPU::SI_SPILL_V256_SAVE:
1197 case AMDGPU::SI_SPILL_V256_RESTORE:
1198 case AMDGPU::SI_SPILL_A256_SAVE:
1199 case AMDGPU::SI_SPILL_A256_RESTORE:
1200 case AMDGPU::SI_SPILL_AV256_SAVE:
1201 case AMDGPU::SI_SPILL_AV256_RESTORE:
1203 case AMDGPU::SI_SPILL_S224_SAVE:
1204 case AMDGPU::SI_SPILL_S224_RESTORE:
1205 case AMDGPU::SI_SPILL_V224_SAVE:
1206 case AMDGPU::SI_SPILL_V224_RESTORE:
1207 case AMDGPU::SI_SPILL_A224_SAVE:
1208 case AMDGPU::SI_SPILL_A224_RESTORE:
1209 case AMDGPU::SI_SPILL_AV224_SAVE:
1210 case AMDGPU::SI_SPILL_AV224_RESTORE:
1212 case AMDGPU::SI_SPILL_S192_SAVE:
1213 case AMDGPU::SI_SPILL_S192_RESTORE:
1214 case AMDGPU::SI_SPILL_V192_SAVE:
1215 case AMDGPU::SI_SPILL_V192_RESTORE:
1216 case AMDGPU::SI_SPILL_A192_SAVE:
1217 case AMDGPU::SI_SPILL_A192_RESTORE:
1218 case AMDGPU::SI_SPILL_AV192_SAVE:
1219 case AMDGPU::SI_SPILL_AV192_RESTORE:
1221 case AMDGPU::SI_SPILL_S160_SAVE:
1222 case AMDGPU::SI_SPILL_S160_RESTORE:
1223 case AMDGPU::SI_SPILL_V160_SAVE:
1224 case AMDGPU::SI_SPILL_V160_RESTORE:
1225 case AMDGPU::SI_SPILL_A160_SAVE:
1226 case AMDGPU::SI_SPILL_A160_RESTORE:
1227 case AMDGPU::SI_SPILL_AV160_SAVE:
1228 case AMDGPU::SI_SPILL_AV160_RESTORE:
1230 case AMDGPU::SI_SPILL_S128_SAVE:
1231 case AMDGPU::SI_SPILL_S128_RESTORE:
1232 case AMDGPU::SI_SPILL_V128_SAVE:
1233 case AMDGPU::SI_SPILL_V128_RESTORE:
1234 case AMDGPU::SI_SPILL_A128_SAVE:
1235 case AMDGPU::SI_SPILL_A128_RESTORE:
1236 case AMDGPU::SI_SPILL_AV128_SAVE:
1237 case AMDGPU::SI_SPILL_AV128_RESTORE:
1239 case AMDGPU::SI_SPILL_S96_SAVE:
1240 case AMDGPU::SI_SPILL_S96_RESTORE:
1241 case AMDGPU::SI_SPILL_V96_SAVE:
1242 case AMDGPU::SI_SPILL_V96_RESTORE:
1243 case AMDGPU::SI_SPILL_A96_SAVE:
1244 case AMDGPU::SI_SPILL_A96_RESTORE:
1245 case AMDGPU::SI_SPILL_AV96_SAVE:
1246 case AMDGPU::SI_SPILL_AV96_RESTORE:
1248 case AMDGPU::SI_SPILL_S64_SAVE:
1249 case AMDGPU::SI_SPILL_S64_RESTORE:
1250 case AMDGPU::SI_SPILL_V64_SAVE:
1251 case AMDGPU::SI_SPILL_V64_RESTORE:
1252 case AMDGPU::SI_SPILL_A64_SAVE:
1253 case AMDGPU::SI_SPILL_A64_RESTORE:
1254 case AMDGPU::SI_SPILL_AV64_SAVE:
1255 case AMDGPU::SI_SPILL_AV64_RESTORE:
1257 case AMDGPU::SI_SPILL_S32_SAVE:
1258 case AMDGPU::SI_SPILL_S32_RESTORE:
1259 case AMDGPU::SI_SPILL_V32_SAVE:
1260 case AMDGPU::SI_SPILL_V32_RESTORE:
1261 case AMDGPU::SI_SPILL_A32_SAVE:
1262 case AMDGPU::SI_SPILL_A32_RESTORE:
1263 case AMDGPU::SI_SPILL_AV32_SAVE:
1264 case AMDGPU::SI_SPILL_AV32_RESTORE:
1265 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1266 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1267 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1268 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1269 case AMDGPU::SI_SPILL_V16_SAVE:
1270 case AMDGPU::SI_SPILL_V16_RESTORE:
1278 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1279 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1280 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1281 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1282 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1283 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1284 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1285 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1286 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1287 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1288 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1289 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1290 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1291 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1292 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1293 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1301 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1302 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1303 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1304 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1305 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1306 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1307 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1308 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1309 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1310 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1311 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1312 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1313 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1314 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1315 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1316 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1317 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1318 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1319 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1320 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1321 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1322 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1323 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1324 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1325 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1326 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1327 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1328 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1336 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1337 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1338 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1339 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1340 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1341 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1342 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1343 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1344 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1345 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1346 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1347 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1348 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1349 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1350 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1351 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1359 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1360 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1361 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1362 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1363 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1364 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1365 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1366 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1367 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1368 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1369 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1370 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1371 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1372 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1373 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1374 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1375 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1376 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1377 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1378 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1379 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1380 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1381 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1382 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1383 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1384 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1385 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1386 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  if (Reg == AMDGPU::NoRegister)

  bool IsStore = MI->mayStore();

  unsigned Dst = IsStore ? Reg : ValueReg;
  unsigned Src = IsStore ? ValueReg : Reg;
  bool IsVGPR = TRI->isVGPR(MRI, Reg);
  if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {

  unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
                                    : AMDGPU::V_ACCVGPR_READ_B32_e64;
  bool IsStore = MI->mayStore();
  unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
  if (LoadStoreOp == -1)

          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
                                                 AMDGPU::OpName::vdata_in);
    NewMI.add(*VDataIn);
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
                                          unsigned EltSize) {
  bool IsStore = TII->get(LoadStoreOp).mayStore();
  if (TII->isBlockLoadStore(LoadStoreOp))

    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
    LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
                          : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
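// Selection sketch: EltSize picks the widest scratch opcode covering one
// element, i.e. 4 -> DWORD, 8 -> DWORDX2, 12 -> DWORDX3, 16 -> DWORDX4
// (store or load variant according to IsStore).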
                                         unsigned LoadStoreOp, int Index,
                                         Register ValueReg, bool IsKill,
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

  bool IsStore = Desc->mayStore();
  bool IsFlat = TII->isFLATScratch(LoadStoreOp);
  bool IsBlock = TII->isBlockLoadStore(LoadStoreOp);
  bool CanClobberSCC = false;
  bool Scavenged = false;

  const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);

  bool IsRegMisaligned = false;
  if (!IsBlock && RegWidth > 4) {
    unsigned SpillOpcode =
        IsStore ? AMDGPU::getNamedOperandIdx(SpillOpcode, AMDGPU::OpName::vdata)
        TII->getRegClass(TII->get(SpillOpcode), VDataIdx);
    if (!ExpectedRC->contains(ValueReg)) {
          getMatchingSuperRegClass(RC, ExpectedRC, SubIdx);
      if (!MatchRC || !MatchRC->contains(ValueReg))
        IsRegMisaligned = true;
  unsigned EltSize = IsBlock ? RegWidth
                     : (IsFlat && !IsAGPR && !IsRegMisaligned)
                         ? std::min(RegWidth, 16u)
                         : 4u;
  unsigned NumSubRegs = RegWidth / EltSize;
  unsigned Size = NumSubRegs * EltSize;
  unsigned RemSize = RegWidth - Size;
  unsigned NumRemSubRegs = RemSize ? 1 : 0;
  int64_t MaterializedOffset = Offset;

  int64_t MaxOffset = Offset + Size - (RemSize ? 0 : EltSize);
  int64_t ScratchOffsetRegDelta = 0;
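  // Worked example: spilling a 512-bit register means RegWidth == 64 bytes.
  // On the flat-scratch path EltSize is 16, giving 4 DWORDX4 accesses; on the
  // MUBUF path EltSize stays 4, giving 16 DWORD accesses (one per 32-bit
  // subregister).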
  if (IsFlat && EltSize > 4) {
    Desc = &TII->get(LoadStoreOp);

         "unexpected VGPR spill offset");

  bool UseVGPROffset = false;

  if (IsFlat && SGPRBase) {
    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {

  bool IsOffsetLegal =
      : TII->isLegalMUBUFImmOffset(MaxOffset);
  if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {

      SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI,
                                              false, 0, false);
      CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
    } else if (LiveUnits) {
      CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
      for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {

    if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)

      UseVGPROffset = true;

        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
                                                      MI, false, 0);
        for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
            TmpOffsetVGPR = Reg;
    } else if (!SOffset && CanClobberSCC) {

    if (!ScratchOffsetReg)

    SOffset = ScratchOffsetReg;
    ScratchOffsetRegDelta = Offset;

  if (!IsFlat && !UseVGPROffset)
    Offset *= ST.getWavefrontSize();

  if (!UseVGPROffset && !SOffset)

  if (UseVGPROffset) {
    MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
  } else if (ScratchOffsetReg == AMDGPU::NoRegister) {

            .addReg(ScratchOffsetReg)
    Add->getOperand(3).setIsDead();

  if (IsFlat && SOffset == AMDGPU::NoRegister) {
    assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0 &&
           "Unexpected vaddr for flat scratch with a FI operand");

    if (UseVGPROffset) {
      assert(ST.hasFlatScratchSTMode());
      assert(!TII->isBlockLoadStore(LoadStoreOp) && "Block ops don't have ST");

    Desc = &TII->get(LoadStoreOp);
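  // The loop below emits one scratch/buffer access per EltSize-byte slice of
  // ValueReg, advancing RegOffset each iteration; a trailing narrower access
  // handles any RemSize left over.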
  for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
       ++i, RegOffset += EltSize) {
    if (i == NumSubRegs) {
      Desc = &TII->get(LoadStoreOp);

    if (!IsFlat && UseVGPROffset) {
      Desc = &TII->get(NewLoadStoreOp);

    if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
      MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);

    unsigned NumRegs = EltSize / 4;

    const bool IsLastSubReg = i + 1 == e;
    const bool IsFirstSubReg = i == 0;

    bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
    bool NeedSuperRegImpOperand = e > 1;

    unsigned RemEltSize = EltSize;

      for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
               LaneE = RegOffset / 4;
           Lane >= LaneE; --Lane) {
        bool IsSubReg = e > 1 || EltSize > 4;
        if (!MIB.getInstr())
        if (NeedSuperRegDef ||
            (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
          NeedSuperRegDef = false;
        if ((IsSubReg || NeedSuperRegImpOperand) &&
            (IsFirstSubReg || IsLastSubReg)) {
          NeedSuperRegImpOperand = true;
          if (!IsLastSubReg || (Lane != LaneE))
          if (!IsFirstSubReg || (Lane != LaneS))

    if (RemEltSize != EltSize) {
      assert(IsFlat && EltSize > 4);

      unsigned NumRegs = RemEltSize / 4;
      SubReg = Register(getSubReg(ValueReg,

    unsigned FinalReg = SubReg;

      if (!TmpIntermediateVGPR) {
                TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TmpIntermediateVGPR)
        if (NeedSuperRegDef)
        if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))

      SubReg = TmpIntermediateVGPR;
    } else if (UseVGPROffset) {
      if (!TmpOffsetVGPR) {
        TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
        RS->setRegUsed(TmpOffsetVGPR);

    if (LoadStoreOp == AMDGPU::SCRATCH_LOAD_USHORT_SADDR) {
          RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);

    if (UseVGPROffset) {
    if (SOffset == AMDGPU::NoRegister) {
      if (UseVGPROffset && ScratchOffsetReg) {
        MIB.addReg(ScratchOffsetReg);
      MIB.addReg(SOffset, SOffsetRegState);

    MIB.addMemOperand(NewMMO);

    if (FinalValueReg != ValueReg) {
        ValueReg = getSubReg(ValueReg, AMDGPU::lo16);
      ValueReg = FinalValueReg;

    if (!IsAGPR && NeedSuperRegDef)

    if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {

    bool PartialReloadCopy = (RemEltSize != EltSize) && !IsStore;
    if (NeedSuperRegImpOperand &&
        (IsFirstSubReg || (IsLastSubReg && !IsSrcDstDef))) {
      if (PartialReloadCopy)

    if (!IsStore && MI != MBB.end() && MI->isReturn() &&
        MI->readsRegister(SubReg, this)) {
      MIB->tieOperands(0, MIB->getNumOperands() - 1);

  if (!IsStore && TII->isBlockLoadStore(LoadStoreOp))

  if (ScratchOffsetRegDelta != 0) {
        .addImm(-ScratchOffsetRegDelta);
  Register BaseVGPR = getSubReg(BlockReg, AMDGPU::sub0);
  for (unsigned RegOffset = 1; RegOffset < 32; ++RegOffset)
    if (!(Mask & (1 << RegOffset)) &&
        isCalleeSavedPhysReg(BaseVGPR + RegOffset, *MF))

                                     bool IsKill) const {
  Align Alignment = FrameInfo.getObjectAlign(Index);

    unsigned Opc = ST.hasFlatScratchEnabled()
                       ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                       : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
    unsigned Opc = ST.hasFlatScratchEnabled()
                       ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                       : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

                                  bool SpillToPhysVGPRLane) const {
  assert(!MI->getOperand(0).isUndef() &&
         "undef spill should have been deleted earlier");
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

         "Num of SGPRs spilled should be less than or equal to num of "

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
      bool IsFirstSubreg = i == 0;
      bool UseKill = SB.IsKill && IsLastSubreg;

              SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)

      if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))

      for (unsigned i = Offset * PVD.PerVGPR,

                SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
                .addReg(SubReg, SubKillState)

  MI->eraseFromParent();
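  // SI_SPILL_S32_TO_VGPR writes one SGPR into a single lane of the spill
  // VGPR, so a multi-word SGPR spill issues one such write per 32-bit
  // subregister (batched per VGPR when the spill goes through memory).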
                                 bool SpillToPhysVGPRLane) const {
  bool SpillToVGPR = !VGPRSpills.empty();
  if (OnlyToVGPR && !SpillToVGPR)

    for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
              SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

      for (unsigned i = Offset * PVD.PerVGPR,
        bool LastSubReg = (i + 1 == e);
                SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)

  MI->eraseFromParent();

      for (unsigned i = Offset * PVD.PerVGPR,
          .addReg(SubReg, SubKillState)

  MI = RestoreMBB.end();
      for (unsigned i = Offset * PVD.PerVGPR,
        bool LastSubReg = (i + 1 == e);
  switch (MI->getOpcode()) {
2318 case AMDGPU::SI_SPILL_S1024_SAVE:
2319 case AMDGPU::SI_SPILL_S512_SAVE:
2320 case AMDGPU::SI_SPILL_S384_SAVE:
2321 case AMDGPU::SI_SPILL_S352_SAVE:
2322 case AMDGPU::SI_SPILL_S320_SAVE:
2323 case AMDGPU::SI_SPILL_S288_SAVE:
2324 case AMDGPU::SI_SPILL_S256_SAVE:
2325 case AMDGPU::SI_SPILL_S224_SAVE:
2326 case AMDGPU::SI_SPILL_S192_SAVE:
2327 case AMDGPU::SI_SPILL_S160_SAVE:
2328 case AMDGPU::SI_SPILL_S128_SAVE:
2329 case AMDGPU::SI_SPILL_S96_SAVE:
2330 case AMDGPU::SI_SPILL_S64_SAVE:
2331 case AMDGPU::SI_SPILL_S32_SAVE:
    return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2333 case AMDGPU::SI_SPILL_S1024_RESTORE:
2334 case AMDGPU::SI_SPILL_S512_RESTORE:
2335 case AMDGPU::SI_SPILL_S384_RESTORE:
2336 case AMDGPU::SI_SPILL_S352_RESTORE:
2337 case AMDGPU::SI_SPILL_S320_RESTORE:
2338 case AMDGPU::SI_SPILL_S288_RESTORE:
2339 case AMDGPU::SI_SPILL_S256_RESTORE:
2340 case AMDGPU::SI_SPILL_S224_RESTORE:
2341 case AMDGPU::SI_SPILL_S192_RESTORE:
2342 case AMDGPU::SI_SPILL_S160_RESTORE:
2343 case AMDGPU::SI_SPILL_S128_RESTORE:
2344 case AMDGPU::SI_SPILL_S96_RESTORE:
2345 case AMDGPU::SI_SPILL_S64_RESTORE:
2346 case AMDGPU::SI_SPILL_S32_RESTORE:
    return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
                                        int SPAdj, unsigned FIOperandNum,
  assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
         "unreserved scratch RSRC register");

  int Index = MI->getOperand(FIOperandNum).getIndex();

  switch (MI->getOpcode()) {
2377 case AMDGPU::SI_SPILL_S1024_SAVE:
2378 case AMDGPU::SI_SPILL_S512_SAVE:
2379 case AMDGPU::SI_SPILL_S384_SAVE:
2380 case AMDGPU::SI_SPILL_S352_SAVE:
2381 case AMDGPU::SI_SPILL_S320_SAVE:
2382 case AMDGPU::SI_SPILL_S288_SAVE:
2383 case AMDGPU::SI_SPILL_S256_SAVE:
2384 case AMDGPU::SI_SPILL_S224_SAVE:
2385 case AMDGPU::SI_SPILL_S192_SAVE:
2386 case AMDGPU::SI_SPILL_S160_SAVE:
2387 case AMDGPU::SI_SPILL_S128_SAVE:
2388 case AMDGPU::SI_SPILL_S96_SAVE:
2389 case AMDGPU::SI_SPILL_S64_SAVE:
2390 case AMDGPU::SI_SPILL_S32_SAVE: {
2395 case AMDGPU::SI_SPILL_S1024_RESTORE:
2396 case AMDGPU::SI_SPILL_S512_RESTORE:
2397 case AMDGPU::SI_SPILL_S384_RESTORE:
2398 case AMDGPU::SI_SPILL_S352_RESTORE:
2399 case AMDGPU::SI_SPILL_S320_RESTORE:
2400 case AMDGPU::SI_SPILL_S288_RESTORE:
2401 case AMDGPU::SI_SPILL_S256_RESTORE:
2402 case AMDGPU::SI_SPILL_S224_RESTORE:
2403 case AMDGPU::SI_SPILL_S192_RESTORE:
2404 case AMDGPU::SI_SPILL_S160_RESTORE:
2405 case AMDGPU::SI_SPILL_S128_RESTORE:
2406 case AMDGPU::SI_SPILL_S96_RESTORE:
2407 case AMDGPU::SI_SPILL_S64_RESTORE:
2408 case AMDGPU::SI_SPILL_S32_RESTORE: {
2413 case AMDGPU::SI_BLOCK_SPILL_V1024_SAVE: {
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
2420 case AMDGPU::SI_SPILL_V1024_SAVE:
2421 case AMDGPU::SI_SPILL_V512_SAVE:
2422 case AMDGPU::SI_SPILL_V384_SAVE:
2423 case AMDGPU::SI_SPILL_V352_SAVE:
2424 case AMDGPU::SI_SPILL_V320_SAVE:
2425 case AMDGPU::SI_SPILL_V288_SAVE:
2426 case AMDGPU::SI_SPILL_V256_SAVE:
2427 case AMDGPU::SI_SPILL_V224_SAVE:
2428 case AMDGPU::SI_SPILL_V192_SAVE:
2429 case AMDGPU::SI_SPILL_V160_SAVE:
2430 case AMDGPU::SI_SPILL_V128_SAVE:
2431 case AMDGPU::SI_SPILL_V96_SAVE:
2432 case AMDGPU::SI_SPILL_V64_SAVE:
2433 case AMDGPU::SI_SPILL_V32_SAVE:
2434 case AMDGPU::SI_SPILL_V16_SAVE:
2435 case AMDGPU::SI_SPILL_A1024_SAVE:
2436 case AMDGPU::SI_SPILL_A512_SAVE:
2437 case AMDGPU::SI_SPILL_A384_SAVE:
2438 case AMDGPU::SI_SPILL_A352_SAVE:
2439 case AMDGPU::SI_SPILL_A320_SAVE:
2440 case AMDGPU::SI_SPILL_A288_SAVE:
2441 case AMDGPU::SI_SPILL_A256_SAVE:
2442 case AMDGPU::SI_SPILL_A224_SAVE:
2443 case AMDGPU::SI_SPILL_A192_SAVE:
2444 case AMDGPU::SI_SPILL_A160_SAVE:
2445 case AMDGPU::SI_SPILL_A128_SAVE:
2446 case AMDGPU::SI_SPILL_A96_SAVE:
2447 case AMDGPU::SI_SPILL_A64_SAVE:
2448 case AMDGPU::SI_SPILL_A32_SAVE:
2449 case AMDGPU::SI_SPILL_AV1024_SAVE:
2450 case AMDGPU::SI_SPILL_AV512_SAVE:
2451 case AMDGPU::SI_SPILL_AV384_SAVE:
2452 case AMDGPU::SI_SPILL_AV352_SAVE:
2453 case AMDGPU::SI_SPILL_AV320_SAVE:
2454 case AMDGPU::SI_SPILL_AV288_SAVE:
2455 case AMDGPU::SI_SPILL_AV256_SAVE:
2456 case AMDGPU::SI_SPILL_AV224_SAVE:
2457 case AMDGPU::SI_SPILL_AV192_SAVE:
2458 case AMDGPU::SI_SPILL_AV160_SAVE:
2459 case AMDGPU::SI_SPILL_AV128_SAVE:
2460 case AMDGPU::SI_SPILL_AV96_SAVE:
2461 case AMDGPU::SI_SPILL_AV64_SAVE:
2462 case AMDGPU::SI_SPILL_AV32_SAVE:
2463 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2464 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
                                                      AMDGPU::OpName::vdata);
        MI->eraseFromParent();

      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

      if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_SAVE) {
        assert(ST.hasFlatScratchEnabled() && "Flat Scratch is not enabled!");
        Opc = AMDGPU::SCRATCH_STORE_SHORT_SADDR_t16;
        Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_SAVE
                  ? AMDGPU::SCRATCH_STORE_BLOCK_SADDR
              : ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
                                           : AMDGPU::BUFFER_STORE_DWORD_OFFSET;

      auto *MBB = MI->getParent();
      bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
      if (IsWWMRegSpill) {
                                      RS->isRegUsed(AMDGPU::SCC));

          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(), RS);

      MI->eraseFromParent();

    case AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE: {
          .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::mask));
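      // The SAVE pseudos above are expanded into real scratch/buffer stores
      // via buildSpillLoadStore; block spills also forward their 32-bit mask,
      // which selects which VGPRs of the 32-register block are actually
      // written.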
2510 case AMDGPU::SI_SPILL_V16_RESTORE:
2511 case AMDGPU::SI_SPILL_V32_RESTORE:
2512 case AMDGPU::SI_SPILL_V64_RESTORE:
2513 case AMDGPU::SI_SPILL_V96_RESTORE:
2514 case AMDGPU::SI_SPILL_V128_RESTORE:
2515 case AMDGPU::SI_SPILL_V160_RESTORE:
2516 case AMDGPU::SI_SPILL_V192_RESTORE:
2517 case AMDGPU::SI_SPILL_V224_RESTORE:
2518 case AMDGPU::SI_SPILL_V256_RESTORE:
2519 case AMDGPU::SI_SPILL_V288_RESTORE:
2520 case AMDGPU::SI_SPILL_V320_RESTORE:
2521 case AMDGPU::SI_SPILL_V352_RESTORE:
2522 case AMDGPU::SI_SPILL_V384_RESTORE:
2523 case AMDGPU::SI_SPILL_V512_RESTORE:
2524 case AMDGPU::SI_SPILL_V1024_RESTORE:
2525 case AMDGPU::SI_SPILL_A32_RESTORE:
2526 case AMDGPU::SI_SPILL_A64_RESTORE:
2527 case AMDGPU::SI_SPILL_A96_RESTORE:
2528 case AMDGPU::SI_SPILL_A128_RESTORE:
2529 case AMDGPU::SI_SPILL_A160_RESTORE:
2530 case AMDGPU::SI_SPILL_A192_RESTORE:
2531 case AMDGPU::SI_SPILL_A224_RESTORE:
2532 case AMDGPU::SI_SPILL_A256_RESTORE:
2533 case AMDGPU::SI_SPILL_A288_RESTORE:
2534 case AMDGPU::SI_SPILL_A320_RESTORE:
2535 case AMDGPU::SI_SPILL_A352_RESTORE:
2536 case AMDGPU::SI_SPILL_A384_RESTORE:
2537 case AMDGPU::SI_SPILL_A512_RESTORE:
2538 case AMDGPU::SI_SPILL_A1024_RESTORE:
2539 case AMDGPU::SI_SPILL_AV32_RESTORE:
2540 case AMDGPU::SI_SPILL_AV64_RESTORE:
2541 case AMDGPU::SI_SPILL_AV96_RESTORE:
2542 case AMDGPU::SI_SPILL_AV128_RESTORE:
2543 case AMDGPU::SI_SPILL_AV160_RESTORE:
2544 case AMDGPU::SI_SPILL_AV192_RESTORE:
2545 case AMDGPU::SI_SPILL_AV224_RESTORE:
2546 case AMDGPU::SI_SPILL_AV256_RESTORE:
2547 case AMDGPU::SI_SPILL_AV288_RESTORE:
2548 case AMDGPU::SI_SPILL_AV320_RESTORE:
2549 case AMDGPU::SI_SPILL_AV352_RESTORE:
2550 case AMDGPU::SI_SPILL_AV384_RESTORE:
2551 case AMDGPU::SI_SPILL_AV512_RESTORE:
2552 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2553 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2554 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
                                                      AMDGPU::OpName::vdata);
      assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==

      if (MI->getOpcode() == AMDGPU::SI_SPILL_V16_RESTORE) {
        assert(ST.hasFlatScratchEnabled() && "Flat Scratch is not enabled!");
        Opc = ST.d16PreservesUnusedBits()
                  ? AMDGPU::SCRATCH_LOAD_SHORT_D16_SADDR_t16
                  : AMDGPU::SCRATCH_LOAD_USHORT_SADDR;
        Opc = MI->getOpcode() == AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE
                  ? AMDGPU::SCRATCH_LOAD_BLOCK_SADDR
              : ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
                                           : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;

      auto *MBB = MI->getParent();
      bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
      if (IsWWMRegSpill) {
                                      RS->isRegUsed(AMDGPU::SCC));

          TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
          *MI->memoperands_begin(), RS);

      MI->eraseFromParent();
2591 case AMDGPU::V_ADD_U32_e32:
2592 case AMDGPU::V_ADD_U32_e64:
2593 case AMDGPU::V_ADD_CO_U32_e32:
2594 case AMDGPU::V_ADD_CO_U32_e64: {
      unsigned NumDefs = MI->getNumExplicitDefs();
      unsigned Src0Idx = NumDefs;

      bool HasClamp = false;

      switch (MI->getOpcode()) {
      case AMDGPU::V_ADD_U32_e32:
      case AMDGPU::V_ADD_U32_e64:
        HasClamp = MI->getOperand(3).getImm();
      case AMDGPU::V_ADD_CO_U32_e32:
        VCCOp = &MI->getOperand(3);
      case AMDGPU::V_ADD_CO_U32_e64:
        VCCOp = &MI->getOperand(1);
        HasClamp = MI->getOperand(4).getImm();

      bool DeadVCC = !VCCOp || VCCOp->isDead();

      unsigned OtherOpIdx =
          FIOperandNum == Src0Idx ? FIOperandNum + 1 : Src0Idx;

      unsigned Src1Idx = Src0Idx + 1;
      Register MaterializedReg = FrameReg;

      int64_t Offset = FrameInfo.getObjectOffset(Index);

      if (OtherOp->isImm()) {
          OtherOp->setImm(TotalOffset);

      if (FrameReg && !ST.hasFlatScratchEnabled()) {
          ScavengedVGPR = RS->scavengeRegisterBackwards(
              AMDGPU::VGPR_32RegClass, MI, false, 0);
              .addImm(ST.getWavefrontSizeLog2())
          MaterializedReg = ScavengedVGPR;

      if ((!OtherOp->isImm() || OtherOp->getImm() != 0) && MaterializedReg) {
        if (ST.hasFlatScratchEnabled() &&
            !TII->isOperandLegal(*MI, Src1Idx, OtherOp)) {

          if (!ScavengedVGPR) {
            ScavengedVGPR = RS->scavengeRegisterBackwards(
                AMDGPU::VGPR_32RegClass, MI, false,

          assert(ScavengedVGPR != DstReg);

          MaterializedReg = ScavengedVGPR;

          AddI32.add(MI->getOperand(1));

        if (isVGPRClass(getPhysRegBaseClass(MaterializedReg))) {
            .addReg(MaterializedReg, MaterializedRegFlags);
            .addReg(MaterializedReg, MaterializedRegFlags)

        if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 ||
            MI->getOpcode() == AMDGPU::V_ADD_U32_e64)

        if (MI->getOpcode() == AMDGPU::V_ADD_CO_U32_e32)
          AddI32.setOperandDead(3);

        MaterializedReg = DstReg;
      } else if (Offset != 0) {
        assert(!MaterializedReg);

      if (DeadVCC && !HasClamp) {
        if (OtherOp->isReg() && OtherOp->getReg() == DstReg) {
          MI->eraseFromParent();

          MI->setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
          MI->removeOperand(FIOperandNum);

          unsigned NumOps = MI->getNumOperands();
          for (unsigned I = NumOps - 2; I >= NumDefs + 1; --I)
            MI->removeOperand(I);

            MI->removeOperand(1);

      if (!TII->isOperandLegal(*MI, Src1Idx) && TII->commuteInstruction(*MI)) {

      for (unsigned SrcIdx : {FIOperandNum, OtherOpIdx}) {
        if (!TII->isOperandLegal(*MI, SrcIdx)) {

          if (!ScavengedVGPR) {
            ScavengedVGPR = RS->scavengeRegisterBackwards(
                AMDGPU::VGPR_32RegClass, MI, false,

          assert(ScavengedVGPR != DstReg);

          Src.ChangeToRegister(ScavengedVGPR, false);
          Src.setIsKill(true);

      if (FIOp->isImm() && FIOp->getImm() == 0 && DeadVCC && !HasClamp) {
        if (OtherOp->isReg() && OtherOp->getReg() != DstReg) {

        MI->eraseFromParent();
    case AMDGPU::S_ADD_I32:
    case AMDGPU::S_ADD_U32: {
      unsigned OtherOpIdx = FIOperandNum == 1 ? 2 : 1;

      Register MaterializedReg = FrameReg;

      bool DeadSCC = MI->getOperand(3).isDead();

      if (FrameReg && !ST.hasFlatScratchEnabled()) {
        TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
            .addImm(ST.getWavefrontSizeLog2())
        MaterializedReg = TmpReg;

      int64_t Offset = FrameInfo.getObjectOffset(Index);

      if (OtherOp.isImm()) {
        if (MaterializedReg)
      } else if (MaterializedReg) {
        if (!TmpReg && MaterializedReg == FrameReg) {
          TmpReg = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,

        MaterializedReg = DstReg;

      if (DeadSCC && OtherOp.isImm() && OtherOp.getImm() == 0) {
        MI->removeOperand(3);
        MI->removeOperand(OtherOpIdx);
        MI->setDesc(TII->get(FIOp->isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
      } else if (DeadSCC && FIOp->isImm() && FIOp->getImm() == 0) {
        MI->removeOperand(3);
        MI->removeOperand(FIOperandNum);
            TII->get(OtherOp.isReg() ? AMDGPU::COPY : AMDGPU::S_MOV_B32));
      int64_t Offset = FrameInfo.getObjectOffset(Index);
      if (ST.hasFlatScratchEnabled()) {
        if (TII->isFLATScratch(*MI)) {
                 (int16_t)FIOperandNum ==
                     AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                                AMDGPU::OpName::saddr));

              TII->getNamedOperand(*MI, AMDGPU::OpName::offset);

            unsigned Opc = MI->getOpcode();
          } else if (ST.hasFlatScratchSTMode()) {

                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
            bool TiedVDst = VDstIn != -1 && MI->getOperand(VDstIn).isReg() &&
                            MI->getOperand(VDstIn).isTied();
              MI->untieRegOperand(VDstIn);

                AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));

                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
                  AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
              assert(NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
              MI->tieOperands(NewVDst, NewVDstIn);
            MI->setDesc(TII->get(NewOpc));

        if (TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp))

        bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, FIOp);

        if (!Offset && FrameReg && UseSGPR) {

            UseSGPR ? &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
            RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);

        if ((!FrameReg || !Offset) && TmpReg) {
          unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
            MIB.addReg(FrameReg);

        bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                           !MI->definesRegister(AMDGPU::SCC, nullptr);
                : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
                                                MI, false, 0, !UseSGPR);

        if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR)) {
          if (ST.hasFlatScratchSVSMode() && SVOpcode != -1) {
            Register TmpVGPR = RS->scavengeRegisterBackwards(
                AMDGPU::VGPR_32RegClass, MI, false, 0, true);
              MIB.addReg(FrameReg);
                .add(MI->getOperand(0))
                .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::cpol));
            MI->eraseFromParent();

        assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");

          if (TmpSReg == FrameReg) {
                !MI->registerDefIsDead(AMDGPU::SCC, nullptr)) {
      bool IsMUBUF = TII->isMUBUF(*MI);

        bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
                       !MI->definesRegister(AMDGPU::SCC, nullptr);
                                            ? &AMDGPU::SReg_32RegClass
                                            : &AMDGPU::VGPR_32RegClass;
        bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                      MI->getOpcode() == AMDGPU::V_MOV_B32_e64 ||
                      MI->getOpcode() == AMDGPU::S_MOV_B32;
            IsCopy ? MI->getOperand(0).getReg()
                   : RS->scavengeRegisterBackwards(*RC, MI, false, 0);

        int64_t Offset = FrameInfo.getObjectOffset(Index);
            IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32 : AMDGPU::V_LSHRREV_B32_e64;
          if (IsSALU && LiveSCC) {
            TmpResultReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,

          if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
            Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
            Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
          if (IsSALU && !LiveSCC)
            Shift.getInstr()->getOperand(3).setIsDead();
          if (IsSALU && LiveSCC) {
              NewDest = ResultReg;
              NewDest = RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
            ResultReg = NewDest;

          if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
                .addImm(ST.getWavefrontSizeLog2())

            const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
                     "Need to reuse carry out register");
                ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
                ConstOffsetReg = MIB.getReg(1);

          if (!MIB || IsSALU) {
            Register TmpScaledReg = IsCopy && IsSALU
                                        : RS->scavengeRegisterBackwards(
                                              AMDGPU::SReg_32_XM0RegClass, MI,
            Register ScaledReg =
                TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
                  .addImm(ST.getWavefrontSizeLog2());

              TmpResultReg = RS->scavengeRegisterBackwards(
                  AMDGPU::VGPR_32RegClass, MI, false, 0, true);

              if ((Add = TII->getAddNoCarry(*MBB, MI, DL, TmpResultReg, *RS))) {
                    .addImm(ST.getWavefrontSizeLog2())
                if (Add->getOpcode() == AMDGPU::V_ADD_CO_U32_e64) {
                       "offset is unsafe for v_mad_u32_u24");

                bool IsInlinableLiteral =
                if (!IsInlinableLiteral) {

                if (!IsInlinableLiteral) {

                Add.addImm(ST.getWavefrontSize()).addReg(FrameReg).addImm(0);
                    .addImm(ST.getWavefrontSizeLog2())

                NewDest = ResultReg;
                NewDest = RS->scavengeRegisterBackwards(
                    AMDGPU::SReg_32_XM0RegClass, *Add, false, 0,
              ResultReg = NewDest;

            if (!TmpScaledReg.isValid()) {
                  .addImm(ST.getWavefrontSizeLog2());

        MI->eraseFromParent();

               static_cast<int>(FIOperandNum) ==
                   AMDGPU::getNamedOperandIdx(MI->getOpcode(),
                                              AMDGPU::OpName::vaddr));

        auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
        assert((SOffset.isImm() && SOffset.getImm() == 0));

        if (FrameReg != AMDGPU::NoRegister)
          SOffset.ChangeToRegister(FrameReg, false);

        int64_t Offset = FrameInfo.getObjectOffset(Index);
            TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
        int64_t NewOffset = OldImm + Offset;

        if (TII->isLegalMUBUFImmOffset(NewOffset) &&
          MI->eraseFromParent();

      if (!TII->isImmOperandLegal(*MI, FIOperandNum, *FIOp)) {
            RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
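      // The SGPR frame register holds a wave-scaled scratch offset, so it is
      // shifted right by log2(wavefront size) above to recover the per-lane
      // byte offset before the frame object's offset is added.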
3364 return &AMDGPU::VReg_64RegClass;
3366 return &AMDGPU::VReg_96RegClass;
3368 return &AMDGPU::VReg_128RegClass;
3370 return &AMDGPU::VReg_160RegClass;
3372 return &AMDGPU::VReg_192RegClass;
3374 return &AMDGPU::VReg_224RegClass;
3376 return &AMDGPU::VReg_256RegClass;
3378 return &AMDGPU::VReg_288RegClass;
3380 return &AMDGPU::VReg_320RegClass;
3382 return &AMDGPU::VReg_352RegClass;
3384 return &AMDGPU::VReg_384RegClass;
3386 return &AMDGPU::VReg_512RegClass;
3388 return &AMDGPU::VReg_1024RegClass;
3396 return &AMDGPU::VReg_64_Align2RegClass;
3398 return &AMDGPU::VReg_96_Align2RegClass;
3400 return &AMDGPU::VReg_128_Align2RegClass;
3402 return &AMDGPU::VReg_160_Align2RegClass;
3404 return &AMDGPU::VReg_192_Align2RegClass;
3406 return &AMDGPU::VReg_224_Align2RegClass;
3408 return &AMDGPU::VReg_256_Align2RegClass;
3410 return &AMDGPU::VReg_288_Align2RegClass;
3412 return &AMDGPU::VReg_320_Align2RegClass;
3414 return &AMDGPU::VReg_352_Align2RegClass;
3416 return &AMDGPU::VReg_384_Align2RegClass;
3418 return &AMDGPU::VReg_512_Align2RegClass;
3420 return &AMDGPU::VReg_1024_Align2RegClass;
3428 return &AMDGPU::VReg_1RegClass;
3430 return &AMDGPU::VGPR_16RegClass;
3432 return &AMDGPU::VGPR_32RegClass;
3440 return &AMDGPU::VGPR_32_Lo256RegClass;
3442 return &AMDGPU::VReg_64_Lo256_Align2RegClass;
3444 return &AMDGPU::VReg_96_Lo256_Align2RegClass;
3446 return &AMDGPU::VReg_128_Lo256_Align2RegClass;
3448 return &AMDGPU::VReg_160_Lo256_Align2RegClass;
3450 return &AMDGPU::VReg_192_Lo256_Align2RegClass;
3452 return &AMDGPU::VReg_224_Lo256_Align2RegClass;
3454 return &AMDGPU::VReg_256_Lo256_Align2RegClass;
3456 return &AMDGPU::VReg_288_Lo256_Align2RegClass;
3458 return &AMDGPU::VReg_320_Lo256_Align2RegClass;
3460 return &AMDGPU::VReg_352_Lo256_Align2RegClass;
3462 return &AMDGPU::VReg_384_Lo256_Align2RegClass;
3464 return &AMDGPU::VReg_512_Lo256_Align2RegClass;
3466 return &AMDGPU::VReg_1024_Lo256_Align2RegClass;
3474 return &AMDGPU::AReg_64RegClass;
3476 return &AMDGPU::AReg_96RegClass;
3478 return &AMDGPU::AReg_128RegClass;
3480 return &AMDGPU::AReg_160RegClass;
3482 return &AMDGPU::AReg_192RegClass;
3484 return &AMDGPU::AReg_224RegClass;
3486 return &AMDGPU::AReg_256RegClass;
3488 return &AMDGPU::AReg_288RegClass;
3490 return &AMDGPU::AReg_320RegClass;
3492 return &AMDGPU::AReg_352RegClass;
3494 return &AMDGPU::AReg_384RegClass;
3496 return &AMDGPU::AReg_512RegClass;
3498 return &AMDGPU::AReg_1024RegClass;
3506 return &AMDGPU::AReg_64_Align2RegClass;
3508 return &AMDGPU::AReg_96_Align2RegClass;
3510 return &AMDGPU::AReg_128_Align2RegClass;
3512 return &AMDGPU::AReg_160_Align2RegClass;
3514 return &AMDGPU::AReg_192_Align2RegClass;
3516 return &AMDGPU::AReg_224_Align2RegClass;
3518 return &AMDGPU::AReg_256_Align2RegClass;
3520 return &AMDGPU::AReg_288_Align2RegClass;
3522 return &AMDGPU::AReg_320_Align2RegClass;
3524 return &AMDGPU::AReg_352_Align2RegClass;
3526 return &AMDGPU::AReg_384_Align2RegClass;
3528 return &AMDGPU::AReg_512_Align2RegClass;
3530 return &AMDGPU::AReg_1024_Align2RegClass;
3538 return &AMDGPU::AGPR_LO16RegClass;
3540 return &AMDGPU::AGPR_32RegClass;
3548 return &AMDGPU::AV_64RegClass;
3550 return &AMDGPU::AV_96RegClass;
3552 return &AMDGPU::AV_128RegClass;
3554 return &AMDGPU::AV_160RegClass;
3556 return &AMDGPU::AV_192RegClass;
3558 return &AMDGPU::AV_224RegClass;
3560 return &AMDGPU::AV_256RegClass;
3562 return &AMDGPU::AV_288RegClass;
3564 return &AMDGPU::AV_320RegClass;
3566 return &AMDGPU::AV_352RegClass;
3568 return &AMDGPU::AV_384RegClass;
3570 return &AMDGPU::AV_512RegClass;
3572 return &AMDGPU::AV_1024RegClass;
3580 return &AMDGPU::AV_64_Align2RegClass;
3582 return &AMDGPU::AV_96_Align2RegClass;
3584 return &AMDGPU::AV_128_Align2RegClass;
3586 return &AMDGPU::AV_160_Align2RegClass;
3588 return &AMDGPU::AV_192_Align2RegClass;
3590 return &AMDGPU::AV_224_Align2RegClass;
3592 return &AMDGPU::AV_256_Align2RegClass;
3594 return &AMDGPU::AV_288_Align2RegClass;
3596 return &AMDGPU::AV_320_Align2RegClass;
3598 return &AMDGPU::AV_352_Align2RegClass;
3600 return &AMDGPU::AV_384_Align2RegClass;
3602 return &AMDGPU::AV_512_Align2RegClass;
3604 return &AMDGPU::AV_1024_Align2RegClass;
3612 return &AMDGPU::AV_32RegClass;
3613 return ST.needsAlignedVGPRs()
3632 return &AMDGPU::SReg_32RegClass;
3634 return &AMDGPU::SReg_64RegClass;
3636 return &AMDGPU::SGPR_96RegClass;
3638 return &AMDGPU::SGPR_128RegClass;
3640 return &AMDGPU::SGPR_160RegClass;
3642 return &AMDGPU::SGPR_192RegClass;
3644 return &AMDGPU::SGPR_224RegClass;
3646 return &AMDGPU::SGPR_256RegClass;
3648 return &AMDGPU::SGPR_288RegClass;
3650 return &AMDGPU::SGPR_320RegClass;
3652 return &AMDGPU::SGPR_352RegClass;
3654 return &AMDGPU::SGPR_384RegClass;
3656 return &AMDGPU::SGPR_512RegClass;
3658 return &AMDGPU::SGPR_1024RegClass;
  if (Reg.isVirtual())
    RC = getPhysRegBaseClass(Reg);

  unsigned Size = getRegSizeInBits(*SRC);
  switch (SRC->getID()) {
  case AMDGPU::VS_32_Lo256RegClassID:
  case AMDGPU::VS_64_Lo256RegClassID:
  assert(VRC && "Invalid register class size");

  unsigned Size = getRegSizeInBits(*SRC);
  assert(ARC && "Invalid register class size");

  unsigned Size = getRegSizeInBits(*SRC);
  assert(ARC && "Invalid register class size");

  unsigned Size = getRegSizeInBits(*VRC);
    return &AMDGPU::SGPR_32RegClass;
  assert(SRC && "Invalid register class size");

                                       unsigned SubIdx) const {
      getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
  return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
  return !ST.hasMFMAInlineLiteralBug();

  if (ReserveHighestRegister) {

                                                  unsigned EltSize) const {
  assert(RegBitWidth >= 32 && RegBitWidth <= 1024 && EltSize >= 2);

  const unsigned RegHalves = RegBitWidth / 16;
  const unsigned EltHalves = EltSize / 2;
  assert(RegSplitParts.size() + 1 >= EltHalves);

  const std::vector<int16_t> &Parts = RegSplitParts[EltHalves - 1];
  const unsigned NumParts = RegHalves / EltHalves;

  return ArrayRef(Parts.data(), NumParts);
  return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);

  return getSubRegisterClass(SrcRC, MO.getSubReg());

  unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
  switch (RC->getID()) {
    return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
  case AMDGPU::VGPR_32RegClassID:
                    ST.getMaxNumVGPRs(MF));
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
    return std::min(ST.getMaxNumSGPRs(MinOcc, true), ST.getMaxNumSGPRs(MF));

                                            unsigned Idx) const {
  switch (static_cast<AMDGPU::RegisterPressureSets>(Idx)) {
  case AMDGPU::RegisterPressureSets::VGPR_32:
  case AMDGPU::RegisterPressureSets::AGPR_32:
  case AMDGPU::RegisterPressureSets::SReg_32:

  static const int Empty[] = { -1 };

  if (RegPressureIgnoredUnits[static_cast<unsigned>(RegUnit)])

  return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
  switch (Hint.first) {
          getMatchingSuperReg(Paired, AMDGPU::lo16, &AMDGPU::VGPR_32RegClass);
    } else if (VRM && VRM->hasPhys(Paired)) {
      PairedPhys = getMatchingSuperReg(VRM->getPhys(Paired), AMDGPU::lo16,
                                       &AMDGPU::VGPR_32RegClass);

      PairedPhys = TRI->getSubReg(Paired, AMDGPU::lo16);
    } else if (VRM && VRM->hasPhys(Paired)) {
      PairedPhys = TRI->getSubReg(VRM->getPhys(Paired), AMDGPU::lo16);

  if (AMDGPU::VGPR_16RegClass.contains(PhysReg) &&

  return AMDGPU::SGPR30_SGPR31;

  switch (RB.getID()) {
  case AMDGPU::VGPRRegBankID:
        std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
  case AMDGPU::VCCRegBankID:
  case AMDGPU::SGPRRegBankID:
  case AMDGPU::AGPRRegBankID:
3955 return getAllocatableClass(RC);
3961 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3965 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3970 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3971 : &AMDGPU::VReg_64RegClass;
3983 if (Reg.isVirtual()) {
3987 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3992 if ((S.LaneMask & SubLanes) == SubLanes) {
3993 V = S.getVNInfoAt(UseIdx);
4005 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
4020 if (!Def || !MDT.dominates(Def, &
Use))
4023 assert(Def->modifiesRegister(Reg,
this));
4029 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
4032 AMDGPU::SReg_32RegClass,
4033 AMDGPU::AGPR_32RegClass } ) {
4034 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
4037 if (
MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
4038 &AMDGPU::VGPR_32RegClass)) {
4042 return AMDGPU::NoRegister;
4046 if (!ST.needsAlignedVGPRs())
4057 assert(&RC != &AMDGPU::VS_64RegClass);
// SIRegisterInfo::getAllSGPR128 / getAllSGPR64 / getAllSGPR32 (fragments)
  return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);

  return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);

  return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
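// Hypothetical numbers to read the three fragments above: if
// ST.getMaxNumSGPRs(MF) returned 64, getAllSGPR32 would cover s0..s63,
// getAllSGPR64 the first 32 aligned pairs (s[0:1]..s[62:63]) and getAllSGPR128
// the first 16 quads (s[0:3]..s[60:63]); callers such as frame lowering scan
// these ranges for a free register.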
unsigned
SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
                                          unsigned SubReg) const {
  // scalar (SGPR) classes
    return std::min(128u, getSubRegIdxSize(SubReg));
  // vector (VGPR/AGPR) classes
    return std::min(32u, getSubRegIdxSize(SubReg));
unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
                                            const TargetRegisterClass &RC,
                                            bool IncludeCalls) const {
  unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
  // ...
      (RC.getID() == AMDGPU::VGPR_32RegClassID)
  // ...