29#include "llvm/IR/IntrinsicsAMDGPU.h"
32#define DEBUG_TYPE "amdgpu-isel"
37#define GET_GLOBALISEL_IMPL
38#define AMDGPUSubtarget GCNSubtarget
39#include "AMDGPUGenGlobalISel.inc"
40#undef GET_GLOBALISEL_IMPL
46 : TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI), TM(TM),
49#include
"AMDGPUGenGlobalISel.inc"
52#include
"AMDGPUGenGlobalISel.inc"
64 MRI = &
MF.getRegInfo();
72 return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
73 ? Def->getOperand(1).getReg()
80 F,
"intrinsic not supported on subtarget",
I.getDebugLoc(),
DS_Error));
89 auto &RegClassOrBank = MRI.getRegClassOrRegBank(
Reg);
90 const TargetRegisterClass *RC =
93 const LLT Ty = MRI.getType(
Reg);
97 return MRI.getVRegDef(
Reg)->getOpcode() != AMDGPU::G_TRUNC &&
102 return RB->
getID() == AMDGPU::VCCRegBankID;
105bool AMDGPUInstructionSelector::constrainCopyLikeIntrin(
MachineInstr &
MI,
106 unsigned NewOpc)
const {
107 MI.setDesc(TII.get(NewOpc));
111 MachineOperand &Dst =
MI.getOperand(0);
112 MachineOperand &Src =
MI.getOperand(1);
118 const TargetRegisterClass *DstRC
119 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
120 const TargetRegisterClass *SrcRC
121 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
122 if (!DstRC || DstRC != SrcRC)
125 if (!RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI) ||
126 !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
128 const MCInstrDesc &MCID =
MI.getDesc();
130 MI.getOperand(0).setIsEarlyClobber(
true);
135bool AMDGPUInstructionSelector::selectCOPY(
MachineInstr &
I)
const {
138 I.setDesc(TII.get(TargetOpcode::COPY));
140 const MachineOperand &Src =
I.getOperand(1);
141 MachineOperand &Dst =
I.getOperand(0);
145 if (isVCC(DstReg, *MRI)) {
146 if (SrcReg == AMDGPU::SCC) {
147 const TargetRegisterClass *RC
148 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
151 return RBI.constrainGenericRegister(DstReg, *RC, *MRI);
154 if (!isVCC(SrcReg, *MRI)) {
156 if (!RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI))
159 const TargetRegisterClass *SrcRC
160 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
162 std::optional<ValueAndVReg> ConstVal =
166 STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
168 .
addImm(ConstVal->Value.getBoolValue() ? -1 : 0);
170 Register MaskedReg = MRI->createVirtualRegister(SrcRC);
177 assert(Subtarget->useRealTrue16Insts());
178 const int64_t NoMods = 0;
179 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_AND_B16_t16_e64), MaskedReg)
185 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U16_t16_e64), DstReg)
192 bool IsSGPR = TRI.isSGPRClass(SrcRC);
193 unsigned AndOpc = IsSGPR ? AMDGPU::S_AND_B32 : AMDGPU::V_AND_B32_e32;
200 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CMP_NE_U32_e64), DstReg)
206 if (!MRI->getRegClassOrNull(SrcReg))
207 MRI->setRegClass(SrcReg, SrcRC);
212 const TargetRegisterClass *RC =
213 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
214 if (RC && !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
220 for (
const MachineOperand &MO :
I.operands()) {
221 if (MO.getReg().isPhysical())
224 const TargetRegisterClass *RC =
225 TRI.getConstrainedRegClassForOperand(MO, *MRI);
228 RBI.constrainGenericRegister(MO.getReg(), *RC, *MRI);
233bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(
MachineInstr &
I)
const {
236 Register VCCReg =
I.getOperand(1).getReg();
240 if (STI.hasScalarCompareEq64()) {
242 STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32;
245 Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
246 Cmp =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_OR_B64), DeadDst)
253 Register DstReg =
I.getOperand(0).getReg();
257 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
260bool AMDGPUInstructionSelector::selectCOPY_VCC_SCC(
MachineInstr &
I)
const {
264 Register DstReg =
I.getOperand(0).getReg();
265 Register SrcReg =
I.getOperand(1).getReg();
266 std::optional<ValueAndVReg> Arg =
270 const int64_t
Value = Arg->Value.getZExtValue();
272 unsigned Opcode = STI.isWave64() ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
279 return RBI.constrainGenericRegister(DstReg, *TRI.getBoolRC(), *MRI);
285 unsigned SelectOpcode =
286 STI.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
296bool AMDGPUInstructionSelector::selectReadAnyLane(
MachineInstr &
I)
const {
297 Register DstReg =
I.getOperand(0).getReg();
298 Register SrcReg =
I.getOperand(1).getReg();
303 auto RFL =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), DstReg)
311bool AMDGPUInstructionSelector::selectPHI(
MachineInstr &
I)
const {
312 const Register DefReg =
I.getOperand(0).getReg();
313 const LLT DefTy = MRI->getType(DefReg);
325 MRI->getRegClassOrRegBank(DefReg);
327 const TargetRegisterClass *DefRC =
336 DefRC = TRI.getRegClassForTypeOnBank(DefTy, RB);
345 for (
unsigned i = 1; i !=
I.getNumOperands(); i += 2) {
346 const Register SrcReg =
I.getOperand(i).getReg();
348 const RegisterBank *RB = MRI->getRegBankOrNull(SrcReg);
350 const LLT SrcTy = MRI->getType(SrcReg);
351 const TargetRegisterClass *SrcRC =
352 TRI.getRegClassForTypeOnBank(SrcTy, *RB);
353 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
358 I.setDesc(TII.get(TargetOpcode::PHI));
359 return RBI.constrainGenericRegister(DefReg, *DefRC, *MRI);
365 unsigned SubIdx)
const {
369 Register DstReg = MRI->createVirtualRegister(&SubRC);
372 unsigned ComposedSubIdx = TRI.composeSubRegIndices(MO.
getSubReg(), SubIdx);
374 BuildMI(*BB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::COPY), DstReg)
400 return Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
402 return Is64 ? AMDGPU::S_OR_B64 : AMDGPU::S_OR_B32;
404 return Is64 ? AMDGPU::S_XOR_B64 : AMDGPU::S_XOR_B32;
410bool AMDGPUInstructionSelector::selectG_AND_OR_XOR(
MachineInstr &
I)
const {
411 Register DstReg =
I.getOperand(0).getReg();
412 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
414 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
415 if (DstRB->
getID() != AMDGPU::SGPRRegBankID &&
416 DstRB->
getID() != AMDGPU::VCCRegBankID)
419 bool Is64 =
Size > 32 || (DstRB->
getID() == AMDGPU::VCCRegBankID &&
432bool AMDGPUInstructionSelector::selectG_ADD_SUB(
MachineInstr &
I)
const {
435 Register DstReg =
I.getOperand(0).getReg();
437 LLT Ty = MRI->getType(DstReg);
442 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
443 const bool IsSALU = DstRB->
getID() == AMDGPU::SGPRRegBankID;
444 const bool Sub =
I.getOpcode() == TargetOpcode::G_SUB;
448 const unsigned Opc =
Sub ? AMDGPU::S_SUB_U32 : AMDGPU::S_ADD_U32;
451 .
add(
I.getOperand(1))
452 .
add(
I.getOperand(2))
459 if (STI.hasAddNoCarryInsts()) {
460 const unsigned Opc =
Sub ? AMDGPU::V_SUB_U32_e64 : AMDGPU::V_ADD_U32_e64;
461 I.setDesc(TII.get(
Opc));
468 const unsigned Opc =
Sub ? AMDGPU::V_SUB_CO_U32_e64 : AMDGPU::V_ADD_CO_U32_e64;
470 Register UnusedCarry = MRI->createVirtualRegister(TRI.getWaveMaskRegClass());
474 .
add(
I.getOperand(1))
475 .
add(
I.getOperand(2))
482 assert(!
Sub &&
"illegal sub should not reach here");
484 const TargetRegisterClass &RC
485 = IsSALU ? AMDGPU::SReg_64_XEXECRegClass : AMDGPU::VReg_64RegClass;
486 const TargetRegisterClass &HalfRC
487 = IsSALU ? AMDGPU::SReg_32RegClass : AMDGPU::VGPR_32RegClass;
489 MachineOperand Lo1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub0));
490 MachineOperand Lo2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub0));
491 MachineOperand Hi1(getSubOperand64(
I.getOperand(1), HalfRC, AMDGPU::sub1));
492 MachineOperand Hi2(getSubOperand64(
I.getOperand(2), HalfRC, AMDGPU::sub1));
494 Register DstLo = MRI->createVirtualRegister(&HalfRC);
495 Register DstHi = MRI->createVirtualRegister(&HalfRC);
498 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_U32), DstLo)
501 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADDC_U32), DstHi)
506 const TargetRegisterClass *CarryRC = TRI.getWaveMaskRegClass();
507 Register CarryReg = MRI->createVirtualRegister(CarryRC);
508 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADD_CO_U32_e64), DstLo)
513 MachineInstr *Addc =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_ADDC_U32_e64), DstHi)
523 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
530 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
537bool AMDGPUInstructionSelector::selectG_UADDO_USUBO_UADDE_USUBE(
542 Register Dst0Reg =
I.getOperand(0).getReg();
543 Register Dst1Reg =
I.getOperand(1).getReg();
544 const bool IsAdd =
I.getOpcode() == AMDGPU::G_UADDO ||
545 I.getOpcode() == AMDGPU::G_UADDE;
546 const bool HasCarryIn =
I.getOpcode() == AMDGPU::G_UADDE ||
547 I.getOpcode() == AMDGPU::G_USUBE;
549 if (isVCC(Dst1Reg, *MRI)) {
550 unsigned NoCarryOpc =
551 IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
552 unsigned CarryOpc = IsAdd ? AMDGPU::V_ADDC_U32_e64 : AMDGPU::V_SUBB_U32_e64;
553 I.setDesc(TII.get(HasCarryIn ? CarryOpc : NoCarryOpc));
560 Register Src0Reg =
I.getOperand(2).getReg();
561 Register Src1Reg =
I.getOperand(3).getReg();
564 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
565 .
addReg(
I.getOperand(4).getReg());
568 unsigned NoCarryOpc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
569 unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
571 auto CarryInst =
BuildMI(*BB, &
I,
DL, TII.get(HasCarryIn ? CarryOpc : NoCarryOpc), Dst0Reg)
572 .
add(
I.getOperand(2))
573 .
add(
I.getOperand(3));
575 if (MRI->use_nodbg_empty(Dst1Reg)) {
576 CarryInst.setOperandDead(3);
578 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), Dst1Reg)
580 if (!MRI->getRegClassOrNull(Dst1Reg))
581 MRI->setRegClass(Dst1Reg, &AMDGPU::SReg_32RegClass);
584 if (!RBI.constrainGenericRegister(Dst0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
585 !RBI.constrainGenericRegister(Src0Reg, AMDGPU::SReg_32RegClass, *MRI) ||
586 !RBI.constrainGenericRegister(Src1Reg, AMDGPU::SReg_32RegClass, *MRI))
590 !RBI.constrainGenericRegister(
I.getOperand(4).getReg(),
591 AMDGPU::SReg_32RegClass, *MRI))
598bool AMDGPUInstructionSelector::selectG_AMDGPU_MAD_64_32(
602 const bool IsUnsigned =
I.getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32;
603 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() &&
604 MRI->use_nodbg_empty(
I.getOperand(1).getReg());
607 if (Subtarget->hasMADIntraFwdBug())
608 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_gfx11_e64
609 : AMDGPU::V_MAD_I64_I32_gfx11_e64;
611 Opc = IsUnsigned ? AMDGPU::V_MAD_NC_U64_U32_e64
612 : AMDGPU::V_MAD_NC_I64_I32_e64;
614 Opc = IsUnsigned ? AMDGPU::V_MAD_U64_U32_e64 : AMDGPU::V_MAD_I64_I32_e64;
619 I.setDesc(TII.get(
Opc));
621 I.addImplicitDefUseOperands(*
MF);
622 I.getOperand(0).setIsEarlyClobber(
true);
628bool AMDGPUInstructionSelector::selectG_EXTRACT(
MachineInstr &
I)
const {
630 Register DstReg =
I.getOperand(0).getReg();
631 Register SrcReg =
I.getOperand(1).getReg();
632 LLT DstTy = MRI->getType(DstReg);
633 LLT SrcTy = MRI->getType(SrcReg);
638 unsigned Offset =
I.getOperand(2).getImm();
639 if (
Offset % 32 != 0 || DstSize > 128)
647 const TargetRegisterClass *DstRC =
648 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
649 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
652 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
653 const TargetRegisterClass *SrcRC =
654 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
659 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubReg);
664 *SrcRC,
I.getOperand(1));
666 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::COPY), DstReg)
667 .
addReg(SrcReg, {}, SubReg);
673bool AMDGPUInstructionSelector::selectS16MergeToS32(
MachineInstr &
MI)
const {
678 LLT Src0Ty = MRI->getType(Src0);
679 LLT Src1Ty = MRI->getType(Src1);
681 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
682 const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, TRI);
683 const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, TRI);
684 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
690 MachineBasicBlock *BB =
MI.getParent();
695 if (Src0Bank->
getID() == AMDGPU::VGPRRegBankID &&
696 Src1Bank->
getID() == AMDGPU::VGPRRegBankID &&
698 BuildMI(*BB,
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), Dst)
704 if (!RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI))
707 MI.eraseFromParent();
712 Register TmpReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
713 auto MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_AND_B32_e32), TmpReg)
718 MIB =
BuildMI(*BB,
MI,
DL, TII.get(AMDGPU::V_LSHL_OR_B32_e64), Dst)
724 MI.eraseFromParent();
747 unsigned Opc = AMDGPU::S_PACK_LL_B32_B16;
748 if (Shift0 && Shift1) {
749 Opc = AMDGPU::S_PACK_HH_B32_B16;
750 MI.getOperand(1).setReg(ShiftSrc0);
751 MI.getOperand(2).setReg(ShiftSrc1);
753 Opc = AMDGPU::S_PACK_LH_B32_B16;
754 MI.getOperand(2).setReg(ShiftSrc1);
758 if (ConstSrc1 && ConstSrc1->Value == 0) {
760 auto MIB =
BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst)
765 MI.eraseFromParent();
769 if (STI.hasSPackHL()) {
770 Opc = AMDGPU::S_PACK_HL_B32_B16;
771 MI.getOperand(1).setReg(ShiftSrc0);
775 MI.setDesc(TII.get(
Opc));
780bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(
MachineInstr &
MI)
const {
781 MachineBasicBlock *BB =
MI.getParent();
783 LLT DstTy = MRI->getType(DstReg);
784 LLT SrcTy = MRI->getType(
MI.getOperand(1).getReg());
790 MI.getNumOperands() == 3) {
791 return selectS16MergeToS32(
MI);
797 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
799 const TargetRegisterClass *DstRC =
800 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
804 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(DstRC, SrcSize / 8);
805 MachineInstrBuilder MIB =
806 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::REG_SEQUENCE), DstReg);
807 for (
int I = 0,
E =
MI.getNumOperands() - 1;
I !=
E; ++
I) {
808 MachineOperand &Src =
MI.getOperand(
I + 1);
812 const TargetRegisterClass *SrcRC
813 = TRI.getConstrainedRegClassForOperand(Src, *MRI);
814 if (SrcRC && !RBI.constrainGenericRegister(Src.getReg(), *SrcRC, *MRI))
818 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
821 MI.eraseFromParent();
825bool AMDGPUInstructionSelector::selectG_UNMERGE_VALUES(
MachineInstr &
MI)
const {
826 MachineBasicBlock *BB =
MI.getParent();
827 const int NumDst =
MI.getNumOperands() - 1;
829 MachineOperand &Src =
MI.getOperand(NumDst);
833 LLT DstTy = MRI->getType(DstReg0);
834 LLT SrcTy = MRI->getType(SrcReg);
839 const RegisterBank *SrcBank = RBI.getRegBank(SrcReg, *MRI, TRI);
841 const TargetRegisterClass *SrcRC =
842 TRI.getRegClassForSizeOnBank(SrcSize, *SrcBank);
843 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
849 ArrayRef<int16_t> SubRegs = TRI.getRegSplitParts(SrcRC, DstSize / 8);
850 for (
int I = 0,
E = NumDst;
I !=
E; ++
I) {
851 MachineOperand &Dst =
MI.getOperand(
I);
853 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID &&
854 SubRegs[
I] == AMDGPU::hi16) {
855 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_LSHR_B32), Dst.getReg())
859 BuildMI(*BB, &
MI,
DL, TII.get(TargetOpcode::COPY), Dst.getReg())
860 .
addReg(SrcReg, {}, SubRegs[
I]);
864 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubRegs[
I]);
865 if (!SrcRC || !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI))
868 const TargetRegisterClass *DstRC =
869 TRI.getConstrainedRegClassForOperand(Dst, *MRI);
870 if (DstRC && !RBI.constrainGenericRegister(Dst.getReg(), *DstRC, *MRI))
874 MI.eraseFromParent();
878bool AMDGPUInstructionSelector::selectG_BUILD_VECTOR(
MachineInstr &
MI)
const {
879 assert(
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC ||
880 MI.getOpcode() == AMDGPU::G_BUILD_VECTOR);
884 LLT SrcTy = MRI->getType(Src0);
888 if (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR && SrcSize >= 32) {
889 return selectG_MERGE_VALUES(
MI);
896 (
MI.getOpcode() == AMDGPU::G_BUILD_VECTOR_TRUNC &&
900 const RegisterBank *DstBank = RBI.getRegBank(Dst, *MRI, TRI);
901 if (DstBank->
getID() == AMDGPU::AGPRRegBankID)
904 assert(DstBank->
getID() == AMDGPU::SGPRRegBankID ||
905 DstBank->
getID() == AMDGPU::VGPRRegBankID);
906 const bool IsVector = DstBank->
getID() == AMDGPU::VGPRRegBankID;
909 MachineBasicBlock *BB =
MI.getParent();
919 const int64_t K0 = ConstSrc0->Value.getSExtValue();
920 const int64_t K1 = ConstSrc1->Value.getSExtValue();
921 uint32_t Lo16 =
static_cast<uint32_t
>(K0) & 0xffff;
922 uint32_t Hi16 =
static_cast<uint32_t
>(K1) & 0xffff;
923 uint32_t
Imm = Lo16 | (Hi16 << 16);
928 MI.eraseFromParent();
929 return RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI);
934 MI.eraseFromParent();
935 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
946 if (Src1Def->
getOpcode() == AMDGPU::G_IMPLICIT_DEF) {
947 MI.setDesc(TII.get(AMDGPU::COPY));
950 IsVector ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
951 return RBI.constrainGenericRegister(Dst, RC, *MRI) &&
952 RBI.constrainGenericRegister(Src0, RC, *MRI);
955 return selectS16MergeToS32(
MI);
958bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(
MachineInstr &
I)
const {
959 const MachineOperand &MO =
I.getOperand(0);
963 const TargetRegisterClass *RC = TRI.getConstrainedRegClassForOperand(MO, *MRI);
964 if ((!RC && !MRI->getRegBankOrNull(MO.
getReg())) ||
965 (RC && RBI.constrainGenericRegister(MO.
getReg(), *RC, *MRI))) {
966 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
973bool AMDGPUInstructionSelector::selectG_INSERT(
MachineInstr &
I)
const {
976 Register DstReg =
I.getOperand(0).getReg();
977 Register Src0Reg =
I.getOperand(1).getReg();
978 Register Src1Reg =
I.getOperand(2).getReg();
979 LLT Src1Ty = MRI->getType(Src1Reg);
981 unsigned DstSize = MRI->getType(DstReg).getSizeInBits();
984 int64_t
Offset =
I.getOperand(3).getImm();
987 if (
Offset % 32 != 0 || InsSize % 32 != 0)
994 unsigned SubReg = TRI.getSubRegFromChannel(
Offset / 32, InsSize / 32);
995 if (SubReg == AMDGPU::NoSubRegister)
998 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
999 const TargetRegisterClass *DstRC =
1000 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
1004 const RegisterBank *Src0Bank = RBI.getRegBank(Src0Reg, *MRI, TRI);
1005 const RegisterBank *Src1Bank = RBI.getRegBank(Src1Reg, *MRI, TRI);
1006 const TargetRegisterClass *Src0RC =
1007 TRI.getRegClassForSizeOnBank(DstSize, *Src0Bank);
1008 const TargetRegisterClass *Src1RC =
1009 TRI.getRegClassForSizeOnBank(InsSize, *Src1Bank);
1013 Src0RC = TRI.getSubClassWithSubReg(Src0RC, SubReg);
1014 if (!Src0RC || !Src1RC)
1017 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
1018 !RBI.constrainGenericRegister(Src0Reg, *Src0RC, *MRI) ||
1019 !RBI.constrainGenericRegister(Src1Reg, *Src1RC, *MRI))
1023 BuildMI(*BB, &
I,
DL, TII.get(TargetOpcode::INSERT_SUBREG), DstReg)
1028 I.eraseFromParent();
1032bool AMDGPUInstructionSelector::selectG_SBFX_UBFX(
MachineInstr &
MI)
const {
1035 Register OffsetReg =
MI.getOperand(2).getReg();
1036 Register WidthReg =
MI.getOperand(3).getReg();
1038 assert(RBI.getRegBank(DstReg, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID &&
1039 "scalar BFX instructions are expanded in regbankselect");
1040 assert(MRI->getType(
MI.getOperand(0).getReg()).getSizeInBits() == 32 &&
1041 "64-bit vector BFX instructions are expanded in regbankselect");
1044 MachineBasicBlock *
MBB =
MI.getParent();
1046 bool IsSigned =
MI.getOpcode() == TargetOpcode::G_SBFX;
1047 unsigned Opc = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
1052 MI.eraseFromParent();
1057bool AMDGPUInstructionSelector::selectInterpP1F16(
MachineInstr &
MI)
const {
1058 if (STI.getLDSBankCount() != 16)
1064 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI) ||
1065 !RBI.constrainGenericRegister(Dst, AMDGPU::VGPR_32RegClass, *MRI) ||
1066 !RBI.constrainGenericRegister(Src0, AMDGPU::VGPR_32RegClass, *MRI))
1076 Register InterpMov = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
1078 MachineBasicBlock *
MBB =
MI.getParent();
1082 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_INTERP_MOV_F32), InterpMov)
1085 .
addImm(
MI.getOperand(3).getImm());
1098 MI.eraseFromParent();
1107bool AMDGPUInstructionSelector::selectWritelane(
MachineInstr &
MI)
const {
1109 if (STI.getConstantBusLimit(AMDGPU::V_WRITELANE_B32) > 1)
1112 MachineBasicBlock *
MBB =
MI.getParent();
1116 Register LaneSelect =
MI.getOperand(3).getReg();
1119 auto MIB =
BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::V_WRITELANE_B32), VDst);
1121 std::optional<ValueAndVReg> ConstSelect =
1127 MIB.
addImm(ConstSelect->Value.getSExtValue() &
1130 std::optional<ValueAndVReg> ConstVal =
1136 STI.hasInv2PiInlineImm())) {
1137 MIB.
addImm(ConstVal->Value.getSExtValue());
1145 RBI.constrainGenericRegister(LaneSelect, AMDGPU::SReg_32_XM0RegClass, *MRI);
1147 BuildMI(*
MBB, *MIB,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
1155 MI.eraseFromParent();
1162bool AMDGPUInstructionSelector::selectDivScale(
MachineInstr &
MI)
const {
1166 LLT Ty = MRI->getType(Dst0);
1169 Opc = AMDGPU::V_DIV_SCALE_F32_e64;
1171 Opc = AMDGPU::V_DIV_SCALE_F64_e64;
1178 MachineBasicBlock *
MBB =
MI.getParent();
1182 unsigned ChooseDenom =
MI.getOperand(5).getImm();
1184 Register Src0 = ChooseDenom != 0 ? Numer : Denom;
1197 MI.eraseFromParent();
1202bool AMDGPUInstructionSelector::selectG_INTRINSIC(
MachineInstr &
I)
const {
1204 switch (IntrinsicID) {
1205 case Intrinsic::amdgcn_if_break: {
1210 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::SI_IF_BREAK))
1211 .
add(
I.getOperand(0))
1212 .
add(
I.getOperand(2))
1213 .
add(
I.getOperand(3));
1215 Register DstReg =
I.getOperand(0).getReg();
1216 Register Src0Reg =
I.getOperand(2).getReg();
1217 Register Src1Reg =
I.getOperand(3).getReg();
1219 I.eraseFromParent();
1222 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
1226 case Intrinsic::amdgcn_interp_p1_f16:
1227 return selectInterpP1F16(
I);
1228 case Intrinsic::amdgcn_wqm:
1229 return constrainCopyLikeIntrin(
I, AMDGPU::WQM);
1230 case Intrinsic::amdgcn_softwqm:
1231 return constrainCopyLikeIntrin(
I, AMDGPU::SOFT_WQM);
1232 case Intrinsic::amdgcn_strict_wwm:
1233 case Intrinsic::amdgcn_wwm:
1234 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WWM);
1235 case Intrinsic::amdgcn_strict_wqm:
1236 return constrainCopyLikeIntrin(
I, AMDGPU::STRICT_WQM);
1237 case Intrinsic::amdgcn_writelane:
1238 return selectWritelane(
I);
1239 case Intrinsic::amdgcn_div_scale:
1240 return selectDivScale(
I);
1241 case Intrinsic::amdgcn_icmp:
1242 case Intrinsic::amdgcn_fcmp:
1245 return selectIntrinsicCmp(
I);
1246 case Intrinsic::amdgcn_ballot:
1247 return selectBallot(
I);
1248 case Intrinsic::amdgcn_reloc_constant:
1249 return selectRelocConstant(
I);
1250 case Intrinsic::amdgcn_groupstaticsize:
1251 return selectGroupStaticSize(
I);
1252 case Intrinsic::returnaddress:
1253 return selectReturnAddress(
I);
1254 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
1255 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
1256 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
1257 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
1258 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
1259 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
1260 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
1261 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
1262 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
1263 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
1264 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
1265 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
1266 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
1267 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
1268 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
1269 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
1270 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
1271 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
1272 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
1273 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
1274 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
1275 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
1276 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
1277 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
1278 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
1279 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
1280 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
1281 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
1282 return selectSMFMACIntrin(
I);
1283 case Intrinsic::amdgcn_permlane16_swap:
1284 case Intrinsic::amdgcn_permlane32_swap:
1285 return selectPermlaneSwapIntrin(
I, IntrinsicID);
1286 case Intrinsic::amdgcn_wave_shuffle:
1287 return selectWaveShuffleIntrin(
I);
1288 case Intrinsic::amdgcn_fma_legacy:
1289 if (!STI.hasFmaLegacy32Insts()) {
1294 case Intrinsic::amdgcn_sudot4:
1295 case Intrinsic::amdgcn_sudot8:
1296 if (!STI.hasDot8Insts()) {
1301 case Intrinsic::amdgcn_permlane16:
1302 case Intrinsic::amdgcn_permlanex16:
1303 if (!STI.hasPermlane16Insts()) {
1308 case Intrinsic::amdgcn_mov_dpp8:
1309 if (!STI.hasDPP8()) {
1314 case Intrinsic::amdgcn_tanh:
1315 if (!STI.hasTanhInsts()) {
1330 if (
Size == 16 && !ST.has16BitInsts())
1333 const auto Select = [&](
unsigned S16Opc,
unsigned TrueS16Opc,
1334 unsigned FakeS16Opc,
unsigned S32Opc,
1337 return ST.hasTrue16BitInsts()
1338 ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
1349 return Select(AMDGPU::V_CMP_NE_U16_e64, AMDGPU::V_CMP_NE_U16_t16_e64,
1350 AMDGPU::V_CMP_NE_U16_fake16_e64, AMDGPU::V_CMP_NE_U32_e64,
1351 AMDGPU::V_CMP_NE_U64_e64);
1353 return Select(AMDGPU::V_CMP_EQ_U16_e64, AMDGPU::V_CMP_EQ_U16_t16_e64,
1354 AMDGPU::V_CMP_EQ_U16_fake16_e64, AMDGPU::V_CMP_EQ_U32_e64,
1355 AMDGPU::V_CMP_EQ_U64_e64);
1357 return Select(AMDGPU::V_CMP_GT_I16_e64, AMDGPU::V_CMP_GT_I16_t16_e64,
1358 AMDGPU::V_CMP_GT_I16_fake16_e64, AMDGPU::V_CMP_GT_I32_e64,
1359 AMDGPU::V_CMP_GT_I64_e64);
1361 return Select(AMDGPU::V_CMP_GE_I16_e64, AMDGPU::V_CMP_GE_I16_t16_e64,
1362 AMDGPU::V_CMP_GE_I16_fake16_e64, AMDGPU::V_CMP_GE_I32_e64,
1363 AMDGPU::V_CMP_GE_I64_e64);
1365 return Select(AMDGPU::V_CMP_LT_I16_e64, AMDGPU::V_CMP_LT_I16_t16_e64,
1366 AMDGPU::V_CMP_LT_I16_fake16_e64, AMDGPU::V_CMP_LT_I32_e64,
1367 AMDGPU::V_CMP_LT_I64_e64);
1369 return Select(AMDGPU::V_CMP_LE_I16_e64, AMDGPU::V_CMP_LE_I16_t16_e64,
1370 AMDGPU::V_CMP_LE_I16_fake16_e64, AMDGPU::V_CMP_LE_I32_e64,
1371 AMDGPU::V_CMP_LE_I64_e64);
1373 return Select(AMDGPU::V_CMP_GT_U16_e64, AMDGPU::V_CMP_GT_U16_t16_e64,
1374 AMDGPU::V_CMP_GT_U16_fake16_e64, AMDGPU::V_CMP_GT_U32_e64,
1375 AMDGPU::V_CMP_GT_U64_e64);
1377 return Select(AMDGPU::V_CMP_GE_U16_e64, AMDGPU::V_CMP_GE_U16_t16_e64,
1378 AMDGPU::V_CMP_GE_U16_fake16_e64, AMDGPU::V_CMP_GE_U32_e64,
1379 AMDGPU::V_CMP_GE_U64_e64);
1381 return Select(AMDGPU::V_CMP_LT_U16_e64, AMDGPU::V_CMP_LT_U16_t16_e64,
1382 AMDGPU::V_CMP_LT_U16_fake16_e64, AMDGPU::V_CMP_LT_U32_e64,
1383 AMDGPU::V_CMP_LT_U64_e64);
1385 return Select(AMDGPU::V_CMP_LE_U16_e64, AMDGPU::V_CMP_LE_U16_t16_e64,
1386 AMDGPU::V_CMP_LE_U16_fake16_e64, AMDGPU::V_CMP_LE_U32_e64,
1387 AMDGPU::V_CMP_LE_U64_e64);
1390 return Select(AMDGPU::V_CMP_EQ_F16_e64, AMDGPU::V_CMP_EQ_F16_t16_e64,
1391 AMDGPU::V_CMP_EQ_F16_fake16_e64, AMDGPU::V_CMP_EQ_F32_e64,
1392 AMDGPU::V_CMP_EQ_F64_e64);
1394 return Select(AMDGPU::V_CMP_GT_F16_e64, AMDGPU::V_CMP_GT_F16_t16_e64,
1395 AMDGPU::V_CMP_GT_F16_fake16_e64, AMDGPU::V_CMP_GT_F32_e64,
1396 AMDGPU::V_CMP_GT_F64_e64);
1398 return Select(AMDGPU::V_CMP_GE_F16_e64, AMDGPU::V_CMP_GE_F16_t16_e64,
1399 AMDGPU::V_CMP_GE_F16_fake16_e64, AMDGPU::V_CMP_GE_F32_e64,
1400 AMDGPU::V_CMP_GE_F64_e64);
1402 return Select(AMDGPU::V_CMP_LT_F16_e64, AMDGPU::V_CMP_LT_F16_t16_e64,
1403 AMDGPU::V_CMP_LT_F16_fake16_e64, AMDGPU::V_CMP_LT_F32_e64,
1404 AMDGPU::V_CMP_LT_F64_e64);
1406 return Select(AMDGPU::V_CMP_LE_F16_e64, AMDGPU::V_CMP_LE_F16_t16_e64,
1407 AMDGPU::V_CMP_LE_F16_fake16_e64, AMDGPU::V_CMP_LE_F32_e64,
1408 AMDGPU::V_CMP_LE_F64_e64);
1410 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1411 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1412 AMDGPU::V_CMP_NEQ_F64_e64);
1414 return Select(AMDGPU::V_CMP_O_F16_e64, AMDGPU::V_CMP_O_F16_t16_e64,
1415 AMDGPU::V_CMP_O_F16_fake16_e64, AMDGPU::V_CMP_O_F32_e64,
1416 AMDGPU::V_CMP_O_F64_e64);
1418 return Select(AMDGPU::V_CMP_U_F16_e64, AMDGPU::V_CMP_U_F16_t16_e64,
1419 AMDGPU::V_CMP_U_F16_fake16_e64, AMDGPU::V_CMP_U_F32_e64,
1420 AMDGPU::V_CMP_U_F64_e64);
1422 return Select(AMDGPU::V_CMP_NLG_F16_e64, AMDGPU::V_CMP_NLG_F16_t16_e64,
1423 AMDGPU::V_CMP_NLG_F16_fake16_e64, AMDGPU::V_CMP_NLG_F32_e64,
1424 AMDGPU::V_CMP_NLG_F64_e64);
1426 return Select(AMDGPU::V_CMP_NLE_F16_e64, AMDGPU::V_CMP_NLE_F16_t16_e64,
1427 AMDGPU::V_CMP_NLE_F16_fake16_e64, AMDGPU::V_CMP_NLE_F32_e64,
1428 AMDGPU::V_CMP_NLE_F64_e64);
1430 return Select(AMDGPU::V_CMP_NLT_F16_e64, AMDGPU::V_CMP_NLT_F16_t16_e64,
1431 AMDGPU::V_CMP_NLT_F16_fake16_e64, AMDGPU::V_CMP_NLT_F32_e64,
1432 AMDGPU::V_CMP_NLT_F64_e64);
1434 return Select(AMDGPU::V_CMP_NGE_F16_e64, AMDGPU::V_CMP_NGE_F16_t16_e64,
1435 AMDGPU::V_CMP_NGE_F16_fake16_e64, AMDGPU::V_CMP_NGE_F32_e64,
1436 AMDGPU::V_CMP_NGE_F64_e64);
1438 return Select(AMDGPU::V_CMP_NGT_F16_e64, AMDGPU::V_CMP_NGT_F16_t16_e64,
1439 AMDGPU::V_CMP_NGT_F16_fake16_e64, AMDGPU::V_CMP_NGT_F32_e64,
1440 AMDGPU::V_CMP_NGT_F64_e64);
1442 return Select(AMDGPU::V_CMP_NEQ_F16_e64, AMDGPU::V_CMP_NEQ_F16_t16_e64,
1443 AMDGPU::V_CMP_NEQ_F16_fake16_e64, AMDGPU::V_CMP_NEQ_F32_e64,
1444 AMDGPU::V_CMP_NEQ_F64_e64);
1446 return Select(AMDGPU::V_CMP_TRU_F16_e64, AMDGPU::V_CMP_TRU_F16_t16_e64,
1447 AMDGPU::V_CMP_TRU_F16_fake16_e64, AMDGPU::V_CMP_TRU_F32_e64,
1448 AMDGPU::V_CMP_TRU_F64_e64);
1450 return Select(AMDGPU::V_CMP_F_F16_e64, AMDGPU::V_CMP_F_F16_t16_e64,
1451 AMDGPU::V_CMP_F_F16_fake16_e64, AMDGPU::V_CMP_F_F32_e64,
1452 AMDGPU::V_CMP_F_F64_e64);
1457 unsigned Size)
const {
1459 if (!STI.hasScalarCompareEq64())
1464 return AMDGPU::S_CMP_LG_U64;
1466 return AMDGPU::S_CMP_EQ_U64;
1475 return AMDGPU::S_CMP_LG_U32;
1477 return AMDGPU::S_CMP_EQ_U32;
1479 return AMDGPU::S_CMP_GT_I32;
1481 return AMDGPU::S_CMP_GE_I32;
1483 return AMDGPU::S_CMP_LT_I32;
1485 return AMDGPU::S_CMP_LE_I32;
1487 return AMDGPU::S_CMP_GT_U32;
1489 return AMDGPU::S_CMP_GE_U32;
1491 return AMDGPU::S_CMP_LT_U32;
1493 return AMDGPU::S_CMP_LE_U32;
1495 return AMDGPU::S_CMP_EQ_F32;
1497 return AMDGPU::S_CMP_GT_F32;
1499 return AMDGPU::S_CMP_GE_F32;
1501 return AMDGPU::S_CMP_LT_F32;
1503 return AMDGPU::S_CMP_LE_F32;
1505 return AMDGPU::S_CMP_LG_F32;
1507 return AMDGPU::S_CMP_O_F32;
1509 return AMDGPU::S_CMP_U_F32;
1511 return AMDGPU::S_CMP_NLG_F32;
1513 return AMDGPU::S_CMP_NLE_F32;
1515 return AMDGPU::S_CMP_NLT_F32;
1517 return AMDGPU::S_CMP_NGE_F32;
1519 return AMDGPU::S_CMP_NGT_F32;
1521 return AMDGPU::S_CMP_NEQ_F32;
1528 if (!STI.hasSALUFloatInsts())
1533 return AMDGPU::S_CMP_EQ_F16;
1535 return AMDGPU::S_CMP_GT_F16;
1537 return AMDGPU::S_CMP_GE_F16;
1539 return AMDGPU::S_CMP_LT_F16;
1541 return AMDGPU::S_CMP_LE_F16;
1543 return AMDGPU::S_CMP_LG_F16;
1545 return AMDGPU::S_CMP_O_F16;
1547 return AMDGPU::S_CMP_U_F16;
1549 return AMDGPU::S_CMP_NLG_F16;
1551 return AMDGPU::S_CMP_NLE_F16;
1553 return AMDGPU::S_CMP_NLT_F16;
1555 return AMDGPU::S_CMP_NGE_F16;
1557 return AMDGPU::S_CMP_NGT_F16;
1559 return AMDGPU::S_CMP_NEQ_F16;
1568bool AMDGPUInstructionSelector::selectG_ICMP_or_FCMP(
MachineInstr &
I)
const {
1573 Register SrcReg =
I.getOperand(2).getReg();
1574 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1578 Register CCReg =
I.getOperand(0).getReg();
1579 if (!isVCC(CCReg, *MRI)) {
1580 int Opcode = getS_CMPOpcode(Pred,
Size);
1583 MachineInstr *ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode))
1584 .
add(
I.getOperand(2))
1585 .
add(
I.getOperand(3));
1586 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CCReg)
1590 RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32RegClass, *MRI);
1591 I.eraseFromParent();
1595 if (
I.getOpcode() == AMDGPU::G_FCMP)
1602 MachineInstrBuilder ICmp;
1605 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1607 .
add(
I.getOperand(2))
1609 .
add(
I.getOperand(3))
1612 ICmp =
BuildMI(*BB, &
I,
DL, TII.get(Opcode),
I.getOperand(0).getReg())
1613 .
add(
I.getOperand(2))
1614 .
add(
I.getOperand(3));
1618 *TRI.getBoolRC(), *MRI);
1620 I.eraseFromParent();
// Selects a compare intrinsic: operand 0 is the result, operands 2 and 3 are
// the values being compared.
// Visible behaviour:
//  - one path replaces the instruction with an IMPLICIT_DEF of Dst;
//  - the main path runs selectVOP3ModsImpl on both inputs, passes them
//    through copyToVGPRIfSrcFolded, and appends a (modifiers, register) pair
//    per source to the new instruction.
// Dst is constrained to TRI.getBoolRC() on both paths.
// NOTE(review): the conditions guarding each path, and the result of the
// isVCC(Dst) check, fall in lines missing from this listing.
1624bool AMDGPUInstructionSelector::selectIntrinsicCmp(
MachineInstr &
I)
const {
1625 Register Dst =
I.getOperand(0).getReg();
1626 if (isVCC(Dst, *MRI))
1629 LLT DstTy = MRI->getType(Dst);
1635 Register SrcReg =
I.getOperand(2).getReg();
1636 unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
1644 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
1645 I.eraseFromParent();
1646 return RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1653 MachineInstrBuilder SelectedMI;
1654 MachineOperand &
LHS =
I.getOperand(2);
1655 MachineOperand &
RHS =
I.getOperand(3);
1656 auto [Src0, Src0Mods] = selectVOP3ModsImpl(
LHS.getReg());
1657 auto [Src1, Src1Mods] = selectVOP3ModsImpl(
RHS.getReg());
1659 copyToVGPRIfSrcFolded(Src0, Src0Mods,
LHS, &
I,
true);
1661 copyToVGPRIfSrcFolded(Src1, Src1Mods,
RHS, &
I,
true);
1662 SelectedMI =
BuildMI(*BB, &
I,
DL, TII.get(Opcode), Dst);
1664 SelectedMI.
addImm(Src0Mods);
1665 SelectedMI.
addReg(Src0Reg);
1667 SelectedMI.
addImm(Src1Mods);
1668 SelectedMI.
addReg(Src1Reg);
1674 RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
1677 I.eraseFromParent();
1688 if (
MI->getParent() !=
MBB)
1692 if (
MI->getOpcode() == AMDGPU::COPY) {
1695 if (DstRB && SrcRB && DstRB->
getID() == AMDGPU::VCCRegBankID &&
1696 SrcRB->getID() == AMDGPU::SGPRRegBankID)
// Selects a ballot: operand 0 is the result, operand 2 the source.
// BallotSize is the bit width of the result type; WaveSize is the subtarget's
// wavefront size. The size check below is true for every combination other
// than "BallotSize == WaveSize" and "64-bit ballot on a 32-wide wave"
// (presumably those other combinations are rejected -- the branch body is not
// in this listing).
// When the two sizes differ, Dst becomes a fresh virtual register of
// TRI.getBoolRC(), and later a fresh SReg_32 HiReg is created and a
// REG_SEQUENCE defining DstReg is emitted.
// The S_MOV / S_AND opcodes are the B64 forms for WaveSize == 64, else B32.
1713bool AMDGPUInstructionSelector::selectBallot(
MachineInstr &
I)
const {
1716 Register DstReg =
I.getOperand(0).getReg();
1717 Register SrcReg =
I.getOperand(2).getReg();
1718 const unsigned BallotSize = MRI->getType(DstReg).getSizeInBits();
1719 const unsigned WaveSize = STI.getWavefrontSize();
1723 if (BallotSize != WaveSize && (BallotSize != 64 || WaveSize != 32))
1726 std::optional<ValueAndVReg> Arg =
1731 if (BallotSize != WaveSize) {
1732 Dst = MRI->createVirtualRegister(TRI.getBoolRC());
1736 const int64_t
Value = Arg->Value.getZExtValue();
1739 unsigned Opcode = WaveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
1746 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1752 if (!RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI))
1756 unsigned AndOpc = WaveSize == 64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
1766 if (BallotSize != WaveSize) {
1767 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1769 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
1776 I.eraseFromParent();
// Selects a relocation-constant intrinsic (named so by this function;
// the intrinsic ID itself is not visible here).
// DstReg is constrained to the 32-bit register class of its bank, and a
// V_MOV_B32_e32 (VGPR bank) or S_MOV_B32 (any other bank) defining DstReg is
// emitted. Operand 2 supplies an MDNode.
// NOTE(review): how the metadata becomes the move's source operand is in
// lines missing from this listing.
1780bool AMDGPUInstructionSelector::selectRelocConstant(
MachineInstr &
I)
const {
1781 Register DstReg =
I.getOperand(0).getReg();
1782 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
1783 const TargetRegisterClass *DstRC = TRI.getRegClassForSizeOnBank(32, *DstBank);
1784 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
1787 const bool IsVALU = DstBank->
getID() == AMDGPU::VGPRRegBankID;
1789 Module *
M =
MF->getFunction().getParent();
1790 const MDNode *
Metadata =
I.getOperand(2).getMetadata();
1797 TII.get(IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32), DstReg)
1800 I.eraseFromParent();
// Selects the intrinsic handled as "group static size" (name taken from this
// function; TODO confirm which intrinsic dispatches here).
// The move opcode is S_MOV_B32 when DstReg is in the SGPR bank and
// V_MOV_B32_e32 otherwise. The function's SIMachineFunctionInfo, the parent
// Module and a GlobalValue are looked up.
// NOTE(review): the instruction construction and the use of those lookups are
// in lines missing from this listing.
1804bool AMDGPUInstructionSelector::selectGroupStaticSize(
MachineInstr &
I)
const {
1807 Register DstReg =
I.getOperand(0).getReg();
1808 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
1809 unsigned Mov = DstRB->
getID() == AMDGPU::SGPRRegBankID ?
1810 AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
1818 const SIMachineFunctionInfo *MFI =
MF->getInfo<SIMachineFunctionInfo>();
1821 Module *
M =
MF->getFunction().getParent();
1822 const GlobalValue *GV =
1827 I.eraseFromParent();
// Selects a return-address query: operand 0 is the destination and operand 2
// an immediate Depth. The destination is constrained to the class returned by
// TRI.getConstrainedRegClassForOperand.
// Two exits that erase I are visible:
//  - one inside a condition that includes isEntryFunction();
//  - one after fetching the frame info and TRI.getReturnAddressReg(MF), which
//    is used together with SReg_64.
// NOTE(review): the instructions emitted on each path are in lines missing
// from this listing.
1832bool AMDGPUInstructionSelector::selectReturnAddress(
MachineInstr &
I)
const {
1837 MachineOperand &Dst =
I.getOperand(0);
1839 unsigned Depth =
I.getOperand(2).getImm();
1841 const TargetRegisterClass *RC
1842 = TRI.getConstrainedRegClassForOperand(Dst, *MRI);
1844 !RBI.constrainGenericRegister(DstReg, *RC, *MRI))
1849 MF.getInfo<SIMachineFunctionInfo>()->isEntryFunction()) {
1852 I.eraseFromParent();
1856 MachineFrameInfo &MFI =
MF.getFrameInfo();
1861 Register ReturnAddrReg = TRI.getReturnAddressReg(
MF);
1863 AMDGPU::SReg_64RegClass,
DL);
1866 I.eraseFromParent();
// Replaces MI with an SI_END_CF that takes MI's operand 1, erases MI, and
// then assigns the wave-mask register class to Reg if Reg has no register
// class yet.
// NOTE(review): Reg is defined on a line missing from this listing --
// presumably the register of operand 1; confirm against the full source.
1870bool AMDGPUInstructionSelector::selectEndCfIntrinsic(
MachineInstr &
MI)
const {
1873 MachineBasicBlock *BB =
MI.getParent();
1874 BuildMI(*BB, &
MI,
MI.getDebugLoc(), TII.get(AMDGPU::SI_END_CF))
1875 .
add(
MI.getOperand(1));
1878 MI.eraseFromParent();
1880 if (!MRI->getRegClassOrNull(
Reg))
1881 MRI->setRegClass(
Reg, TRI.getWaveMaskRegClass());
// Selects ds_ordered_add / ds_ordered_swap into DS_ORDERED_COUNT.
// Operand 7 is a packed index immediate, operands 8 and 9 are the
// wave_release and wave_done flags (non-zero means set).
// Error strings visible below cover: wave_done without wave_release, a dword
// count outside 1..4, and a bad index operand.
// Offset encoding assembled below:
//   OrderedCountIndex = low 6 bits of the index operand
//   CountDw           = bits 24..27 of the index operand (on the path shown)
//   Instruction       = 0 for ds_ordered_add, 1 otherwise
//   Offset0 = OrderedCountIndex << 2
//   Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4)
//             then OR-ed with (CountDw - 1) << 6 and ShaderType << 2
//   Offset  = Offset0 | (Offset1 << 8)
// NOTE(review): the conditions around the CountDw / ShaderType steps, the
// parameter list and the M0 setup are in lines missing from this listing.
1885bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
1887 MachineBasicBlock *
MBB =
MI.getParent();
1891 unsigned IndexOperand =
MI.getOperand(7).getImm();
1892 bool WaveRelease =
MI.getOperand(8).getImm() != 0;
1893 bool WaveDone =
MI.getOperand(9).getImm() != 0;
1895 if (WaveDone && !WaveRelease) {
1899 Fn,
"ds_ordered_count: wave_done requires wave_release",
DL));
1902 unsigned OrderedCountIndex = IndexOperand & 0x3f;
1903 IndexOperand &= ~0x3f;
1904 unsigned CountDw = 0;
1907 CountDw = (IndexOperand >> 24) & 0xf;
1908 IndexOperand &= ~(0xf << 24);
1910 if (CountDw < 1 || CountDw > 4) {
1913 Fn,
"ds_ordered_count: dword count must be between 1 and 4",
DL));
1921 Fn,
"ds_ordered_count: bad index operand",
DL));
1924 unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
1927 unsigned Offset0 = OrderedCountIndex << 2;
1928 unsigned Offset1 = WaveRelease | (WaveDone << 1) | (Instruction << 4);
1931 Offset1 |= (CountDw - 1) << 6;
1934 Offset1 |= ShaderType << 2;
1936 unsigned Offset = Offset0 | (Offset1 << 8);
1944 MachineInstrBuilder
DS =
1945 BuildMI(*
MBB, &
MI,
DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
1950 if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI))
1954 MI.eraseFromParent();
1960 case Intrinsic::amdgcn_ds_gws_init:
1961 return AMDGPU::DS_GWS_INIT;
1962 case Intrinsic::amdgcn_ds_gws_barrier:
1963 return AMDGPU::DS_GWS_BARRIER;
1964 case Intrinsic::amdgcn_ds_gws_sema_v:
1965 return AMDGPU::DS_GWS_SEMA_V;
1966 case Intrinsic::amdgcn_ds_gws_sema_br:
1967 return AMDGPU::DS_GWS_SEMA_BR;
1968 case Intrinsic::amdgcn_ds_gws_sema_p:
1969 return AMDGPU::DS_GWS_SEMA_P;
1970 case Intrinsic::amdgcn_ds_gws_sema_release_all:
1971 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
// Selects the ds_gws_* intrinsics.
// Visible steps:
//  - subtarget check: GWS must be available, and sema_release_all
//    additionally needs hasGWSSemaReleaseAll();
//  - MI has 3 operands when a data source is present (HasVSrc) and 2
//    otherwise; the base offset is operand 2 or 1 respectively and its bank
//    is compared against SGPR;
//  - the offset's defining instruction is inspected for V_READFIRSTLANE_B32
//    (remembered in Readfirstlane) and for G_CONSTANT;
//  - BaseOffset is constrained to VGPR_32 on the Readfirstlane path and to
//    SReg_32 on another path; M0Base is a fresh SReg_32 register;
//  - the data operand's class (DataRC) comes from the selected opcode's
//    data0 operand; VSrc is constrained either to DataRC directly or to its
//    sub0 sub-class (SubRC) together with fresh DataReg / UndefReg registers.
// NOTE(review): the branch bodies, return values and emitted instructions are
// largely in lines missing from this listing.
1977bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(
MachineInstr &
MI,
1979 if (!STI.hasGWS() || (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
1980 !STI.hasGWSSemaReleaseAll()))
1984 const bool HasVSrc =
MI.getNumOperands() == 3;
1985 assert(HasVSrc ||
MI.getNumOperands() == 2);
1987 Register BaseOffset =
MI.getOperand(HasVSrc ? 2 : 1).getReg();
1988 const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
1989 if (OffsetRB->
getID() != AMDGPU::SGPRRegBankID)
1995 MachineBasicBlock *
MBB =
MI.getParent();
1998 MachineInstr *Readfirstlane =
nullptr;
2003 if (OffsetDef->
getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
2004 Readfirstlane = OffsetDef;
2009 if (OffsetDef->
getOpcode() == AMDGPU::G_CONSTANT) {
2019 std::tie(BaseOffset, ImmOffset) =
2022 if (Readfirstlane) {
2025 if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
2031 if (!RBI.constrainGenericRegister(BaseOffset,
2032 AMDGPU::SReg_32RegClass, *MRI))
2036 Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2051 const MCInstrDesc &InstrDesc = TII.get(
Opc);
2056 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
2057 const TargetRegisterClass *DataRC = TII.getRegClass(InstrDesc, Data0Idx);
2058 const TargetRegisterClass *SubRC =
2059 TRI.getSubRegisterClass(DataRC, AMDGPU::sub0);
2063 if (!RBI.constrainGenericRegister(VSrc, *DataRC, *MRI))
2073 Register DataReg = MRI->createVirtualRegister(DataRC);
2074 if (!RBI.constrainGenericRegister(VSrc, *SubRC, *MRI))
2077 Register UndefReg = MRI->createVirtualRegister(SubRC);
2096 MI.eraseFromParent();
// Selects ds_append (IsAppend == true) or ds_consume as DS_APPEND or
// DS_CONSUME respectively.
// The pointer in operand 2 is decomposed by selectDS1Addr1OffsetImpl into
// (PtrBase, Offset). If isDSOffsetLegal rejects that pair, PtrBase is reset
// to the original operand-2 register. PtrBase is constrained to SReg_32.
// NOTE(review): the M0 setup and the final instruction's operands are in
// lines missing from this listing.
2100bool AMDGPUInstructionSelector::selectDSAppendConsume(
MachineInstr &
MI,
2101 bool IsAppend)
const {
2102 Register PtrBase =
MI.getOperand(2).getReg();
2103 LLT PtrTy = MRI->getType(PtrBase);
2107 std::tie(PtrBase,
Offset) = selectDS1Addr1OffsetImpl(
MI.getOperand(2));
2110 if (!isDSOffsetLegal(PtrBase,
Offset)) {
2111 PtrBase =
MI.getOperand(2).getReg();
2115 MachineBasicBlock *
MBB =
MI.getParent();
2117 const unsigned Opc = IsAppend ? AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2121 if (!RBI.constrainGenericRegister(PtrBase, AMDGPU::SReg_32RegClass, *MRI))
2128 MI.eraseFromParent();
// Handler for Intrinsic::amdgcn_init_whole_wave (dispatched from
// selectG_INTRINSIC_W_SIDE_EFFECTS).
// NOTE(review): only the lookup of the parent MachineFunction and its
// SIMachineFunctionInfo is visible in this listing; the rest of the body is
// missing.
2133bool AMDGPUInstructionSelector::selectInitWholeWave(
MachineInstr &
MI)
const {
2134 MachineFunction *
MF =
MI.getMF();
2135 SIMachineFunctionInfo *MFInfo =
MF->getInfo<SIMachineFunctionInfo>();
2146 TFE = TexFailCtrl & 0x1;
2148 LWE = TexFailCtrl & 0x2;
2151 return TexFailCtrl == 0;
// Selects an image intrinsic into a concrete machine instruction.
// What this partial listing shows:
//  - ArgOffset is the number of explicit defs plus one; intrinsic arguments
//    are addressed relative to it.
//  - IsD16 is derived from the G_AMDGPU_INTRIN_IMAGE_{LOAD,STORE}_D16
//    opcodes.
//  - The Flags immediate (operand ArgOffset + Intr->NumArgs) carries A16 in
//    bit 0 and G16 in bit 1.
//  - Atomics: result is operand 0, data input operand 2; DMask and
//    NumVDataDwords are 0xf/4 or 0x3/2 on one branch and 0x3/2 or 0x1/1 on
//    the other, picked by Is64Bit.
//  - Non-atomics: DMask comes from the DMaskIndex argument; stores read data
//    from operand 1, loads write operand 0 with NumVDataDwords = DMaskLanes,
//    halved (rounding up) for D16 when the subtarget does not use unpacked
//    D16 memory instructions.
//  - Address operands are counted both in registers and in dwords, and the
//    two counts feed an NSA decision that depends on
//    STI.hasPartialNSAEncoding().
//  - The opcode lookup is staged by generation (GFX12+, GFX11+, GFX10+,
//    GFX90A, older), using the NSA or default encoding where applicable.
//  - For the atomic-with-return case a temporary VReg_128 / VReg_64 is used
//    and the result is taken from sub0_sub1 / sub0.
//  - Trailing immediates use -1 / 0 to encode the A16, D16 and R128A16 flags.
// NOTE(review): many lines (conditions, early returns, operand emission) are
// missing from this listing; treat the above as a map, not a full contract.
2154bool AMDGPUInstructionSelector::selectImageIntrinsic(
2156 MachineBasicBlock *
MBB =
MI.getParent();
2162 Register ResultDef =
MI.getOperand(0).getReg();
2163 if (MRI->use_nodbg_empty(ResultDef))
2167 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2176 const unsigned ArgOffset =
MI.getNumExplicitDefs() + 1;
2178 Register VDataIn = AMDGPU::NoRegister;
2179 Register VDataOut = AMDGPU::NoRegister;
2181 int NumVDataDwords = -1;
2182 bool IsD16 =
MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16 ||
2183 MI.getOpcode() == AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16;
2189 Unorm =
MI.getOperand(ArgOffset + Intr->
UnormIndex).getImm() != 0;
2193 bool IsTexFail =
false;
2195 TFE, LWE, IsTexFail))
2198 const int Flags =
MI.getOperand(ArgOffset + Intr->
NumArgs).getImm();
2199 const bool IsA16 = (
Flags & 1) != 0;
2200 const bool IsG16 = (
Flags & 2) != 0;
2203 if (IsA16 && !STI.hasG16() && !IsG16)
2207 unsigned DMaskLanes = 0;
2209 if (BaseOpcode->
Atomic) {
2211 VDataOut =
MI.getOperand(0).getReg();
2212 VDataIn =
MI.getOperand(2).getReg();
2213 LLT Ty = MRI->getType(VDataIn);
2216 const bool Is64Bit = BaseOpcode->
AtomicX2 ?
2221 assert(
MI.getOperand(3).getReg() == AMDGPU::NoRegister);
2223 DMask = Is64Bit ? 0xf : 0x3;
2224 NumVDataDwords = Is64Bit ? 4 : 2;
2226 DMask = Is64Bit ? 0x3 : 0x1;
2227 NumVDataDwords = Is64Bit ? 2 : 1;
2230 DMask =
MI.getOperand(ArgOffset + Intr->
DMaskIndex).getImm();
2233 if (BaseOpcode->
Store) {
2234 VDataIn =
MI.getOperand(1).getReg();
2235 VDataTy = MRI->getType(VDataIn);
2240 VDataOut =
MI.getOperand(0).getReg();
2241 VDataTy = MRI->getType(VDataOut);
2242 NumVDataDwords = DMaskLanes;
2244 if (IsD16 && !STI.hasUnpackedD16VMem())
2245 NumVDataDwords = (DMaskLanes + 1) / 2;
2250 if (Subtarget->hasG16() && IsG16) {
2251 const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
2254 IntrOpcode = G16MappingInfo->
G16;
2258 assert((!IsTexFail || DMaskLanes >= 1) &&
"should have legalized this");
2268 int NumVAddrRegs = 0;
2269 int NumVAddrDwords = 0;
2272 MachineOperand &AddrOp =
MI.getOperand(ArgOffset +
I);
2273 if (!AddrOp.
isReg())
2281 NumVAddrDwords += (MRI->getType(Addr).getSizeInBits() + 31) / 32;
2288 NumVAddrRegs != 1 &&
2289 (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
2290 : NumVAddrDwords == NumVAddrRegs);
2291 if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
2302 NumVDataDwords, NumVAddrDwords);
2303 }
else if (IsGFX12Plus) {
2305 NumVDataDwords, NumVAddrDwords);
2306 }
else if (IsGFX11Plus) {
2308 UseNSA ? AMDGPU::MIMGEncGfx11NSA
2309 : AMDGPU::MIMGEncGfx11Default,
2310 NumVDataDwords, NumVAddrDwords);
2311 }
else if (IsGFX10Plus) {
2313 UseNSA ? AMDGPU::MIMGEncGfx10NSA
2314 : AMDGPU::MIMGEncGfx10Default,
2315 NumVDataDwords, NumVAddrDwords);
2317 if (Subtarget->hasGFX90AInsts()) {
2319 NumVDataDwords, NumVAddrDwords);
2323 <<
"requested image instruction is not supported on this GPU\n");
2330 NumVDataDwords, NumVAddrDwords);
2333 NumVDataDwords, NumVAddrDwords);
2343 const bool Is64 = MRI->getType(VDataOut).getSizeInBits() == 64;
2345 Register TmpReg = MRI->createVirtualRegister(
2346 Is64 ? &AMDGPU::VReg_128RegClass : &AMDGPU::VReg_64RegClass);
2347 unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
2350 if (!MRI->use_empty(VDataOut)) {
2363 for (
int I = 0;
I != NumVAddrRegs; ++
I) {
2364 MachineOperand &SrcOp =
MI.getOperand(ArgOffset + Intr->
VAddrStart +
I);
2365 if (SrcOp.
isReg()) {
2384 STI.hasFeature(AMDGPU::FeatureR128A16) ? -1 : 0);
2386 MIB.
addImm(IsA16 ? -1 : 0);
2388 if (!Subtarget->hasGFX90AInsts()) {
2400 MIB.
addImm(IsD16 ? -1 : 0);
2402 MI.eraseFromParent();
2404 TII.enforceOperandRCAlignment(*MIB, AMDGPU::OpName::vaddr);
// Selects the ds_bvh_stack_* intrinsics. Operand 6 is an immediate Offset.
// Opcode mapping shown below:
//   ds_bvh_stack_rtn, ds_bvh_stack_push4_pop1_rtn -> DS_BVH_STACK_RTN_B32
//   ds_bvh_stack_push8_pop1_rtn -> DS_BVH_STACK_PUSH8_POP1_RTN_B32
//   ds_bvh_stack_push8_pop2_rtn -> DS_BVH_STACK_PUSH8_POP2_RTN_B64
// NOTE(review): the subtarget checks, remaining operands and instruction
// construction are in lines missing from this listing.
2410bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
2416 MachineBasicBlock *
MBB =
MI.getParent();
2421 unsigned Offset =
MI.getOperand(6).getImm();
2425 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2426 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2427 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
2429 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2430 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
2432 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2433 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
2445 MI.eraseFromParent();
// Dispatcher for side-effecting intrinsics: switches on IntrinsicID and
// forwards to the dedicated select* helper for each group (end_cf,
// ds_ordered_*, ds_gws_*, ds_append/consume, init_whole_wave, buffer/global
// loads to LDS, tensor load/store, ds_bvh_stack_*, named-barrier and barrier
// state intrinsics).
// A few cases are handled inline or gated on a subtarget feature first:
// asyncmark (hasAsyncMark), exp_compr (hasCompressedExport), s_alloc_vgpr
// (builds S_ALLOC_VGPR from operand 2 and constrains the result to SReg_32)
// and s_wakeup_barrier (hasSWakeupBarrier).
// NOTE(review): the bodies of the feature-gated branches and the default case
// are in lines missing from this listing.
2450bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
2453 switch (IntrinsicID) {
2454 case Intrinsic::amdgcn_end_cf:
2455 return selectEndCfIntrinsic(
I);
2456 case Intrinsic::amdgcn_ds_ordered_add:
2457 case Intrinsic::amdgcn_ds_ordered_swap:
2458 return selectDSOrderedIntrinsic(
I, IntrinsicID);
2459 case Intrinsic::amdgcn_ds_gws_init:
2460 case Intrinsic::amdgcn_ds_gws_barrier:
2461 case Intrinsic::amdgcn_ds_gws_sema_v:
2462 case Intrinsic::amdgcn_ds_gws_sema_br:
2463 case Intrinsic::amdgcn_ds_gws_sema_p:
2464 case Intrinsic::amdgcn_ds_gws_sema_release_all:
2465 return selectDSGWSIntrinsic(
I, IntrinsicID);
2466 case Intrinsic::amdgcn_ds_append:
2467 return selectDSAppendConsume(
I,
true);
2468 case Intrinsic::amdgcn_ds_consume:
2469 return selectDSAppendConsume(
I,
false);
2470 case Intrinsic::amdgcn_init_whole_wave:
2471 return selectInitWholeWave(
I);
2472 case Intrinsic::amdgcn_raw_buffer_load_lds:
2473 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
2474 case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
2475 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
2476 case Intrinsic::amdgcn_struct_buffer_load_lds:
2477 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
2478 case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
2479 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
2480 return selectBufferLoadLds(
I);
2485 case Intrinsic::amdgcn_load_to_lds:
2486 case Intrinsic::amdgcn_load_async_to_lds:
2487 case Intrinsic::amdgcn_global_load_lds:
2488 case Intrinsic::amdgcn_global_load_async_lds:
2489 return selectGlobalLoadLds(
I);
2490 case Intrinsic::amdgcn_tensor_load_to_lds:
2491 case Intrinsic::amdgcn_tensor_store_from_lds:
2492 return selectTensorLoadStore(
I, IntrinsicID);
2493 case Intrinsic::amdgcn_asyncmark:
2494 case Intrinsic::amdgcn_wait_asyncmark:
2495 if (!Subtarget->hasAsyncMark())
2498 case Intrinsic::amdgcn_exp_compr:
2499 if (!STI.hasCompressedExport()) {
2504 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
2505 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
2506 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
2507 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
2508 return selectDSBvhStackIntrinsic(
I);
2509 case Intrinsic::amdgcn_s_alloc_vgpr: {
2515 Register ResReg =
I.getOperand(0).getReg();
2517 MachineInstr *AllocMI =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_ALLOC_VGPR))
2518 .
add(
I.getOperand(2));
2521 I.eraseFromParent();
2523 return RBI.constrainGenericRegister(ResReg, AMDGPU::SReg_32RegClass, *MRI);
2525 case Intrinsic::amdgcn_s_barrier_init:
2526 case Intrinsic::amdgcn_s_barrier_signal_var:
2527 return selectNamedBarrierInit(
I, IntrinsicID);
2528 case Intrinsic::amdgcn_s_wakeup_barrier: {
2529 if (!STI.hasSWakeupBarrier()) {
2533 return selectNamedBarrierInst(
I, IntrinsicID);
2535 case Intrinsic::amdgcn_s_barrier_join:
2536 case Intrinsic::amdgcn_s_get_named_barrier_state:
2537 return selectNamedBarrierInst(
I, IntrinsicID);
2538 case Intrinsic::amdgcn_s_get_barrier_state:
2539 return selectSGetBarrierState(
I, IntrinsicID);
2540 case Intrinsic::amdgcn_s_barrier_signal_isfirst:
2541 return selectSBarrierSignalIsfirst(
I, IntrinsicID);
// Selects G_SELECT. Operand 0 is the result, operand 1 the condition,
// operands 2 and 3 the two values.
//  - Condition not in the VCC bank: the condition is copied into SCC and an
//    S_CSELECT_B64 (Size == 64) or S_CSELECT_B32 is built from operands 2
//    and 3. CCReg gets a register class if it had none.
//  - Otherwise: V_CNDMASK_B32_e64 is built; note that operand 3 is added
//    before operand 2, followed by the condition (operand 1).
// NOTE(review): size restrictions, source-modifier immediates and the return
// values are in lines missing from this listing.
2546bool AMDGPUInstructionSelector::selectG_SELECT(
MachineInstr &
I)
const {
2553 Register DstReg =
I.getOperand(0).getReg();
2554 unsigned Size = RBI.getSizeInBits(DstReg, *MRI, TRI);
2556 const MachineOperand &CCOp =
I.getOperand(1);
2558 if (!isVCC(CCReg, *MRI)) {
2559 unsigned SelectOpcode =
Size == 64 ? AMDGPU::S_CSELECT_B64 :
2560 AMDGPU::S_CSELECT_B32;
2561 MachineInstr *CopySCC =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::SCC)
2567 if (!MRI->getRegClassOrNull(CCReg))
2568 MRI->setRegClass(CCReg, TRI.getConstrainedRegClassForOperand(CCOp, *MRI));
2570 .
add(
I.getOperand(2))
2571 .
add(
I.getOperand(3));
2575 I.eraseFromParent();
2584 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
2586 .
add(
I.getOperand(3))
2588 .
add(
I.getOperand(2))
2589 .
add(
I.getOperand(1));
2592 I.eraseFromParent();
// Selects G_TRUNC (operand 0 = result, operand 1 = source).
// Visible structure:
//  - Source and destination classes come from getRegClassForSizeOnBank and
//    both registers are constrained to them.
//  - VGPR_16 destination from a 32-bit source: asserts real true16
//    instructions are in use and reads the lo16 sub-register of the source.
//  - A path that splits the source into sub0 / sub1 (LoReg / HiReg) and then
//    either uses V_MOV_B32_sdwa (VALU with SDWA) or a sequence involving
//    V_LSHLREV_B32_e64 and MOV / AND / OR opcodes chosen by IsVALU; operand 3
//    of the And and Or instructions is marked dead.
//  - Generic path: picks sub0 for DstSize < 32, otherwise the sub-register
//    covering DstSize / 32 channels from channel 0; constrains the source to
//    a class that supports that sub-register, sets the sub-register index on
//    operand 1 and rewrites the instruction into a COPY.
// NOTE(review): the conditions selecting between these paths and all return
// values are in lines missing from this listing.
2596bool AMDGPUInstructionSelector::selectG_TRUNC(
MachineInstr &
I)
const {
2597 Register DstReg =
I.getOperand(0).getReg();
2598 Register SrcReg =
I.getOperand(1).getReg();
2599 const LLT DstTy = MRI->getType(DstReg);
2600 const LLT SrcTy = MRI->getType(SrcReg);
2603 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
2604 const RegisterBank *DstRB;
2610 DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
2615 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
2620 const TargetRegisterClass *SrcRC =
2621 TRI.getRegClassForSizeOnBank(SrcSize, *SrcRB);
2622 const TargetRegisterClass *DstRC =
2623 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
2624 if (!SrcRC || !DstRC)
2627 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
2628 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI)) {
2633 if (DstRC == &AMDGPU::VGPR_16RegClass && SrcSize == 32) {
2634 assert(STI.useRealTrue16Insts());
2638 .
addReg(SrcReg, {}, AMDGPU::lo16);
2639 I.eraseFromParent();
2647 Register LoReg = MRI->createVirtualRegister(DstRC);
2648 Register HiReg = MRI->createVirtualRegister(DstRC);
2650 .
addReg(SrcReg, {}, AMDGPU::sub0);
2652 .
addReg(SrcReg, {}, AMDGPU::sub1);
2654 if (IsVALU && STI.hasSDWA()) {
2657 MachineInstr *MovSDWA =
2658 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_MOV_B32_sdwa), DstReg)
2668 Register TmpReg0 = MRI->createVirtualRegister(DstRC);
2669 Register TmpReg1 = MRI->createVirtualRegister(DstRC);
2670 Register ImmReg = MRI->createVirtualRegister(DstRC);
2672 BuildMI(*
MBB,
I,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), TmpReg0)
2682 unsigned MovOpc = IsVALU ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
2683 unsigned AndOpc = IsVALU ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
2684 unsigned OrOpc = IsVALU ? AMDGPU::V_OR_B32_e64 : AMDGPU::S_OR_B32;
2696 And.setOperandDead(3);
2697 Or.setOperandDead(3);
2701 I.eraseFromParent();
2709 unsigned SubRegIdx = DstSize < 32
2710 ?
static_cast<unsigned>(AMDGPU::sub0)
2711 : TRI.getSubRegFromChannel(0, DstSize / 32);
2712 if (SubRegIdx == AMDGPU::NoSubRegister)
2717 const TargetRegisterClass *SrcWithSubRC
2718 = TRI.getSubClassWithSubReg(SrcRC, SubRegIdx);
2722 if (SrcWithSubRC != SrcRC) {
2723 if (!RBI.constrainGenericRegister(SrcReg, *SrcWithSubRC, *MRI))
2727 I.getOperand(1).setSubReg(SubRegIdx);
2730 I.setDesc(TII.get(TargetOpcode::COPY));
2737 int SignedMask =
static_cast<int>(Mask);
2738 return SignedMask >= -16 && SignedMask <= 64;
// Returns a register bank for a register; the one visible exit derives the
// bank from a register class RC via RBI.getRegBankFromRegClass.
// NOTE(review): the parameter list and the other branches are in lines
// missing from this listing.
2742const RegisterBank *AMDGPUInstructionSelector::getArtifactRegBank(
2751 return &RBI.getRegBankFromRegClass(*RC, LLT());
// Selects the extension family: G_SEXT, G_SEXT_INREG, G_ANYEXT and (by the
// name, presumably) G_ZEXT.
// InReg is true for G_SEXT_INREG; Signed is true for G_SEXT and
// G_SEXT_INREG.
// Visible paths:
//  - G_ANYEXT: either handed to selectCOPY, or built using an IMPLICIT_DEF
//    UndefReg of the source class with both registers constrained.
//  - VGPR source, DstSize <= 32: uses V_BFE_I32_e64 / V_BFE_U32_e64 on at
//    least one of the two exits shown.
//  - SGPR source, DstSize <= 64: the source is constrained to SReg_64 when
//    InReg and DstSize > 32, else SReg_32. Signed 8- or 16-bit to 32-bit uses
//    S_SEXT_I32_I8 / S_SEXT_I32_I16; a 32-bit source widened past 32 bits
//    uses a fresh SReg_32 HiReg; the remaining cases use S_BFE_{I,U}64 or
//    S_BFE_{I,U}32.
// NOTE(review): several conditions, operand lists and the final fallback are
// in lines missing from this listing.
2755bool AMDGPUInstructionSelector::selectG_SZA_EXT(
MachineInstr &
I)
const {
2756 bool InReg =
I.getOpcode() == AMDGPU::G_SEXT_INREG;
2757 bool Signed =
I.getOpcode() == AMDGPU::G_SEXT || InReg;
2760 const Register DstReg =
I.getOperand(0).getReg();
2761 const Register SrcReg =
I.getOperand(1).getReg();
2763 const LLT DstTy = MRI->getType(DstReg);
2764 const LLT SrcTy = MRI->getType(SrcReg);
2765 const unsigned SrcSize =
I.getOpcode() == AMDGPU::G_SEXT_INREG ?
2772 const RegisterBank *SrcBank = getArtifactRegBank(SrcReg, *MRI, TRI);
2775 if (
I.getOpcode() == AMDGPU::G_ANYEXT) {
2777 return selectCOPY(
I);
2779 const TargetRegisterClass *SrcRC =
2780 TRI.getRegClassForTypeOnBank(SrcTy, *SrcBank);
2781 const RegisterBank *DstBank = RBI.getRegBank(DstReg, *MRI, TRI);
2782 const TargetRegisterClass *DstRC =
2783 TRI.getRegClassForSizeOnBank(DstSize, *DstBank);
2785 Register UndefReg = MRI->createVirtualRegister(SrcRC);
2786 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2792 I.eraseFromParent();
2794 return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) &&
2795 RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI);
2798 if (SrcBank->
getID() == AMDGPU::VGPRRegBankID && DstSize <= 32) {
2804 MachineInstr *ExtI =
2808 I.eraseFromParent();
2813 const unsigned BFE =
Signed ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2814 MachineInstr *ExtI =
2819 I.eraseFromParent();
2824 if (SrcBank->
getID() == AMDGPU::SGPRRegBankID && DstSize <= 64) {
2825 const TargetRegisterClass &SrcRC = InReg && DstSize > 32 ?
2826 AMDGPU::SReg_64RegClass : AMDGPU::SReg_32RegClass;
2827 if (!RBI.constrainGenericRegister(SrcReg, SrcRC, *MRI))
2830 if (
Signed && DstSize == 32 && (SrcSize == 8 || SrcSize == 16)) {
2831 const unsigned SextOpc = SrcSize == 8 ?
2832 AMDGPU::S_SEXT_I32_I8 : AMDGPU::S_SEXT_I32_I16;
2835 I.eraseFromParent();
2836 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2841 if (DstSize > 32 && SrcSize == 32) {
2842 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2843 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2846 .
addReg(SrcReg, {}, SubReg)
2854 .
addReg(SrcReg, {}, SubReg)
2855 .addImm(AMDGPU::sub0)
2858 I.eraseFromParent();
2859 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass,
2863 const unsigned BFE64 =
Signed ? AMDGPU::S_BFE_I64 : AMDGPU::S_BFE_U64;
2864 const unsigned BFE32 =
Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2867 if (DstSize > 32 && (SrcSize <= 32 || InReg)) {
2869 Register ExtReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
2870 Register UndefReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
2871 unsigned SubReg = InReg ? AMDGPU::sub0 : AMDGPU::NoSubRegister;
2873 BuildMI(
MBB,
I,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefReg);
2875 .
addReg(SrcReg, {}, SubReg)
2876 .addImm(AMDGPU::sub0)
2884 I.eraseFromParent();
2885 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_64RegClass, *MRI);
2900 I.eraseFromParent();
2901 return RBI.constrainGenericRegister(DstReg, AMDGPU::SReg_32RegClass, *MRI);
2925 if (Unmerge->getNumDefs() == 2 && Unmerge->getOperand(1).getReg() == In &&
2927 Out = Unmerge->getSourceReg();
2947 if (Shuffle->
getOpcode() != AMDGPU::G_SHUFFLE_VECTOR)
2954 assert(Mask.size() == 2);
2956 if (Mask[0] == 1 && Mask[1] <= 1) {
// Manual selection of G_FPEXT for one case: the subtarget has SALU float
// instructions and the destination is in the SGPR bank. On the visible
// success path an S_CVT_HI_F32_F16 defining Dst is emitted, the original
// instruction is erased and Dst is constrained to SReg_32.
// NOTE(review): the pattern matched on Src and the results of the guard
// conditions are in lines missing from this listing.
2964bool AMDGPUInstructionSelector::selectG_FPEXT(
MachineInstr &
I)
const {
2965 if (!Subtarget->hasSALUFloatInsts())
2968 Register Dst =
I.getOperand(0).getReg();
2969 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
2970 if (DstRB->
getID() != AMDGPU::SGPRRegBankID)
2973 Register Src =
I.getOperand(1).getReg();
2979 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_CVT_HI_F32_F16), Dst)
2981 I.eraseFromParent();
2982 return RBI.constrainGenericRegister(Dst, AMDGPU::SReg_32RegClass, *MRI);
// Manual selection of G_FNEG for a 64-bit SGPR value (both Src and Dst are
// constrained to SReg_64).
// The source is split into sub0 (LoReg) and sub1 (HiReg), a constant is
// materialised with S_MOV_B32, a 32-bit S_OR_B32 or S_XOR_B32 is used, and
// the result is reassembled into Dst with REG_SEQUENCE.
// S_OR_B32 is chosen when the source is defined by a G_FABS (Fabs non-null),
// S_XOR_B32 otherwise.
// NOTE(review): the constant's value, which half is modified and the rest of
// the guard condition are in lines missing from this listing.
2989bool AMDGPUInstructionSelector::selectG_FNEG(
MachineInstr &
MI)
const {
3002 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3003 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3008 MachineInstr *Fabs =
getOpcodeDef(TargetOpcode::G_FABS, Src, *MRI);
3012 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3013 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3016 MachineBasicBlock *BB =
MI.getParent();
3018 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3019 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3020 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3021 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3023 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3024 .
addReg(Src, {}, AMDGPU::sub0);
3025 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3026 .
addReg(Src, {}, AMDGPU::sub1);
3027 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3031 unsigned Opc = Fabs ? AMDGPU::S_OR_B32 : AMDGPU::S_XOR_B32;
3036 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3041 MI.eraseFromParent();
// Manual selection of G_FABS for a 64-bit SGPR value (both Src and Dst are
// constrained to SReg_64).
// The source is split into sub0 (LoReg) and sub1 (HiReg), a constant is
// materialised with S_MOV_B32, an S_AND_B32 produces OpReg, and the result is
// reassembled into Dst with REG_SEQUENCE.
// NOTE(review): the constant's value, the S_AND_B32 operands and the rest of
// the guard condition are in lines missing from this listing.
3046bool AMDGPUInstructionSelector::selectG_FABS(
MachineInstr &
MI)
const {
3048 const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
3049 if (DstRB->
getID() != AMDGPU::SGPRRegBankID ||
3054 MachineBasicBlock *BB =
MI.getParent();
3056 Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3057 Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3058 Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3059 Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
3061 if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
3062 !RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
3065 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), LoReg)
3066 .
addReg(Src, {}, AMDGPU::sub0);
3067 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), HiReg)
3068 .
addReg(Src, {}, AMDGPU::sub1);
3069 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
3074 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::S_AND_B32), OpReg)
3078 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
3084 MI.eraseFromParent();
3089 return MI.getOpcode() == TargetOpcode::G_CONSTANT;
// Collects addressing information for a memory instruction by walking its
// pointer operand's definition.
// The pointer is operand 0 for G_PREFETCH and operand 1 otherwise. If its
// unique definition is a G_PTR_ADD, each of that instruction's operands 1 and
// 2 is classified: registers in the SGPR bank go to GEPInfo.SgprParts, the
// others to GEPInfo.VgprParts. The function then recurses on the G_PTR_ADD
// itself.
// NOTE(review): constant-offset handling (GEPInfo.Imm) and the point where
// GEPInfo is appended to AddrInfo are in lines missing from this listing.
3092void AMDGPUInstructionSelector::getAddrModeInfo(
const MachineInstr &Load,
3095 unsigned OpNo =
Load.getOpcode() == AMDGPU::G_PREFETCH ? 0 : 1;
3096 const MachineInstr *PtrMI =
3097 MRI.getUniqueVRegDef(
Load.getOperand(OpNo).getReg());
3101 if (PtrMI->
getOpcode() != TargetOpcode::G_PTR_ADD)
3106 for (
unsigned i = 1; i != 3; ++i) {
3107 const MachineOperand &GEPOp = PtrMI->
getOperand(i);
3108 const MachineInstr *OpDef = MRI.getUniqueVRegDef(GEPOp.
getReg());
3113 assert(GEPInfo.Imm == 0);
3117 const RegisterBank *OpBank = RBI.getRegBank(GEPOp.
getReg(), MRI, TRI);
3118 if (OpBank->
getID() == AMDGPU::SGPRRegBankID)
3119 GEPInfo.SgprParts.push_back(GEPOp.
getReg());
3121 GEPInfo.VgprParts.push_back(GEPOp.
getReg());
3125 getAddrModeInfo(*PtrMI, MRI, AddrInfo);
// Returns true when Reg is assigned to the SGPR register bank.
3128bool AMDGPUInstructionSelector::isSGPR(
Register Reg)
const {
3129 return RBI.getRegBank(
Reg, *MRI, TRI)->getID() == AMDGPU::SGPRRegBankID;
// Decides whether a memory instruction is uniform, based on its single memory
// operand.
// Visible rules: G_PREFETCH is uniform when its operand 0 is in the SGPR
// bank; otherwise one exit returns true when I is non-null and carries
// "amdgpu.uniform" metadata.
// NOTE(review): the definition of I, the result for instructions without
// exactly one memory operand, and any intermediate checks are in lines
// missing from this listing.
3132bool AMDGPUInstructionSelector::isInstrUniform(
const MachineInstr &
MI)
const {
3133 if (!
MI.hasOneMemOperand())
3136 const MachineMemOperand *MMO = *
MI.memoperands_begin();
3149 if (
MI.getOpcode() == AMDGPU::G_PREFETCH)
3150 return RBI.getRegBank(
MI.getOperand(0).getReg(), *MRI, TRI)->getID() ==
3151 AMDGPU::SGPRRegBankID;
3154 return I &&
I->getMetadata(
"amdgpu.uniform");
3158 for (
const GEPInfo &GEPInfo : AddrInfo) {
3159 if (!GEPInfo.VgprParts.empty())
// Emits an S_MOV_B32 that defines M0 before I, under a condition that
// includes STI.ldsRequiresM0Init() and (per the PtrTy lookup) the type of the
// pointer in operand 1.
// NOTE(review): the first half of the condition and the value moved into M0
// are in lines missing from this listing.
3165void AMDGPUInstructionSelector::initM0(
MachineInstr &
I)
const {
3166 const LLT PtrTy = MRI->getType(
I.getOperand(1).getReg());
3169 STI.ldsRequiresM0Init()) {
3173 BuildMI(*BB, &
I,
I.getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
3178bool AMDGPUInstructionSelector::selectG_LOAD_STORE_ATOMICRMW(
3185 if (
Reg.isPhysical())
3189 const unsigned Opcode =
MI.getOpcode();
3191 if (Opcode == AMDGPU::COPY)
3194 if (Opcode == AMDGPU::G_AND || Opcode == AMDGPU::G_OR ||
3195 Opcode == AMDGPU::G_XOR)
3200 return GI->is(Intrinsic::amdgcn_class);
3202 return Opcode == AMDGPU::G_ICMP || Opcode == AMDGPU::G_FCMP;
// Selects G_BRCOND (operand 0 = condition, operand 1 = target block).
//  - Condition not in the VCC bank: branch on SCC with S_CBRANCH_SCC1; the
//    condition register class is SReg_32.
//  - Otherwise: the condition is combined with an S_AND_B64 / S_AND_B32
//    (wave64 / wave32; the Exec register is chosen the same way) into a
//    temporary of TRI.getBoolRC(), and the branch is S_CBRANCH_VCCNZ on
//    TRI.getVCC().
// In both cases CondReg gets ConstrainRC if it had no class, and a COPY into
// the chosen physical condition register precedes the branch.
// NOTE(review): the AND operands and the branch construction are partly in
// lines missing from this listing.
3205bool AMDGPUInstructionSelector::selectG_BRCOND(
MachineInstr &
I)
const {
3207 MachineOperand &CondOp =
I.getOperand(0);
3213 const TargetRegisterClass *ConstrainRC;
3220 if (!isVCC(CondReg, *MRI)) {
3224 CondPhysReg = AMDGPU::SCC;
3225 BrOpcode = AMDGPU::S_CBRANCH_SCC1;
3226 ConstrainRC = &AMDGPU::SReg_32RegClass;
3233 const bool Is64 = STI.isWave64();
3234 const unsigned Opcode = Is64 ? AMDGPU::S_AND_B64 : AMDGPU::S_AND_B32;
3235 const Register Exec = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
3237 Register TmpReg = MRI->createVirtualRegister(TRI.getBoolRC());
3238 BuildMI(*BB, &
I,
DL, TII.get(Opcode), TmpReg)
3245 CondPhysReg = TRI.getVCC();
3246 BrOpcode = AMDGPU::S_CBRANCH_VCCNZ;
3247 ConstrainRC = TRI.getBoolRC();
3250 if (!MRI->getRegClassOrNull(CondReg))
3251 MRI->setRegClass(CondReg, ConstrainRC);
3253 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), CondPhysReg)
3256 .
addMBB(
I.getOperand(1).getMBB());
3258 I.eraseFromParent();
// Selects G_GLOBAL_VALUE in place: the instruction's descriptor is changed to
// V_MOV_B32_e32 (destination in the VGPR bank) or S_MOV_B32 (otherwise), and
// the destination is constrained to VGPR_32 or SReg_32 accordingly.
// NOTE(review): any operand fix-ups between the two steps are in lines
// missing from this listing.
3262bool AMDGPUInstructionSelector::selectG_GLOBAL_VALUE(
3264 Register DstReg =
I.getOperand(0).getReg();
3265 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3266 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3267 I.setDesc(TII.get(IsVGPR ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32));
3271 return RBI.constrainGenericRegister(
3272 DstReg, IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass, *MRI);
// Selects G_PTRMASK (operand 0 = result, 1 = pointer, 2 = mask).
// Known-one bits of the mask decide whether each 32-bit half can simply be
// copied: CanCopyLow32 / CanCopyHi32 are true when all bits of MaskLo32 /
// MaskHi32 are known to be one.
// Visible paths:
//  - a single S_AND_B64 into DstReg when neither half can be copied (plus
//    further conditions not shown);
//  - a single 32-bit AND (V_AND_B32_e64 for VGPR results, S_AND_B32
//    otherwise) into DstReg;
//  - a split path: the pointer is copied into LoReg / HiReg from sub0 / sub1,
//    each half that needs masking gets the corresponding mask half copied out
//    and ANDed (MaskedLo / MaskedHi), and the halves are joined into DstReg
//    with REG_SEQUENCE.
// All three registers are constrained to the classes for their type and bank
// before any instruction is built on the 32-bit and split paths.
// NOTE(review): the exact guards for each path are in lines missing from this
// listing.
3275bool AMDGPUInstructionSelector::selectG_PTRMASK(
MachineInstr &
I)
const {
3276 Register DstReg =
I.getOperand(0).getReg();
3277 Register SrcReg =
I.getOperand(1).getReg();
3278 Register MaskReg =
I.getOperand(2).getReg();
3279 LLT Ty = MRI->getType(DstReg);
3280 LLT MaskTy = MRI->getType(MaskReg);
3284 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3285 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3286 const RegisterBank *MaskRB = RBI.getRegBank(MaskReg, *MRI, TRI);
3287 const bool IsVGPR = DstRB->
getID() == AMDGPU::VGPRRegBankID;
3293 APInt MaskOnes =
VT->getKnownOnes(MaskReg).zext(64);
3297 const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
3298 const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
3301 !CanCopyLow32 && !CanCopyHi32) {
3302 auto MIB =
BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_AND_B64), DstReg)
3306 I.eraseFromParent();
3311 unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
3312 const TargetRegisterClass &RegRC
3313 = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
3315 const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(Ty, *DstRB);
3316 const TargetRegisterClass *SrcRC = TRI.getRegClassForTypeOnBank(Ty, *SrcRB);
3317 const TargetRegisterClass *MaskRC =
3318 TRI.getRegClassForTypeOnBank(MaskTy, *MaskRB);
3320 if (!RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3321 !RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3322 !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
3327 "ptrmask should have been narrowed during legalize");
3329 auto NewOp =
BuildMI(*BB, &
I,
DL, TII.get(NewOpc), DstReg)
3335 I.eraseFromParent();
3339 Register HiReg = MRI->createVirtualRegister(&RegRC);
3340 Register LoReg = MRI->createVirtualRegister(&RegRC);
3343 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), LoReg)
3344 .
addReg(SrcReg, {}, AMDGPU::sub0);
3345 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), HiReg)
3346 .
addReg(SrcReg, {}, AMDGPU::sub1);
3355 Register MaskLo = MRI->createVirtualRegister(&RegRC);
3356 MaskedLo = MRI->createVirtualRegister(&RegRC);
3358 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskLo)
3359 .
addReg(MaskReg, {}, AMDGPU::sub0);
3360 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedLo)
3369 Register MaskHi = MRI->createVirtualRegister(&RegRC);
3370 MaskedHi = MRI->createVirtualRegister(&RegRC);
3372 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::COPY), MaskHi)
3373 .
addReg(MaskReg, {}, AMDGPU::sub1);
3374 BuildMI(*BB, &
I,
DL, TII.get(NewOpc), MaskedHi)
3379 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
3384 I.eraseFromParent();
3390static std::pair<Register, unsigned>
3397 std::tie(IdxBaseReg,
Offset) =
3399 if (IdxBaseReg == AMDGPU::NoRegister) {
3403 IdxBaseReg = IdxReg;
3410 if (
static_cast<unsigned>(
Offset) >= SubRegs.
size())
3411 return std::pair(IdxReg, SubRegs[0]);
3412 return std::pair(IdxBaseReg, SubRegs[
Offset]);
// Selects G_EXTRACT_VECTOR_ELT with a dynamic index.
// The index must be in the SGPR bank (the non-SGPR result is not shown).
// Source and destination are constrained to the classes for their type and
// bank, and the index to SReg_32.
// Visible paths:
//  - SGPR source: the index is copied into M0 and S_MOVRELS_B64 or
//    S_MOVRELS_B32 (chosen by Is64) reads the source with a sub-register.
//  - VGPR source without VGPR index mode: the index is copied into M0 and
//    V_MOVRELS_B32_e32 defines DstReg.
//  - VGPR source with VGPR index mode: uses the pseudo returned by
//    TII.getIndirectGPRIDXPseudo(<source class size>, true).
// NOTE(review): the parameter list, the operand/SubReg computation and the
// return values are in lines missing from this listing.
3415bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
3421 LLT DstTy = MRI->getType(DstReg);
3422 LLT SrcTy = MRI->getType(SrcReg);
3424 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
3425 const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
3426 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3430 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3433 const TargetRegisterClass *SrcRC =
3434 TRI.getRegClassForTypeOnBank(SrcTy, *SrcRB);
3435 const TargetRegisterClass *DstRC =
3436 TRI.getRegClassForTypeOnBank(DstTy, *DstRB);
3437 if (!SrcRC || !DstRC)
3439 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) ||
3440 !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) ||
3441 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3444 MachineBasicBlock *BB =
MI.getParent();
3452 if (SrcRB->
getID() == AMDGPU::SGPRRegBankID) {
3456 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3459 unsigned Opc = Is64 ? AMDGPU::S_MOVRELS_B64 : AMDGPU::S_MOVRELS_B32;
3461 .
addReg(SrcReg, {}, SubReg)
3463 MI.eraseFromParent();
3470 if (!STI.useVGPRIndexMode()) {
3471 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3473 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::V_MOVRELS_B32_e32), DstReg)
3474 .
addReg(SrcReg, {}, SubReg)
3476 MI.eraseFromParent();
3480 const MCInstrDesc &GPRIDXDesc =
3481 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*SrcRC),
true);
3487 MI.eraseFromParent();
// Selects G_INSERT_VECTOR_ELT with a dynamic index.
// The index must be in the SGPR bank (the non-SGPR result is not shown).
// The vector source and the result are both constrained to the vector's
// class, the inserted value to the class for its type and bank, and the index
// to SReg_32. A VGPR vector with a value size other than 32 bits hits a
// separate check whose outcome is not shown.
// Visible paths:
//  - without index mode: the index is copied into M0 and the pseudo from
//    TII.getIndirectRegWriteMovRelPseudo(VecSize, ValSize, <vector is SGPR>)
//    is used;
//  - with index mode (VGPR vector and STI.useVGPRIndexMode()): uses the
//    pseudo from TII.getIndirectGPRIDXPseudo(<vector class size>, false).
// NOTE(review): the parameter list, the operand/SubReg computation and the
// return values are in lines missing from this listing.
3492bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
3499 LLT VecTy = MRI->getType(DstReg);
3500 LLT ValTy = MRI->getType(ValReg);
3504 const RegisterBank *VecRB = RBI.getRegBank(VecReg, *MRI, TRI);
3505 const RegisterBank *ValRB = RBI.getRegBank(ValReg, *MRI, TRI);
3506 const RegisterBank *IdxRB = RBI.getRegBank(IdxReg, *MRI, TRI);
3512 if (IdxRB->
getID() != AMDGPU::SGPRRegBankID)
3515 const TargetRegisterClass *VecRC =
3516 TRI.getRegClassForTypeOnBank(VecTy, *VecRB);
3517 const TargetRegisterClass *ValRC =
3518 TRI.getRegClassForTypeOnBank(ValTy, *ValRB);
3520 if (!RBI.constrainGenericRegister(VecReg, *VecRC, *MRI) ||
3521 !RBI.constrainGenericRegister(DstReg, *VecRC, *MRI) ||
3522 !RBI.constrainGenericRegister(ValReg, *ValRC, *MRI) ||
3523 !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI))
3526 if (VecRB->
getID() == AMDGPU::VGPRRegBankID && ValSize != 32)
3530 std::tie(IdxReg, SubReg) =
3533 const bool IndexMode = VecRB->
getID() == AMDGPU::VGPRRegBankID &&
3534 STI.useVGPRIndexMode();
3536 MachineBasicBlock *BB =
MI.getParent();
3540 BuildMI(*BB, &
MI,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
3543 const MCInstrDesc &RegWriteOp = TII.getIndirectRegWriteMovRelPseudo(
3544 VecSize, ValSize, VecRB->
getID() == AMDGPU::SGPRRegBankID);
3549 MI.eraseFromParent();
3553 const MCInstrDesc &GPRIDXDesc =
3554 TII.getIndirectGPRIDXPseudo(TRI.getRegSizeInBits(*VecRC),
false);
3561 MI.eraseFromParent();
3567 case Intrinsic::amdgcn_raw_buffer_load_async_lds:
3568 case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds:
3569 case Intrinsic::amdgcn_struct_buffer_load_async_lds:
3570 case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds:
3571 case Intrinsic::amdgcn_load_async_to_lds:
3572 case Intrinsic::amdgcn_global_load_async_lds:
3578bool AMDGPUInstructionSelector::selectBufferLoadLds(
MachineInstr &
MI)
const {
3579 if (!Subtarget->hasVMemToLDSLoad())
3582 unsigned Size =
MI.getOperand(3).getImm();
3586 const bool HasVIndex =
MI.getNumOperands() == 9;
3590 VIndex =
MI.getOperand(4).getReg();
3594 Register VOffset =
MI.getOperand(4 + OpOffset).getReg();
3595 std::optional<ValueAndVReg> MaybeVOffset =
3597 const bool HasVOffset = !MaybeVOffset || MaybeVOffset->Value.getZExtValue();
3603 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_BOTHEN
3604 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_IDXEN
3605 : HasVOffset ? AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFEN
3606 : AMDGPU::BUFFER_LOAD_UBYTE_LDS_OFFSET;
3609 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_BOTHEN
3610 : AMDGPU::BUFFER_LOAD_USHORT_LDS_IDXEN
3611 : HasVOffset ? AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFEN
3612 : AMDGPU::BUFFER_LOAD_USHORT_LDS_OFFSET;
3615 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_BOTHEN
3616 : AMDGPU::BUFFER_LOAD_DWORD_LDS_IDXEN
3617 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFEN
3618 : AMDGPU::BUFFER_LOAD_DWORD_LDS_OFFSET;
3621 if (!Subtarget->hasLDSLoadB96_B128())
3624 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_BOTHEN
3625 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_IDXEN
3626 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFEN
3627 : AMDGPU::BUFFER_LOAD_DWORDX3_LDS_OFFSET;
3630 if (!Subtarget->hasLDSLoadB96_B128())
3633 Opc = HasVIndex ? HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_BOTHEN
3634 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_IDXEN
3635 : HasVOffset ? AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFEN
3636 : AMDGPU::BUFFER_LOAD_DWORDX4_LDS_OFFSET;
3640 MachineBasicBlock *
MBB =
MI.getParent();
3643 .
add(
MI.getOperand(2));
3647 if (HasVIndex && HasVOffset) {
3648 Register IdxReg = MRI->createVirtualRegister(TRI.getVGPR64Class());
3649 BuildMI(*
MBB, &*MIB,
DL, TII.get(AMDGPU::REG_SEQUENCE), IdxReg)
3656 }
else if (HasVIndex) {
3658 }
else if (HasVOffset) {
3662 MIB.
add(
MI.getOperand(1));
3663 MIB.
add(
MI.getOperand(5 + OpOffset));
3664 MIB.
add(
MI.getOperand(6 + OpOffset));
3666 unsigned Aux =
MI.getOperand(7 + OpOffset).getImm();
3675 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3680 MachinePointerInfo StorePtrI = LoadPtrI;
3691 MachineMemOperand *StoreMMO =
3697 MI.eraseFromParent();
3710 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3716 return Def->getOperand(1).getReg();
3730 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3738 return Def->getOperand(1).getReg();
3740 if (
VT->signBitIsZero(
Reg))
3741 return matchZeroExtendFromS32(
Reg);
3749AMDGPUInstructionSelector::matchZeroExtendFromS32OrS32(
Register Reg)
const {
3751 : matchZeroExtendFromS32(
Reg);
3757AMDGPUInstructionSelector::matchSignExtendFromS32OrS32(
Register Reg)
const {
3759 : matchSignExtendFromS32(
Reg);
3763AMDGPUInstructionSelector::matchExtendFromS32OrS32(
Register Reg,
3764 bool IsSigned)
const {
3766 return matchSignExtendFromS32OrS32(
Reg);
3768 return matchZeroExtendFromS32OrS32(
Reg);
3778 if (
Def->getOpcode() != AMDGPU::G_MERGE_VALUES)
3785 return Def->getOperand(1).getReg();
3790bool AMDGPUInstructionSelector::selectGlobalLoadLds(
MachineInstr &
MI)
const{
3791 if (!Subtarget->hasVMemToLDSLoad())
3795 unsigned Size =
MI.getOperand(3).getImm();
3802 Opc = AMDGPU::GLOBAL_LOAD_LDS_UBYTE;
3805 Opc = AMDGPU::GLOBAL_LOAD_LDS_USHORT;
3808 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORD;
3811 if (!Subtarget->hasLDSLoadB96_B128())
3813 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX3;
3816 if (!Subtarget->hasLDSLoadB96_B128())
3818 Opc = AMDGPU::GLOBAL_LOAD_LDS_DWORDX4;
3822 MachineBasicBlock *
MBB =
MI.getParent();
3825 .
add(
MI.getOperand(2));
3831 if (!isSGPR(Addr)) {
3833 if (isSGPR(AddrDef->Reg)) {
3834 Addr = AddrDef->Reg;
3835 }
else if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
3838 if (isSGPR(SAddr)) {
3839 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
3840 if (
Register Off = matchZeroExtendFromS32(PtrBaseOffset)) {
3851 VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
3863 MIB.
add(
MI.getOperand(4));
3865 unsigned Aux =
MI.getOperand(5).getImm();
3869 MachineMemOperand *LoadMMO = *
MI.memoperands_begin();
3871 LoadPtrI.
Offset =
MI.getOperand(4).getImm();
3872 MachinePointerInfo StorePtrI = LoadPtrI;
3881 MachineMemOperand *StoreMMO =
3883 sizeof(int32_t),
Align(4));
3887 MI.eraseFromParent();
3892bool AMDGPUInstructionSelector::selectTensorLoadStore(
MachineInstr &
MI,
3894 bool IsLoad = IID == Intrinsic::amdgcn_tensor_load_to_lds;
3896 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3900 const auto isAllZeros = [&](MachineOperand &Opnd) {
3901 const MachineInstr *
DefMI = MRI->getVRegDef(Opnd.getReg());
3910 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3911 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3916 MachineBasicBlock *
MBB =
MI.getParent();
3918 .
add(
MI.getOperand(1))
3919 .
add(
MI.getOperand(2));
3921 if (NumGroups >= 4) {
3922 MIB.
add(
MI.getOperand(3))
3923 .
add(
MI.getOperand(4));
3927 .
add(
MI.getOperand(6));
3929 MI.eraseFromParent();
3933bool AMDGPUInstructionSelector::selectBVHIntersectRayIntrinsic(
3935 unsigned OpcodeOpIdx =
3936 MI.getOpcode() == AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY ? 1 : 3;
3937 MI.setDesc(TII.get(
MI.getOperand(OpcodeOpIdx).getImm()));
3938 MI.removeOperand(OpcodeOpIdx);
3939 MI.addImplicitDefUseOperands(*
MI.getMF());
3946bool AMDGPUInstructionSelector::selectSMFMACIntrin(
MachineInstr &
MI)
const {
3949 case Intrinsic::amdgcn_smfmac_f32_16x16x32_f16:
3950 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_F16_e64;
3952 case Intrinsic::amdgcn_smfmac_f32_32x32x16_f16:
3953 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_F16_e64;
3955 case Intrinsic::amdgcn_smfmac_f32_16x16x32_bf16:
3956 Opc = AMDGPU::V_SMFMAC_F32_16X16X32_BF16_e64;
3958 case Intrinsic::amdgcn_smfmac_f32_32x32x16_bf16:
3959 Opc = AMDGPU::V_SMFMAC_F32_32X32X16_BF16_e64;
3961 case Intrinsic::amdgcn_smfmac_i32_16x16x64_i8:
3962 Opc = AMDGPU::V_SMFMAC_I32_16X16X64_I8_e64;
3964 case Intrinsic::amdgcn_smfmac_i32_32x32x32_i8:
3965 Opc = AMDGPU::V_SMFMAC_I32_32X32X32_I8_e64;
3967 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_bf8:
3968 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_BF8_e64;
3970 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf8_fp8:
3971 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF8_FP8_e64;
3973 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_bf8:
3974 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_BF8_e64;
3976 case Intrinsic::amdgcn_smfmac_f32_16x16x64_fp8_fp8:
3977 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_FP8_FP8_e64;
3979 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_bf8:
3980 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_BF8_e64;
3982 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf8_fp8:
3983 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF8_FP8_e64;
3985 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_bf8:
3986 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_BF8_e64;
3988 case Intrinsic::amdgcn_smfmac_f32_32x32x32_fp8_fp8:
3989 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_FP8_FP8_e64;
3991 case Intrinsic::amdgcn_smfmac_f32_16x16x64_f16:
3992 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_F16_e64;
3994 case Intrinsic::amdgcn_smfmac_f32_32x32x32_f16:
3995 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_F16_e64;
3997 case Intrinsic::amdgcn_smfmac_f32_16x16x64_bf16:
3998 Opc = AMDGPU::V_SMFMAC_F32_16X16X64_BF16_e64;
4000 case Intrinsic::amdgcn_smfmac_f32_32x32x32_bf16:
4001 Opc = AMDGPU::V_SMFMAC_F32_32X32X32_BF16_e64;
4003 case Intrinsic::amdgcn_smfmac_i32_16x16x128_i8:
4004 Opc = AMDGPU::V_SMFMAC_I32_16X16X128_I8_e64;
4006 case Intrinsic::amdgcn_smfmac_i32_32x32x64_i8:
4007 Opc = AMDGPU::V_SMFMAC_I32_32X32X64_I8_e64;
4009 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_bf8:
4010 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_BF8_e64;
4012 case Intrinsic::amdgcn_smfmac_f32_16x16x128_bf8_fp8:
4013 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_BF8_FP8_e64;
4015 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_bf8:
4016 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_BF8_e64;
4018 case Intrinsic::amdgcn_smfmac_f32_16x16x128_fp8_fp8:
4019 Opc = AMDGPU::V_SMFMAC_F32_16X16X128_FP8_FP8_e64;
4021 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_bf8:
4022 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_BF8_e64;
4024 case Intrinsic::amdgcn_smfmac_f32_32x32x64_bf8_fp8:
4025 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_BF8_FP8_e64;
4027 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_bf8:
4028 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_BF8_e64;
4030 case Intrinsic::amdgcn_smfmac_f32_32x32x64_fp8_fp8:
4031 Opc = AMDGPU::V_SMFMAC_F32_32X32X64_FP8_FP8_e64;
4037 auto VDst_In =
MI.getOperand(4);
4039 MI.setDesc(TII.get(
Opc));
4040 MI.removeOperand(4);
4041 MI.removeOperand(1);
4042 MI.addOperand(VDst_In);
4043 MI.addImplicitDefUseOperands(*
MI.getMF());
4044 const MCInstrDesc &MCID =
MI.getDesc();
4046 MI.getOperand(0).setIsEarlyClobber(
true);
4051bool AMDGPUInstructionSelector::selectPermlaneSwapIntrin(
4053 if (IntrID == Intrinsic::amdgcn_permlane16_swap &&
4054 !Subtarget->hasPermlane16Swap())
4056 if (IntrID == Intrinsic::amdgcn_permlane32_swap &&
4057 !Subtarget->hasPermlane32Swap())
4060 unsigned Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
4061 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
4062 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
4064 MI.removeOperand(2);
4065 MI.setDesc(TII.get(Opcode));
4068 MachineOperand &FI =
MI.getOperand(4);
4075bool AMDGPUInstructionSelector::selectWaveAddress(
MachineInstr &
MI)
const {
4078 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4079 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4080 MachineBasicBlock *
MBB =
MI.getParent();
4084 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHRREV_B32_e64), DstReg)
4085 .
addImm(Subtarget->getWavefrontSizeLog2())
4090 .
addImm(Subtarget->getWavefrontSizeLog2())
4094 const TargetRegisterClass &RC =
4095 IsVALU ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
4096 if (!RBI.constrainGenericRegister(DstReg, RC, *MRI))
4099 MI.eraseFromParent();
4103bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
4106 MachineBasicBlock *
MBB =
MI.getParent();
4113 const LLT DstTy = MRI->getType(DstReg);
4115 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4116 const TargetRegisterClass *DstRC =
4117 TRI.getRegClassForSizeOnBank(DstSize, *DstRB);
4122 if (!Subtarget->supportsBPermute())
4126 if (Subtarget->supportsWaveWideBPermute()) {
4127 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4128 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4138 assert(Subtarget->isWave64());
4142 MRI->createVirtualRegister(TRI.getRegClass(AMDGPU::SReg_32RegClassID));
4143 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefValReg);
4145 Register UndefExecReg = MRI->createVirtualRegister(
4146 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4147 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::IMPLICIT_DEF), UndefExecReg);
4149 Register PoisonValReg = MRI->createVirtualRegister(DstRC);
4150 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonValReg)
4158 Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
4159 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_LSHLREV_B32_e64), ShiftIdxReg)
4163 Register PoisonIdxReg = MRI->createVirtualRegister(DstRC);
4164 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_SET_INACTIVE_B32), PoisonIdxReg)
4172 Register SameSidePermReg = MRI->createVirtualRegister(DstRC);
4173 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), SameSidePermReg)
4178 Register SwappedValReg = MRI->createVirtualRegister(DstRC);
4179 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_PERMLANE64_B32), SwappedValReg)
4182 Register OppSidePermReg = MRI->createVirtualRegister(DstRC);
4183 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::DS_BPERMUTE_B32), OppSidePermReg)
4188 Register WWMSwapPermReg = MRI->createVirtualRegister(DstRC);
4189 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::STRICT_WWM), WWMSwapPermReg)
4196 Register ThreadIDReg = MRI->createVirtualRegister(DstRC);
4197 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_MBCNT_LO_U32_B32_e64), ThreadIDReg)
4201 Register XORReg = MRI->createVirtualRegister(DstRC);
4206 Register ANDReg = MRI->createVirtualRegister(DstRC);
4211 Register CompareReg = MRI->createVirtualRegister(
4212 TRI.getRegClass(AMDGPU::SReg_64_XEXECRegClassID));
4213 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), CompareReg)
4218 BuildMI(*
MBB,
MI,
DL, TII.get(AMDGPU::V_CNDMASK_B32_e64), DstReg)
4226 MI.eraseFromParent();
4235 unsigned NumOpcodes = 0;
4248 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4259 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4273 if (Src.size() == 3) {
4280 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4281 if (Src[
I] ==
LHS) {
4291 Bits = SrcBits[Src.size()];
4297 switch (
MI->getOpcode()) {
4298 case TargetOpcode::G_AND:
4299 case TargetOpcode::G_OR:
4300 case TargetOpcode::G_XOR: {
4305 if (!getOperandBits(
LHS, LHSBits) ||
4306 !getOperandBits(
RHS, RHSBits)) {
4307 Src = std::move(Backup);
4308 return std::make_pair(0, 0);
4314 NumOpcodes +=
Op.first;
4315 LHSBits =
Op.second;
4320 NumOpcodes +=
Op.first;
4321 RHSBits =
Op.second;
4326 return std::make_pair(0, 0);
4330 switch (
MI->getOpcode()) {
4331 case TargetOpcode::G_AND:
4332 TTbl = LHSBits & RHSBits;
4334 case TargetOpcode::G_OR:
4335 TTbl = LHSBits | RHSBits;
4337 case TargetOpcode::G_XOR:
4338 TTbl = LHSBits ^ RHSBits;
4344 return std::make_pair(NumOpcodes + 1, TTbl);
4347bool AMDGPUInstructionSelector::selectBITOP3(
MachineInstr &
MI)
const {
4348 if (!Subtarget->hasBitOp3Insts())
4352 const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
4353 const bool IsVALU = DstRB->
getID() == AMDGPU::VGPRRegBankID;
4359 unsigned NumOpcodes;
4361 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(DstReg, Src, *MRI);
4365 if (NumOpcodes < 2 || Src.empty())
4368 const bool IsB32 = MRI->getType(DstReg) ==
LLT::scalar(32);
4369 if (NumOpcodes == 2 && IsB32) {
4377 }
else if (NumOpcodes < 4) {
4384 unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
4385 if (!IsB32 && STI.hasTrue16BitInsts())
4386 Opc = STI.useRealTrue16Insts() ? AMDGPU::V_BITOP3_B16_gfx1250_t16_e64
4387 : AMDGPU::V_BITOP3_B16_gfx1250_fake16_e64;
4388 unsigned CBL = STI.getConstantBusLimit(
Opc);
4389 MachineBasicBlock *
MBB =
MI.getParent();
4392 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4393 const RegisterBank *RB = RBI.getRegBank(Src[
I], *MRI, TRI);
4394 if (RB->
getID() != AMDGPU::SGPRRegBankID)
4400 Register NewReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
4411 while (Src.size() < 3)
4412 Src.push_back(Src[0]);
4429 MI.eraseFromParent();
4434bool AMDGPUInstructionSelector::selectStackRestore(
MachineInstr &
MI)
const {
4436 if (!RBI.constrainGenericRegister(SrcReg, AMDGPU::SReg_32RegClass, *MRI))
4439 MachineInstr *
DefMI = MRI->getVRegDef(SrcReg);
4441 Subtarget->getTargetLowering()->getStackPointerRegisterToSaveRestore();
4443 MachineBasicBlock *
MBB =
MI.getParent();
4447 WaveAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
4450 .
addImm(Subtarget->getWavefrontSizeLog2())
4457 MI.eraseFromParent();
4463 if (!
I.isPreISelOpcode()) {
4465 return selectCOPY(
I);
4469 switch (
I.getOpcode()) {
4470 case TargetOpcode::G_AND:
4471 case TargetOpcode::G_OR:
4472 case TargetOpcode::G_XOR:
4473 if (selectBITOP3(
I))
4477 return selectG_AND_OR_XOR(
I);
4478 case TargetOpcode::G_ADD:
4479 case TargetOpcode::G_SUB:
4480 case TargetOpcode::G_PTR_ADD:
4483 return selectG_ADD_SUB(
I);
4484 case TargetOpcode::G_UADDO:
4485 case TargetOpcode::G_USUBO:
4486 case TargetOpcode::G_UADDE:
4487 case TargetOpcode::G_USUBE:
4488 return selectG_UADDO_USUBO_UADDE_USUBE(
I);
4489 case AMDGPU::G_AMDGPU_MAD_U64_U32:
4490 case AMDGPU::G_AMDGPU_MAD_I64_I32:
4491 return selectG_AMDGPU_MAD_64_32(
I);
4492 case TargetOpcode::G_INTTOPTR:
4493 case TargetOpcode::G_BITCAST:
4494 case TargetOpcode::G_PTRTOINT:
4495 case TargetOpcode::G_FREEZE:
4496 return selectCOPY(
I);
4497 case TargetOpcode::G_FNEG:
4500 return selectG_FNEG(
I);
4501 case TargetOpcode::G_FABS:
4504 return selectG_FABS(
I);
4505 case TargetOpcode::G_EXTRACT:
4506 return selectG_EXTRACT(
I);
4507 case TargetOpcode::G_MERGE_VALUES:
4508 case TargetOpcode::G_CONCAT_VECTORS:
4509 return selectG_MERGE_VALUES(
I);
4510 case TargetOpcode::G_UNMERGE_VALUES:
4511 return selectG_UNMERGE_VALUES(
I);
4512 case TargetOpcode::G_BUILD_VECTOR:
4513 case TargetOpcode::G_BUILD_VECTOR_TRUNC:
4514 return selectG_BUILD_VECTOR(
I);
4515 case TargetOpcode::G_IMPLICIT_DEF:
4516 return selectG_IMPLICIT_DEF(
I);
4517 case TargetOpcode::G_INSERT:
4518 return selectG_INSERT(
I);
4519 case TargetOpcode::G_INTRINSIC:
4520 case TargetOpcode::G_INTRINSIC_CONVERGENT:
4521 return selectG_INTRINSIC(
I);
4522 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
4523 case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
4524 return selectG_INTRINSIC_W_SIDE_EFFECTS(
I);
4525 case TargetOpcode::G_ICMP:
4526 case TargetOpcode::G_FCMP:
4527 if (selectG_ICMP_or_FCMP(
I))
4530 case TargetOpcode::G_LOAD:
4531 case TargetOpcode::G_ZEXTLOAD:
4532 case TargetOpcode::G_SEXTLOAD:
4533 case TargetOpcode::G_STORE:
4534 case TargetOpcode::G_ATOMIC_CMPXCHG:
4535 case TargetOpcode::G_ATOMICRMW_XCHG:
4536 case TargetOpcode::G_ATOMICRMW_ADD:
4537 case TargetOpcode::G_ATOMICRMW_SUB:
4538 case TargetOpcode::G_ATOMICRMW_AND:
4539 case TargetOpcode::G_ATOMICRMW_OR:
4540 case TargetOpcode::G_ATOMICRMW_XOR:
4541 case TargetOpcode::G_ATOMICRMW_MIN:
4542 case TargetOpcode::G_ATOMICRMW_MAX:
4543 case TargetOpcode::G_ATOMICRMW_UMIN:
4544 case TargetOpcode::G_ATOMICRMW_UMAX:
4545 case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
4546 case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
4547 case TargetOpcode::G_ATOMICRMW_USUB_COND:
4548 case TargetOpcode::G_ATOMICRMW_USUB_SAT:
4549 case TargetOpcode::G_ATOMICRMW_FADD:
4550 case TargetOpcode::G_ATOMICRMW_FMIN:
4551 case TargetOpcode::G_ATOMICRMW_FMAX:
4552 return selectG_LOAD_STORE_ATOMICRMW(
I);
4553 case TargetOpcode::G_SELECT:
4554 return selectG_SELECT(
I);
4555 case TargetOpcode::G_TRUNC:
4556 return selectG_TRUNC(
I);
4557 case TargetOpcode::G_SEXT:
4558 case TargetOpcode::G_ZEXT:
4559 case TargetOpcode::G_ANYEXT:
4560 case TargetOpcode::G_SEXT_INREG:
4564 if (MRI->getType(
I.getOperand(1).getReg()) !=
LLT::scalar(1) &&
4567 return selectG_SZA_EXT(
I);
4568 case TargetOpcode::G_FPEXT:
4569 if (selectG_FPEXT(
I))
4572 case TargetOpcode::G_BRCOND:
4573 return selectG_BRCOND(
I);
4574 case TargetOpcode::G_GLOBAL_VALUE:
4575 return selectG_GLOBAL_VALUE(
I);
4576 case TargetOpcode::G_PTRMASK:
4577 return selectG_PTRMASK(
I);
4578 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4579 return selectG_EXTRACT_VECTOR_ELT(
I);
4580 case TargetOpcode::G_INSERT_VECTOR_ELT:
4581 return selectG_INSERT_VECTOR_ELT(
I);
4582 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD:
4583 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_D16:
4584 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_LOAD_NORET:
4585 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE:
4586 case AMDGPU::G_AMDGPU_INTRIN_IMAGE_STORE_D16: {
4589 assert(Intr &&
"not an image intrinsic with image pseudo");
4590 return selectImageIntrinsic(
I, Intr);
4592 case AMDGPU::G_AMDGPU_BVH_DUAL_INTERSECT_RAY:
4593 case AMDGPU::G_AMDGPU_BVH_INTERSECT_RAY:
4594 case AMDGPU::G_AMDGPU_BVH8_INTERSECT_RAY:
4595 return selectBVHIntersectRayIntrinsic(
I);
4596 case AMDGPU::G_SBFX:
4597 case AMDGPU::G_UBFX:
4598 return selectG_SBFX_UBFX(
I);
4599 case AMDGPU::G_SI_CALL:
4600 I.setDesc(TII.get(AMDGPU::SI_CALL));
4602 case AMDGPU::G_AMDGPU_WAVE_ADDRESS:
4603 return selectWaveAddress(
I);
4604 case AMDGPU::G_AMDGPU_WHOLE_WAVE_FUNC_RETURN: {
4605 I.setDesc(TII.get(AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN));
4608 case AMDGPU::G_STACKRESTORE:
4609 return selectStackRestore(
I);
4611 return selectPHI(
I);
4612 case AMDGPU::G_AMDGPU_COPY_SCC_VCC:
4613 return selectCOPY_SCC_VCC(
I);
4614 case AMDGPU::G_AMDGPU_COPY_VCC_SCC:
4615 return selectCOPY_VCC_SCC(
I);
4616 case AMDGPU::G_AMDGPU_READANYLANE:
4617 return selectReadAnyLane(
I);
4618 case TargetOpcode::G_CONSTANT:
4619 case TargetOpcode::G_FCONSTANT:
4627AMDGPUInstructionSelector::selectVCSRC(
MachineOperand &Root)
const {
4634std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
4635 Register Src,
bool IsCanonicalizing,
bool AllowAbs,
bool OpSel)
const {
4639 if (
MI->getOpcode() == AMDGPU::G_FNEG) {
4640 Src =
MI->getOperand(1).getReg();
4643 }
else if (
MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
4648 if (
LHS &&
LHS->isZero()) {
4650 Src =
MI->getOperand(2).getReg();
4654 if (AllowAbs &&
MI->getOpcode() == AMDGPU::G_FABS) {
4655 Src =
MI->getOperand(1).getReg();
4662 return std::pair(Src, Mods);
4665std::pair<Register, unsigned>
4666AMDGPUInstructionSelector::selectVOP3PModsF32Impl(
Register Src)
const {
4668 std::tie(Src, Mods) = selectVOP3ModsImpl(Src);
4670 return std::pair(Src, Mods);
4673Register AMDGPUInstructionSelector::copyToVGPRIfSrcFolded(
4675 bool ForceVGPR)
const {
4676 if ((Mods != 0 || ForceVGPR) &&
4677 RBI.getRegBank(Src, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID) {
4684 TII.
get(AMDGPU::COPY), VGPRSrc)
4696AMDGPUInstructionSelector::selectVSRC0(
MachineOperand &Root)
const {
4698 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); }
4703AMDGPUInstructionSelector::selectVOP3Mods0(
MachineOperand &Root)
const {
4706 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4709 [=](MachineInstrBuilder &MIB) {
4710 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4712 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4713 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4714 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4719AMDGPUInstructionSelector::selectVOP3BMods0(
MachineOperand &Root)
const {
4722 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
4727 [=](MachineInstrBuilder &MIB) {
4728 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4730 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
4731 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4732 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4737AMDGPUInstructionSelector::selectVOP3OMods(
MachineOperand &Root)
const {
4739 [=](MachineInstrBuilder &MIB) { MIB.
add(Root); },
4740 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); },
4741 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
4746AMDGPUInstructionSelector::selectVOP3Mods(
MachineOperand &Root)
const {
4749 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
4752 [=](MachineInstrBuilder &MIB) {
4753 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4755 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4760AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
4764 std::tie(Src, Mods) =
4765 selectVOP3ModsImpl(Root.
getReg(),
false);
4768 [=](MachineInstrBuilder &MIB) {
4769 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4771 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4776AMDGPUInstructionSelector::selectVOP3BMods(
MachineOperand &Root)
const {
4779 std::tie(Src, Mods) =
4780 selectVOP3ModsImpl(Root.
getReg(),
true,
4784 [=](MachineInstrBuilder &MIB) {
4785 MIB.
addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
4787 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
4792AMDGPUInstructionSelector::selectVOP3NoMods(
MachineOperand &Root)
const {
4795 if (
Def->getOpcode() == AMDGPU::G_FNEG ||
Def->getOpcode() == AMDGPU::G_FABS)
4798 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
4823 if (
MI->getOpcode() != AMDGPU::G_TRUNC)
4828 return DstSize * 2 == SrcSize;
4834 if (
MI->getOpcode() != AMDGPU::G_LSHR)
4838 std::optional<ValueAndVReg> ShiftAmt;
4839 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4842 unsigned Shift = ShiftAmt->Value.getZExtValue();
4843 return Shift * 2 == SrcSize;
4851 if (
MI->getOpcode() != AMDGPU::G_SHL)
4855 std::optional<ValueAndVReg> ShiftAmt;
4856 if (
mi_match(
MI->getOperand(0).getReg(), MRI,
4859 unsigned Shift = ShiftAmt->Value.getZExtValue();
4860 return Shift * 2 == SrcSize;
4868 if (
MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
4870 return MI->getNumOperands() == 3 &&
MI->getOperand(0).isDef() &&
4871 MI->getOperand(1).isDef() && !
MI->getOperand(2).isDef();
5041static std::optional<std::pair<Register, SrcStatus>>
5046 unsigned Opc =
MI->getOpcode();
5050 case AMDGPU::G_BITCAST:
5051 return std::optional<std::pair<Register, SrcStatus>>(
5052 {
MI->getOperand(1).getReg(), Curr.second});
5054 if (
MI->getOperand(1).getReg().isPhysical())
5055 return std::nullopt;
5056 return std::optional<std::pair<Register, SrcStatus>>(
5057 {
MI->getOperand(1).getReg(), Curr.second});
5058 case AMDGPU::G_FNEG: {
5061 return std::nullopt;
5062 return std::optional<std::pair<Register, SrcStatus>>(
5063 {
MI->getOperand(1).getReg(), Stat});
5070 switch (Curr.second) {
5073 return std::optional<std::pair<Register, SrcStatus>>(
5076 if (Curr.first ==
MI->getOperand(0).getReg())
5077 return std::optional<std::pair<Register, SrcStatus>>(
5079 return std::optional<std::pair<Register, SrcStatus>>(
5091 return std::optional<std::pair<Register, SrcStatus>>(
5095 if (Curr.first ==
MI->getOperand(0).getReg())
5096 return std::optional<std::pair<Register, SrcStatus>>(
5098 return std::optional<std::pair<Register, SrcStatus>>(
5104 return std::optional<std::pair<Register, SrcStatus>>(
5109 return std::optional<std::pair<Register, SrcStatus>>(
5114 return std::optional<std::pair<Register, SrcStatus>>(
5119 return std::optional<std::pair<Register, SrcStatus>>(
5125 return std::nullopt;
5135 bool HasNeg =
false;
5137 bool HasOpsel =
true;
5142 unsigned Opc =
MI->getOpcode();
5144 if (
Opc == TargetOpcode::G_INTRINSIC) {
5147 if (IntrinsicID == Intrinsic::amdgcn_fdot2)
5174 while (
Depth <= MaxDepth && Curr.has_value()) {
5177 Statlist.push_back(Curr.value());
5184static std::pair<Register, SrcStatus>
5191 while (
Depth <= MaxDepth && Curr.has_value()) {
5197 LastSameOrNeg = Curr.value();
5202 return LastSameOrNeg;
5209 return Width1 == Width2;
5244 return isSameBitWidth(NewReg, RootReg, MRI) && IsHalfState(LoStat) &&
5245 IsHalfState(HiStat);
5248std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3PModsImpl(
5254 return {RootReg, Mods};
5257 SearchOptions SO(RootReg, MRI);
5268 MachineInstr *
MI = MRI.getVRegDef(Stat.first);
5270 if (
MI->getOpcode() != AMDGPU::G_BUILD_VECTOR ||
MI->getNumOperands() != 3 ||
5271 (IsDOT && Subtarget->hasDOTOpSelHazard())) {
5273 return {Stat.first, Mods};
5279 if (StatlistHi.
empty()) {
5281 return {Stat.first, Mods};
5287 if (StatlistLo.
empty()) {
5289 return {Stat.first, Mods};
5292 for (
int I = StatlistHi.
size() - 1;
I >= 0;
I--) {
5293 for (
int J = StatlistLo.
size() - 1; J >= 0; J--) {
5294 if (StatlistHi[
I].first == StatlistLo[J].first &&
5296 StatlistHi[
I].first, RootReg, TII, MRI))
5297 return {StatlistHi[
I].first,
5298 updateMods(StatlistHi[
I].second, StatlistLo[J].second, Mods)};
5304 return {Stat.first, Mods};
5314 return RB->
getID() == RBNo;
5331 if (
checkRB(RootReg, AMDGPU::SGPRRegBankID, RBI, MRI,
TRI) ||
5332 checkRB(NewReg, AMDGPU::VGPRRegBankID, RBI, MRI,
TRI))
5336 if (
MI->getOpcode() == AMDGPU::COPY && NewReg ==
MI->getOperand(1).getReg()) {
5345 BuildMI(*BB,
MI,
MI->getDebugLoc(),
TII.get(AMDGPU::COPY), DstReg)
5353AMDGPUInstructionSelector::selectVOP3PRetHelper(
MachineOperand &Root,
5358 std::tie(
Reg, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI, IsDOT);
5362 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
5363 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5368AMDGPUInstructionSelector::selectVOP3PMods(
MachineOperand &Root)
const {
5370 return selectVOP3PRetHelper(Root);
5374AMDGPUInstructionSelector::selectVOP3PModsDOT(
MachineOperand &Root)
const {
5376 return selectVOP3PRetHelper(Root,
true);
5380AMDGPUInstructionSelector::selectVOP3PNoModsDOT(
MachineOperand &Root)
const {
5384 std::tie(Src, Mods) = selectVOP3PModsImpl(Root.
getReg(), MRI,
true );
5388 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5392AMDGPUInstructionSelector::selectVOP3PModsF32(
MachineOperand &Root)
const {
5395 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5398 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5399 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5404AMDGPUInstructionSelector::selectVOP3PNoModsF32(
MachineOperand &Root)
const {
5407 std::tie(Src, Mods) = selectVOP3PModsF32Impl(Root.
getReg());
5411 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); }}};
5415AMDGPUInstructionSelector::selectWMMAOpSelVOP3PMods(
5418 "expected i1 value");
5424 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5432 switch (Elts.
size()) {
5434 DstRegClass = &AMDGPU::VReg_256RegClass;
5437 DstRegClass = &AMDGPU::VReg_128RegClass;
5440 DstRegClass = &AMDGPU::VReg_64RegClass;
5447 auto MIB =
B.buildInstr(AMDGPU::REG_SEQUENCE)
5449 for (
unsigned i = 0; i < Elts.
size(); ++i) {
5460 if (ModOpcode == TargetOpcode::G_FNEG) {
5464 for (
auto El : Elts) {
5470 if (Elts.size() != NegAbsElts.
size()) {
5479 assert(ModOpcode == TargetOpcode::G_FABS);
5487AMDGPUInstructionSelector::selectWMMAModsF32NegAbs(
MachineOperand &Root)
const {
5493 assert(BV->getNumSources() > 0);
5495 MachineInstr *ElF32 = MRI->getVRegDef(BV->getSourceReg(0));
5496 unsigned ModOpcode = (ElF32->
getOpcode() == AMDGPU::G_FNEG)
5499 for (
unsigned i = 0; i < BV->getNumSources(); ++i) {
5500 ElF32 = MRI->getVRegDef(BV->getSourceReg(i));
5507 if (BV->getNumSources() == EltsF32.
size()) {
5513 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5514 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5518AMDGPUInstructionSelector::selectWMMAModsF16Neg(
MachineOperand &Root)
const {
5524 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5532 if (CV->getNumSources() == EltsV2F16.
size()) {
5539 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5540 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5544AMDGPUInstructionSelector::selectWMMAModsF16NegAbs(
MachineOperand &Root)
const {
5550 assert(CV->getNumSources() > 0);
5551 MachineInstr *ElV2F16 = MRI->getVRegDef(CV->getSourceReg(0));
5553 unsigned ModOpcode = (ElV2F16->
getOpcode() == AMDGPU::G_FNEG)
5557 for (
unsigned i = 0; i < CV->getNumSources(); ++i) {
5558 ElV2F16 = MRI->getVRegDef(CV->getSourceReg(i));
5565 if (CV->getNumSources() == EltsV2F16.
size()) {
5572 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5573 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }}};
5577AMDGPUInstructionSelector::selectWMMAVISrc(
MachineOperand &Root)
const {
5578 std::optional<FPValueAndVReg> FPValReg;
5580 if (TII.isInlineConstant(FPValReg->Value)) {
5581 return {{[=](MachineInstrBuilder &MIB) {
5582 MIB.
addImm(FPValReg->Value.bitcastToAPInt().getSExtValue());
5592 if (TII.isInlineConstant(ICst)) {
5602AMDGPUInstructionSelector::selectSWMMACIndex8(
MachineOperand &Root)
const {
5608 std::optional<ValueAndVReg> ShiftAmt;
5610 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5611 ShiftAmt->Value.getZExtValue() % 8 == 0) {
5612 Key = ShiftAmt->Value.getZExtValue() / 8;
5617 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5618 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5623AMDGPUInstructionSelector::selectSWMMACIndex16(
MachineOperand &Root)
const {
5630 std::optional<ValueAndVReg> ShiftAmt;
5632 MRI->getType(ShiftSrc).getSizeInBits() == 32 &&
5633 ShiftAmt->Value.getZExtValue() == 16) {
5639 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5640 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5645AMDGPUInstructionSelector::selectSWMMACIndex32(
MachineOperand &Root)
const {
5652 S32 = matchAnyExtendFromS32(Src);
5656 if (
Def->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
5661 Src =
Def->getOperand(2).getReg();
5668 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5669 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Key); }
5674AMDGPUInstructionSelector::selectVOP3OpSelMods(
MachineOperand &Root)
const {
5677 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
5681 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
5682 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
5688AMDGPUInstructionSelector::selectVINTERPMods(
MachineOperand &Root)
const {
5691 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5697 [=](MachineInstrBuilder &MIB) {
5699 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5701 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5706AMDGPUInstructionSelector::selectVINTERPModsHi(
MachineOperand &Root)
const {
5709 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg(),
5715 [=](MachineInstrBuilder &MIB) {
5717 copyToVGPRIfSrcFolded(Src, Mods, Root, MIB,
true));
5719 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); },
5726bool AMDGPUInstructionSelector::selectScaleOffset(
MachineOperand &Root,
5728 bool IsSigned)
const {
5729 if (!Subtarget->hasScaleOffset())
5733 MachineMemOperand *MMO = *
MI.memoperands_begin();
5745 OffsetReg =
Def->Reg;
5760 m_BinOp(IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO : AMDGPU::S_MUL_U64,
5764 (
Mul->getOpcode() == (IsSigned ? AMDGPU::G_AMDGPU_MAD_I64_I32
5765 : AMDGPU::G_AMDGPU_MAD_U64_U32) ||
5766 (IsSigned &&
Mul->getOpcode() == AMDGPU::G_AMDGPU_MAD_U64_U32 &&
5767 VT->signBitIsZero(
Mul->getOperand(2).getReg()))) &&
5780bool AMDGPUInstructionSelector::selectSmrdOffset(
MachineOperand &Root,
5784 bool *ScaleOffset)
const {
5786 MachineBasicBlock *
MBB =
MI->getParent();
5791 getAddrModeInfo(*
MI, *MRI, AddrInfo);
5793 if (AddrInfo.
empty())
5796 const GEPInfo &GEPI = AddrInfo[0];
5797 std::optional<int64_t> EncodedImm;
5800 *ScaleOffset =
false;
5805 if (GEPI.SgprParts.size() == 1 && GEPI.Imm != 0 && EncodedImm &&
5806 AddrInfo.
size() > 1) {
5807 const GEPInfo &GEPI2 = AddrInfo[1];
5808 if (GEPI2.SgprParts.size() == 2 && GEPI2.Imm == 0) {
5809 Register OffsetReg = GEPI2.SgprParts[1];
5812 selectScaleOffset(Root, OffsetReg,
false );
5813 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5815 Base = GEPI2.SgprParts[0];
5816 *SOffset = OffsetReg;
5825 auto SKnown =
VT->getKnownBits(*SOffset);
5826 if (*
Offset + SKnown.getMinValue().getSExtValue() < 0)
5838 if (
Offset && GEPI.SgprParts.size() == 1 && EncodedImm) {
5839 Base = GEPI.SgprParts[0];
5845 if (SOffset && GEPI.SgprParts.size() == 1 &&
isUInt<32>(GEPI.Imm) &&
5851 Base = GEPI.SgprParts[0];
5852 *SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
5853 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::S_MOV_B32), *SOffset)
5858 if (SOffset && GEPI.SgprParts.size() && GEPI.Imm == 0) {
5859 Register OffsetReg = GEPI.SgprParts[1];
5861 *ScaleOffset = selectScaleOffset(Root, OffsetReg,
false );
5862 OffsetReg = matchZeroExtendFromS32OrS32(OffsetReg);
5864 Base = GEPI.SgprParts[0];
5865 *SOffset = OffsetReg;
5874AMDGPUInstructionSelector::selectSmrdImm(
MachineOperand &Root)
const {
5877 if (!selectSmrdOffset(Root,
Base,
nullptr, &
Offset,
5879 return std::nullopt;
5881 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5882 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset); }}};
5886AMDGPUInstructionSelector::selectSmrdImm32(
MachineOperand &Root)
const {
5888 getAddrModeInfo(*Root.
getParent(), *MRI, AddrInfo);
5890 if (AddrInfo.
empty() || AddrInfo[0].SgprParts.size() != 1)
5891 return std::nullopt;
5893 const GEPInfo &GEPInfo = AddrInfo[0];
5894 Register PtrReg = GEPInfo.SgprParts[0];
5895 std::optional<int64_t> EncodedImm =
5898 return std::nullopt;
5901 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrReg); },
5902 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); }
5907AMDGPUInstructionSelector::selectSmrdSgpr(
MachineOperand &Root)
const {
5910 if (!selectSmrdOffset(Root,
Base, &SOffset,
nullptr,
5912 return std::nullopt;
5915 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5916 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5917 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5921AMDGPUInstructionSelector::selectSmrdSgprImm(
MachineOperand &Root)
const {
5925 if (!selectSmrdOffset(Root,
Base, &SOffset, &
Offset, &ScaleOffset))
5926 return std::nullopt;
5929 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(
Base); },
5930 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
5932 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }}};
5935std::pair<Register, int> AMDGPUInstructionSelector::selectFlatOffsetImpl(
5941 if (!STI.hasFlatInstOffsets())
5945 int64_t ConstOffset;
5947 std::tie(PtrBase, ConstOffset, IsInBounds) =
5948 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
5954 if (ConstOffset == 0 ||
5956 !isFlatScratchBaseLegal(Root.
getReg())) ||
5960 unsigned AddrSpace = (*
MI->memoperands_begin())->getAddrSpace();
5961 if (!TII.isLegalFLATOffset(ConstOffset, AddrSpace, FlatVariant))
5964 return std::pair(PtrBase, ConstOffset);
5968AMDGPUInstructionSelector::selectFlatOffset(
MachineOperand &Root)
const {
5972 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5973 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5978AMDGPUInstructionSelector::selectGlobalOffset(
MachineOperand &Root)
const {
5979 auto PtrWithOffset =
5983 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5984 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
5989AMDGPUInstructionSelector::selectScratchOffset(
MachineOperand &Root)
const {
5990 auto PtrWithOffset =
5994 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrWithOffset.first); },
5995 [=](MachineInstrBuilder &MIB) { MIB.
addImm(PtrWithOffset.second); },
6001AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root,
6003 bool NeedIOffset)
const {
6006 int64_t ConstOffset;
6007 int64_t ImmOffset = 0;
6011 std::tie(PtrBase, ConstOffset, std::ignore) =
6012 getPtrBaseWithConstantOffset(Addr, *MRI);
6014 if (ConstOffset != 0) {
6019 ImmOffset = ConstOffset;
6022 if (isSGPR(PtrBaseDef->Reg)) {
6023 if (ConstOffset > 0) {
6029 int64_t SplitImmOffset = 0, RemainderOffset = ConstOffset;
6031 std::tie(SplitImmOffset, RemainderOffset) =
6036 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
6039 MachineBasicBlock *
MBB =
MI->getParent();
6041 MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6043 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6045 .
addImm(RemainderOffset);
6049 [=](MachineInstrBuilder &MIB) {
6052 [=](MachineInstrBuilder &MIB) {
6055 [=](MachineInstrBuilder &MIB) { MIB.
addImm(SplitImmOffset); },
6056 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6059 [=](MachineInstrBuilder &MIB) { MIB.
addReg(PtrBase); },
6060 [=](MachineInstrBuilder &MIB) {
6063 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); },
6073 unsigned NumLiterals =
6074 !TII.isInlineConstant(APInt(32,
Lo_32(ConstOffset))) +
6075 !TII.isInlineConstant(APInt(32,
Hi_32(ConstOffset)));
6076 if (STI.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
6077 return std::nullopt;
6084 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6089 if (isSGPR(SAddr)) {
6090 Register PtrBaseOffset = AddrDef->MI->getOperand(2).getReg();
6094 bool ScaleOffset = selectScaleOffset(Root, PtrBaseOffset,
6095 Subtarget->hasSignedGVSOffset());
6096 if (
Register VOffset = matchExtendFromS32OrS32(
6097 PtrBaseOffset, Subtarget->hasSignedGVSOffset())) {
6099 return {{[=](MachineInstrBuilder &MIB) {
6102 [=](MachineInstrBuilder &MIB) {
6105 [=](MachineInstrBuilder &MIB) {
6108 [=](MachineInstrBuilder &MIB) {
6112 return {{[=](MachineInstrBuilder &MIB) {
6115 [=](MachineInstrBuilder &MIB) {
6118 [=](MachineInstrBuilder &MIB) {
6128 if (AddrDef->MI->getOpcode() == AMDGPU::G_IMPLICIT_DEF ||
6129 AddrDef->MI->getOpcode() == AMDGPU::G_CONSTANT || !isSGPR(AddrDef->Reg))
6130 return std::nullopt;
6135 MachineBasicBlock *
MBB =
MI->getParent();
6136 Register VOffset = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6138 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32), VOffset)
6143 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6144 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6145 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6146 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6149 [=](MachineInstrBuilder &MIB) { MIB.
addReg(AddrDef->Reg); },
6150 [=](MachineInstrBuilder &MIB) { MIB.
addReg(VOffset); },
6151 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPolBits); }
6156AMDGPUInstructionSelector::selectGlobalSAddr(
MachineOperand &Root)
const {
6157 return selectGlobalSAddr(Root, 0);
6161AMDGPUInstructionSelector::selectGlobalSAddrCPol(
MachineOperand &Root)
const {
6167 return selectGlobalSAddr(Root, PassedCPol);
6171AMDGPUInstructionSelector::selectGlobalSAddrCPolM0(
MachineOperand &Root)
const {
6177 return selectGlobalSAddr(Root, PassedCPol);
6181AMDGPUInstructionSelector::selectGlobalSAddrGLC(
MachineOperand &Root)
const {
6186AMDGPUInstructionSelector::selectGlobalSAddrNoIOffset(
6193 return selectGlobalSAddr(Root, PassedCPol,
false);
6197AMDGPUInstructionSelector::selectGlobalSAddrNoIOffsetM0(
6204 return selectGlobalSAddr(Root, PassedCPol,
false);
6208AMDGPUInstructionSelector::selectScratchSAddr(
MachineOperand &Root)
const {
6211 int64_t ConstOffset;
6212 int64_t ImmOffset = 0;
6216 std::tie(PtrBase, ConstOffset, std::ignore) =
6217 getPtrBaseWithConstantOffset(Addr, *MRI);
6219 if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
6223 ImmOffset = ConstOffset;
6227 if (AddrDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6228 int FI = AddrDef->MI->getOperand(1).
getIndex();
6231 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6237 if (AddrDef->MI->getOpcode() == AMDGPU::G_PTR_ADD) {
6238 Register LHS = AddrDef->MI->getOperand(1).getReg();
6239 Register RHS = AddrDef->MI->getOperand(2).getReg();
6243 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX &&
6244 isSGPR(RHSDef->Reg)) {
6245 int FI = LHSDef->MI->getOperand(1).getIndex();
6249 SAddr = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6251 BuildMI(*BB, &
I,
DL, TII.get(AMDGPU::S_ADD_I32), SAddr)
6259 return std::nullopt;
6262 [=](MachineInstrBuilder &MIB) { MIB.
addReg(SAddr); },
6263 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); }
6268bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
6270 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
6276 auto VKnown =
VT->getKnownBits(VAddr);
6279 uint64_t VMax = VKnown.getMaxValue().getZExtValue();
6280 uint64_t
SMax = SKnown.getMaxValue().getZExtValue();
6281 return (VMax & 3) + (
SMax & 3) >= 4;
6285AMDGPUInstructionSelector::selectScratchSVAddr(
MachineOperand &Root)
const {
6288 int64_t ConstOffset;
6289 int64_t ImmOffset = 0;
6293 std::tie(PtrBase, ConstOffset, std::ignore) =
6294 getPtrBaseWithConstantOffset(Addr, *MRI);
6297 if (ConstOffset != 0 &&
6301 ImmOffset = ConstOffset;
6305 if (AddrDef->MI->getOpcode() != AMDGPU::G_PTR_ADD)
6306 return std::nullopt;
6308 Register RHS = AddrDef->MI->getOperand(2).getReg();
6309 if (RBI.getRegBank(
RHS, *MRI, TRI)->getID() != AMDGPU::VGPRRegBankID)
6310 return std::nullopt;
6312 Register LHS = AddrDef->MI->getOperand(1).getReg();
6315 if (OrigAddr != Addr) {
6316 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
6317 return std::nullopt;
6319 if (!isFlatScratchBaseLegalSV(OrigAddr))
6320 return std::nullopt;
6323 if (checkFlatScratchSVSSwizzleBug(
RHS,
LHS, ImmOffset))
6324 return std::nullopt;
6326 unsigned CPol = selectScaleOffset(Root,
RHS,
true )
6330 if (LHSDef->MI->getOpcode() == AMDGPU::G_FRAME_INDEX) {
6331 int FI = LHSDef->MI->getOperand(1).getIndex();
6333 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6335 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6336 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6345 return std::nullopt;
6348 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
RHS); },
6349 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
LHS); },
6350 [=](MachineInstrBuilder &MIB) { MIB.
addImm(ImmOffset); },
6351 [=](MachineInstrBuilder &MIB) { MIB.
addImm(CPol); }
6356AMDGPUInstructionSelector::selectMUBUFScratchOffen(
MachineOperand &Root)
const {
6358 MachineBasicBlock *
MBB =
MI->getParent();
6360 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6365 Register HighBits = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
6370 BuildMI(*
MBB,
MI,
MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
6374 return {{[=](MachineInstrBuilder &MIB) {
6377 [=](MachineInstrBuilder &MIB) {
6380 [=](MachineInstrBuilder &MIB) {
6385 [=](MachineInstrBuilder &MIB) {
6394 std::optional<int> FI;
6397 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6399 int64_t ConstOffset;
6400 std::tie(PtrBase, ConstOffset, std::ignore) =
6401 getPtrBaseWithConstantOffset(VAddr, *MRI);
6402 if (ConstOffset != 0) {
6403 if (TII.isLegalMUBUFImmOffset(ConstOffset) &&
6404 (!STI.privateMemoryResourceIsRangeChecked() ||
6405 VT->signBitIsZero(PtrBase))) {
6406 const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
6407 if (PtrBaseDef->
getOpcode() == AMDGPU::G_FRAME_INDEX)
6413 }
else if (RootDef->
getOpcode() == AMDGPU::G_FRAME_INDEX) {
6417 return {{[=](MachineInstrBuilder &MIB) {
6420 [=](MachineInstrBuilder &MIB) {
6426 [=](MachineInstrBuilder &MIB) {
6431 [=](MachineInstrBuilder &MIB) {
6436bool AMDGPUInstructionSelector::isDSOffsetLegal(
Register Base,
6441 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6446 return VT->signBitIsZero(
Base);
6449bool AMDGPUInstructionSelector::isDSOffset2Legal(
Register Base, int64_t Offset0,
6451 unsigned Size)
const {
6452 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
6457 if (STI.hasUsableDSOffset() || STI.unsafeDSOffsetFoldingEnabled())
6462 return VT->signBitIsZero(
Base);
6467 return Addr->
getOpcode() == TargetOpcode::G_OR ||
6468 (Addr->
getOpcode() == TargetOpcode::G_PTR_ADD &&
6475bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Addr)
const {
6483 if (STI.hasSignedScratchOffsets())
6489 if (AddrMI->
getOpcode() == TargetOpcode::G_PTR_ADD) {
6490 std::optional<ValueAndVReg> RhsValReg =
6496 if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
6497 RhsValReg->Value.getSExtValue() > -0x40000000)
6501 return VT->signBitIsZero(
LHS);
6506bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(
Register Addr)
const {
6514 if (STI.hasSignedScratchOffsets())
6519 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6524bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
6528 if (STI.hasSignedScratchOffsets())
6533 std::optional<DefinitionAndSourceRegister> BaseDef =
6535 std::optional<ValueAndVReg> RHSOffset =
6545 (RHSOffset->Value.getSExtValue() < 0 &&
6546 RHSOffset->Value.getSExtValue() > -0x40000000)))
6549 Register LHS = BaseDef->MI->getOperand(1).getReg();
6550 Register RHS = BaseDef->MI->getOperand(2).getReg();
6551 return VT->signBitIsZero(
RHS) &&
VT->signBitIsZero(
LHS);
6554bool AMDGPUInstructionSelector::isUnneededShiftMask(
const MachineInstr &
MI,
6555 unsigned ShAmtBits)
const {
6556 assert(
MI.getOpcode() == TargetOpcode::G_AND);
6558 std::optional<APInt>
RHS =
6563 if (
RHS->countr_one() >= ShAmtBits)
6566 const APInt &LHSKnownZeros =
VT->getKnownZeroes(
MI.getOperand(1).getReg());
6567 return (LHSKnownZeros | *
RHS).countr_one() >= ShAmtBits;
6571AMDGPUInstructionSelector::selectMUBUFScratchOffset(
6574 const SIMachineFunctionInfo *
Info =
MF->getInfo<SIMachineFunctionInfo>();
6576 std::optional<DefinitionAndSourceRegister>
Def =
6578 assert(Def &&
"this shouldn't be an optional result");
6583 [=](MachineInstrBuilder &MIB) {
6586 [=](MachineInstrBuilder &MIB) {
6589 [=](MachineInstrBuilder &MIB) { MIB.
addImm(0); }
6600 if (!TII.isLegalMUBUFImmOffset(
Offset))
6608 [=](MachineInstrBuilder &MIB) {
6611 [=](MachineInstrBuilder &MIB) {
6619 !TII.isLegalMUBUFImmOffset(
Offset))
6623 [=](MachineInstrBuilder &MIB) {
6626 [=](MachineInstrBuilder &MIB) {
6633std::pair<Register, unsigned>
6634AMDGPUInstructionSelector::selectDS1Addr1OffsetImpl(
MachineOperand &Root)
const {
6635 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6636 int64_t ConstAddr = 0;
6640 std::tie(PtrBase,
Offset, std::ignore) =
6641 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6644 if (isDSOffsetLegal(PtrBase,
Offset)) {
6646 return std::pair(PtrBase,
Offset);
6648 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6657 return std::pair(Root.
getReg(), 0);
6661AMDGPUInstructionSelector::selectDS1Addr1Offset(
MachineOperand &Root)
const {
6664 std::tie(
Reg,
Offset) = selectDS1Addr1OffsetImpl(Root);
6666 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6672AMDGPUInstructionSelector::selectDS64Bit4ByteAligned(
MachineOperand &Root)
const {
6673 return selectDSReadWrite2(Root, 4);
6677AMDGPUInstructionSelector::selectDS128Bit8ByteAligned(
MachineOperand &Root)
const {
6678 return selectDSReadWrite2(Root, 8);
6682AMDGPUInstructionSelector::selectDSReadWrite2(
MachineOperand &Root,
6683 unsigned Size)
const {
6688 [=](MachineInstrBuilder &MIB) { MIB.
addReg(
Reg); },
6690 [=](MachineInstrBuilder &MIB) { MIB.
addImm(
Offset+1); }
6694std::pair<Register, unsigned>
6695AMDGPUInstructionSelector::selectDSReadWrite2Impl(
MachineOperand &Root,
6696 unsigned Size)
const {
6697 const MachineInstr *RootDef = MRI->getVRegDef(Root.
getReg());
6698 int64_t ConstAddr = 0;
6702 std::tie(PtrBase,
Offset, std::ignore) =
6703 getPtrBaseWithConstantOffset(Root.
getReg(), *MRI);
6706 int64_t OffsetValue0 =
Offset;
6708 if (isDSOffset2Legal(PtrBase, OffsetValue0, OffsetValue1,
Size)) {
6710 return std::pair(PtrBase, OffsetValue0 /
Size);
6712 }
else if (RootDef->
getOpcode() == AMDGPU::G_SUB) {
6720 return std::pair(Root.
getReg(), 0);
6728std::tuple<Register, int64_t, bool>
6729AMDGPUInstructionSelector::getPtrBaseWithConstantOffset(
6732 if (RootI->
getOpcode() != TargetOpcode::G_PTR_ADD)
6733 return {Root, 0,
false};
6736 std::optional<ValueAndVReg> MaybeOffset =
6739 return {Root, 0,
false};
6759 B.buildInstr(AMDGPU::S_MOV_B32)
6762 B.buildInstr(AMDGPU::S_MOV_B32)
6769 B.buildInstr(AMDGPU::REG_SEQUENCE)
6772 .addImm(AMDGPU::sub0)
6774 .addImm(AMDGPU::sub1);
6779 B.buildInstr(AMDGPU::S_MOV_B64)
6784 B.buildInstr(AMDGPU::REG_SEQUENCE)
6787 .addImm(AMDGPU::sub0_sub1)
6789 .addImm(AMDGPU::sub2_sub3);
6796 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6805 uint64_t DefaultFormat =
TII.getDefaultRsrcDataFormat();
6812AMDGPUInstructionSelector::MUBUFAddressData
6813AMDGPUInstructionSelector::parseMUBUFAddress(
Register Src)
const {
6814 MUBUFAddressData
Data;
6820 std::tie(PtrBase,
Offset, std::ignore) =
6821 getPtrBaseWithConstantOffset(Src, *MRI);
6827 if (MachineInstr *InputAdd
6829 Data.N2 = InputAdd->getOperand(1).getReg();
6830 Data.N3 = InputAdd->getOperand(2).getReg();
6845bool AMDGPUInstructionSelector::shouldUseAddr64(MUBUFAddressData Addr)
const {
6851 const RegisterBank *N0Bank = RBI.getRegBank(Addr.N0, *MRI, TRI);
6852 return N0Bank->
getID() == AMDGPU::VGPRRegBankID;
6858void AMDGPUInstructionSelector::splitIllegalMUBUFOffset(
6860 if (TII.isLegalMUBUFImmOffset(ImmOffset))
6864 SOffset = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
6865 B.buildInstr(AMDGPU::S_MOV_B32)
6871bool AMDGPUInstructionSelector::selectMUBUFAddr64Impl(
6876 if (!STI.hasAddr64() || STI.useFlatForGlobal())
6879 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6880 if (!shouldUseAddr64(AddrData))
6886 Offset = AddrData.Offset;
6892 if (RBI.getRegBank(N2, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6894 if (RBI.getRegBank(N3, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6907 }
else if (RBI.getRegBank(N0, *MRI, TRI)->getID() == AMDGPU::VGPRRegBankID) {
6918 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6922bool AMDGPUInstructionSelector::selectMUBUFOffsetImpl(
6927 if (STI.useFlatForGlobal())
6930 MUBUFAddressData AddrData = parseMUBUFAddress(Root.
getReg());
6931 if (shouldUseAddr64(AddrData))
6937 Offset = AddrData.Offset;
6943 splitIllegalMUBUFOffset(
B, SOffset,
Offset);
6948AMDGPUInstructionSelector::selectMUBUFAddr64(
MachineOperand &Root)
const {
6954 if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset,
Offset))
6960 [=](MachineInstrBuilder &MIB) {
6963 [=](MachineInstrBuilder &MIB) {
6966 [=](MachineInstrBuilder &MIB) {
6969 else if (STI.hasRestrictedSOffset())
6970 MIB.
addReg(AMDGPU::SGPR_NULL);
6974 [=](MachineInstrBuilder &MIB) {
6984AMDGPUInstructionSelector::selectMUBUFOffset(
MachineOperand &Root)
const {
6989 if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset,
Offset))
6993 [=](MachineInstrBuilder &MIB) {
6996 [=](MachineInstrBuilder &MIB) {
6999 else if (STI.hasRestrictedSOffset())
7000 MIB.
addReg(AMDGPU::SGPR_NULL);
7012AMDGPUInstructionSelector::selectBUFSOffset(
MachineOperand &Root)
const {
7017 SOffset = AMDGPU::SGPR_NULL;
7019 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); }}};
7023static std::optional<uint64_t>
7027 if (!OffsetVal || !
isInt<32>(*OffsetVal))
7028 return std::nullopt;
7029 return Lo_32(*OffsetVal);
7033AMDGPUInstructionSelector::selectSMRDBufferImm(
MachineOperand &Root)
const {
7034 std::optional<uint64_t> OffsetVal =
7039 std::optional<int64_t> EncodedImm =
7044 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7048AMDGPUInstructionSelector::selectSMRDBufferImm32(
MachineOperand &Root)
const {
7055 std::optional<int64_t> EncodedImm =
7060 return {{ [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedImm); } }};
7064AMDGPUInstructionSelector::selectSMRDBufferSgprImm(
MachineOperand &Root)
const {
7072 return std::nullopt;
7074 std::optional<int64_t> EncodedOffset =
7077 return std::nullopt;
7080 return {{[=](MachineInstrBuilder &MIB) { MIB.
addReg(SOffset); },
7081 [=](MachineInstrBuilder &MIB) { MIB.
addImm(*EncodedOffset); }}};
7084std::pair<Register, unsigned>
7085AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(
MachineOperand &Root,
7086 bool &Matched)
const {
7091 std::tie(Src, Mods) = selectVOP3ModsImpl(Root.
getReg());
7101 const auto CheckAbsNeg = [&]() {
7106 std::tie(Src, ModsTmp) = selectVOP3ModsImpl(Src);
7137AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
7142 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7147 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7148 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7153AMDGPUInstructionSelector::selectVOP3PMadMixMods(
MachineOperand &Root)
const {
7157 std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
7160 [=](MachineInstrBuilder &MIB) { MIB.
addReg(Src); },
7161 [=](MachineInstrBuilder &MIB) { MIB.
addImm(Mods); }
7165bool AMDGPUInstructionSelector::selectSBarrierSignalIsfirst(
7169 Register CCReg =
I.getOperand(0).getReg();
7174 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM))
7175 .
addImm(
I.getOperand(2).getImm());
7179 I.eraseFromParent();
7180 return RBI.constrainGenericRegister(CCReg, AMDGPU::SReg_32_XM0_XEXECRegClass,
7184bool AMDGPUInstructionSelector::selectSGetBarrierState(
7188 const MachineOperand &BarOp =
I.getOperand(2);
7189 std::optional<int64_t> BarValImm =
7193 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7197 MachineInstrBuilder MIB;
7198 unsigned Opc = BarValImm ? AMDGPU::S_GET_BARRIER_STATE_IMM
7199 : AMDGPU::S_GET_BARRIER_STATE_M0;
7202 auto DstReg =
I.getOperand(0).getReg();
7203 const TargetRegisterClass *DstRC =
7204 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7205 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7211 I.eraseFromParent();
7216 if (HasInlineConst) {
7220 case Intrinsic::amdgcn_s_barrier_join:
7221 return AMDGPU::S_BARRIER_JOIN_IMM;
7222 case Intrinsic::amdgcn_s_wakeup_barrier:
7223 return AMDGPU::S_WAKEUP_BARRIER_IMM;
7224 case Intrinsic::amdgcn_s_get_named_barrier_state:
7225 return AMDGPU::S_GET_BARRIER_STATE_IMM;
7231 case Intrinsic::amdgcn_s_barrier_join:
7232 return AMDGPU::S_BARRIER_JOIN_M0;
7233 case Intrinsic::amdgcn_s_wakeup_barrier:
7234 return AMDGPU::S_WAKEUP_BARRIER_M0;
7235 case Intrinsic::amdgcn_s_get_named_barrier_state:
7236 return AMDGPU::S_GET_BARRIER_STATE_M0;
7241bool AMDGPUInstructionSelector::selectNamedBarrierInit(
7245 const MachineOperand &BarOp =
I.getOperand(1);
7246 const MachineOperand &CntOp =
I.getOperand(2);
7250 if (IntrID == Intrinsic::amdgcn_s_barrier_signal_var) {
7251 std::optional<int64_t> CntImm =
7253 if (CntImm && *CntImm == 0) {
7254 std::optional<int64_t> BarValImm =
7257 auto BarID = ((*BarValImm) >> 4) & 0x3F;
7258 BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_IMM))
7260 I.eraseFromParent();
7267 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7273 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7280 Register TmpReg2 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7286 Register TmpReg3 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7287 constexpr unsigned ShAmt = 16;
7293 Register TmpReg4 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7303 unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
7304 ? AMDGPU::S_BARRIER_INIT_M0
7305 : AMDGPU::S_BARRIER_SIGNAL_M0;
7306 MachineInstrBuilder MIB;
7309 I.eraseFromParent();
7313bool AMDGPUInstructionSelector::selectNamedBarrierInst(
7317 MachineOperand BarOp = IntrID == Intrinsic::amdgcn_s_get_named_barrier_state
7320 std::optional<int64_t> BarValImm =
7325 Register TmpReg0 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7331 Register TmpReg1 = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
7337 auto CopyMIB =
BuildMI(*
MBB, &
I,
DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
7342 MachineInstrBuilder MIB;
7346 if (IntrID == Intrinsic::amdgcn_s_get_named_barrier_state) {
7347 auto DstReg =
I.getOperand(0).getReg();
7348 const TargetRegisterClass *DstRC =
7349 TRI.getConstrainedRegClassForOperand(
I.getOperand(0), *MRI);
7350 if (!DstRC || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI))
7356 auto BarId = ((*BarValImm) >> 4) & 0x3F;
7360 I.eraseFromParent();
7367 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7368 "Expected G_CONSTANT");
7369 MIB.
addImm(
MI.getOperand(1).getCImm()->getSExtValue());
7375 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7376 "Expected G_CONSTANT");
7377 MIB.
addImm(-
MI.getOperand(1).getCImm()->getSExtValue());
7383 const MachineOperand &
Op =
MI.getOperand(1);
7384 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT &&
OpIdx == -1);
7385 MIB.
addImm(
Op.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
7388void AMDGPUInstructionSelector::renderCountTrailingOnesImm(
7390 assert(
MI.getOpcode() == TargetOpcode::G_CONSTANT &&
OpIdx == -1 &&
7391 "Expected G_CONSTANT");
7392 MIB.
addImm(
MI.getOperand(1).getCImm()->getValue().countTrailingOnes());
7400 const MachineOperand &
Op =
MI.getOperand(
OpIdx);
7417 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7421void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_0(
7423 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7428void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_0_1(
7430 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7436void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_0(
7438 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7443void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_1_1(
7445 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7451void AMDGPUInstructionSelector::renderDstSelToOpSelXForm(
7453 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7458void AMDGPUInstructionSelector::renderSrcSelToOpSelXForm(
7460 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7465void AMDGPUInstructionSelector::renderSrcAndDstSelToOpSelXForm_2_0(
7467 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7472void AMDGPUInstructionSelector::renderDstSelToOpSel3XFormXForm(
7474 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7483 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7492 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7499void AMDGPUInstructionSelector::renderExtractCpolSetGLC(
7501 assert(
OpIdx >= 0 &&
"expected to match an immediate operand");
7502 const uint32_t Cpol =
MI.getOperand(
OpIdx).getImm() &
7517 const APFloat &APF =
MI.getOperand(1).getFPImm()->getValueAPF();
7519 assert(ExpVal != INT_MIN);
7537 if (
MI.getOperand(
OpIdx).getImm())
7539 MIB.
addImm((int64_t)Mods);
7546 if (
MI.getOperand(
OpIdx).getImm())
7548 MIB.
addImm((int64_t)Mods);
7554 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7562 MIB.
addImm((int64_t)Mods);
7568 uint32_t
V =
MI.getOperand(2).getImm();
7571 if (!Subtarget->hasSafeCUPrefetch())
7577void AMDGPUInstructionSelector::renderScaledMAIIntrinsicOperand(
7579 unsigned Val =
MI.getOperand(
OpIdx).getImm();
7588bool AMDGPUInstructionSelector::isInlineImmediate(
const APInt &Imm)
const {
7589 return TII.isInlineConstant(Imm);
7592bool AMDGPUInstructionSelector::isInlineImmediate(
const APFloat &Imm)
const {
7593 return TII.isInlineConstant(Imm);
MachineInstrBuilder MachineInstrBuilder & DefMI
static unsigned getIntrinsicID(const SDNode *N)
#define GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static Register getLegalRegBank(Register NewReg, Register RootReg, const AMDGPURegisterBankInfo &RBI, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const SIInstrInfo &TII)
static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is shift left with half bits, such as reg0:2n =G_SHL reg1:2n, CONST(n)
static bool isNoUnsignedWrap(MachineInstr *Addr)
static Register buildOffsetSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID)
static bool checkRB(Register Reg, unsigned int RBNo, const AMDGPURegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
static unsigned updateMods(SrcStatus HiStat, SrcStatus LoStat, unsigned Mods)
static bool isTruncHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is truncating to half, such as reg0:n = G_TRUNC reg1:2n
static Register getWaveAddress(const MachineInstr *Def)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static bool shouldUseAndMask(unsigned Size, unsigned &Mask)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static TypeClass isVectorOfTwoOrScalar(Register Reg, const MachineRegisterInfo &MRI)
static bool isLaneMaskFromSameBlock(Register Reg, MachineRegisterInfo &MRI, MachineBasicBlock *MBB)
static bool parseTexFail(uint64_t TexFailCtrl, bool &TFE, bool &LWE, bool &IsTexFail)
static void addZeroImm(MachineInstrBuilder &MIB)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
static bool isConstant(const MachineInstr &MI)
static bool isSameBitWidth(Register Reg1, Register Reg2, const MachineRegisterInfo &MRI)
static Register buildRegSequence(SmallVectorImpl< Register > &Elts, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
static Register buildRSRC(MachineIRBuilder &B, MachineRegisterInfo &MRI, uint32_t FormatLo, uint32_t FormatHi, Register BasePtr)
Return a resource descriptor for use with an arbitrary 64-bit pointer.
static bool isAsyncLDSDMA(Intrinsic::ID Intr)
static void diagnoseUnsupportedIntrinsic(const MachineInstr &I)
static std::pair< Register, unsigned > computeIndirectRegIndex(MachineRegisterInfo &MRI, const SIRegisterInfo &TRI, const TargetRegisterClass *SuperRC, Register IdxReg, unsigned EltSize, GISelValueTracking &ValueTracking)
Return the register to use for the index value, and the subregister to use for the indirectly accesse...
static unsigned getLogicalBitOpcode(unsigned Opc, bool Is64)
static std::pair< Register, SrcStatus > getLastSameOrNeg(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static Register stripCopy(Register Reg, MachineRegisterInfo &MRI)
static std::optional< std::pair< Register, SrcStatus > > calcNextStatus(std::pair< Register, SrcStatus > Curr, const MachineRegisterInfo &MRI)
static Register stripBitCast(Register Reg, MachineRegisterInfo &MRI)
static std::optional< uint64_t > getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI)
Get an immediate that must be 32-bits, and treated as zero extended.
static bool isValidToPack(SrcStatus HiStat, SrcStatus LoStat, Register NewReg, Register RootReg, const SIInstrInfo &TII, const MachineRegisterInfo &MRI)
static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size, const GCNSubtarget &ST)
static SmallVector< std::pair< Register, SrcStatus > > getSrcStats(Register Reg, const MachineRegisterInfo &MRI, SearchOptions SO, int MaxDepth=3)
static bool isUnmergeHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI has the form reg0:n, reg1:n = G_UNMERGE_VALUES reg2:2n
static SrcStatus getNegStatus(Register Reg, SrcStatus S, const MachineRegisterInfo &MRI)
static bool isVCmpResult(Register Reg, MachineRegisterInfo &MRI)
static Register buildAddr64RSrc(MachineIRBuilder &B, MachineRegisterInfo &MRI, const SIInstrInfo &TII, Register BasePtr)
static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI)
Test if the MI is a logical shift right by half the bits, such as reg0:2n = G_LSHR reg1:2n,...
static void selectWMMAModsNegAbs(unsigned ModOpcode, unsigned &Mods, SmallVectorImpl< Register > &Elts, Register &Src, MachineInstr *InsertPt, MachineRegisterInfo &MRI)
This file declares the targeting of the InstructionSelector class for AMDGPU.
AMDGPU Register Bank Select
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool isAllZeros(StringRef Arr)
Return true if the array is empty or all zeros.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
Contains matchers for matching SSA Machine Instructions.
Machine Check Debug Module
This file declares the MachineIRBuilder class.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
static std::vector< std::pair< int, unsigned > > Swizzle(std::vector< std::pair< int, unsigned > > Src, R600InstrInfo::BankSwizzle Swz)
This is used to control valid status that current MI supports.
bool checkOptions(SrcStatus Stat) const
SearchOptions(Register Reg, const MachineRegisterInfo &MRI)
AMDGPUInstructionSelector(const GCNSubtarget &STI, const AMDGPURegisterBankInfo &RBI, const AMDGPUTargetMachine &TM)
static const char * getName()
bool select(MachineInstr &I) override
Select the (possibly generic) instruction I to only use target-specific opcodes.
void setupMF(MachineFunction &MF, GISelValueTracking *VT, CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) override
Setup per-MF executor state.
uint32_t getLDSSize() const
LLVM_READONLY int getExactLog2Abs() const
Class for arbitrary precision integers.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
int64_t getSExtValue() const
Get sign extended value.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
@ ICMP_UGE
unsigned greater or equal
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ FCMP_ULT
1 1 0 0 True if unordered or less than
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
@ ICMP_ULT
unsigned less than
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
@ ICMP_SGE
signed greater or equal
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
@ ICMP_ULE
unsigned less or equal
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
bool isFPPredicate() const
bool isIntPredicate() const
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
DILocation * get() const
Get the underlying DILocation.
Diagnostic information for unsupported feature in backend.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
std::optional< SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > > ComplexRendererFns
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
CodeGenCoverage * CoverageInfo
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
TypeSize getValue() const
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void setReturnAddressIsTaken(bool s)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Helper class to build MachineInstr.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & setOperandDead(unsigned OpIdx) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void tieOperands(unsigned DefIdx, unsigned UseIdx)
Add a tie between the register operands at DefIdx and UseIdx.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LocationSize getSize() const
Return the size in bytes of the memory reference.
unsigned getAddrSpace() const
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const ConstantInt * getCImm() const
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
ArrayRef< int > getShuffleMask() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
bool isEarlyClobber() const
Register getReg() const
getReg - Returns the register number.
bool isInternalRead() const
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Analysis providing profile information.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
static unsigned getDSShaderTypeValue(const MachineFunction &MF)
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static bool isGenericOpcode(unsigned Opc)
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUFFER_RESOURCE
Address space for 128-bit buffer resources.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getGlobalSaddrOp(uint32_t Opcode)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
Intrinsic::ID getIntrinsicID(const MachineInstr &I)
Return the intrinsic ID for opcodes with the G_AMDGPU_INTRIN_ prefix.
std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg, GISelValueTracking *ValueTracking=nullptr, bool CheckNUW=false)
Returns base register and constant offset.
const ImageDimIntrinsicInfo * getImageDimIntrinsicInfo(unsigned Intr)
IndexMode
ARM Index Modes.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
UnaryOp_match< SrcTy, TargetOpcode::COPY > m_Copy(SrcTy &&Src)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ConstantMatch< APInt > m_ICst(APInt &Cst)
SpecificConstantMatch m_AllOnesInt()
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
SpecificRegisterMatch m_SpecificReg(Register RequestedReg)
Matches a register only if it is equal to RequestedReg.
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
UnaryOp_match< SrcTy, TargetOpcode::G_FABS > m_GFabs(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, TargetOpcode::G_MUL, true > m_GMul(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
NodeAddr< DefNode * > Def
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
FunctionAddr VTableAddr Value
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT whose value fits in int64_t, return it.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
@ Or
Bitwise or logical OR of integers.
@ Mul
Product of integers.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
constexpr RegState getUndefRegState(bool B)
@ Default
The result value is uniform if and only if all operands are uniform.
unsigned AtomicNoRetBaseOpcode
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
int64_t Offset
Offset - This is an offset from the base Value*.
PointerUnion< const Value *, const PseudoSourceValue * > V
This is the IR pointer value for the access, or it is null if unknown.