29#include "llvm/IR/IntrinsicsAMDGPU.h"
33#ifdef EXPENSIVE_CHECKS
38#define DEBUG_TYPE "amdgpu-isel"
53 In = stripBitcast(In);
59 Out = In.getOperand(0);
70 if (ShiftAmt->getZExtValue() == 16) {
90 if (
Lo->isDivergent()) {
92 SL,
Lo.getValueType()),
100 Src.getValueType(),
Ops),
118 SDValue Idx = In.getOperand(1);
120 return In.getOperand(0);
124 SDValue Src = In.getOperand(0);
125 if (Src.getValueType().getSizeInBits() == 32)
126 return stripBitcast(Src);
136 assert(Elts.
size() == SubRegClass.
size() &&
"array size mismatch");
137 unsigned NumElts = Elts.
size();
140 for (
unsigned i = 0; i < NumElts; ++i) {
141 Ops[2 * i + 1] = Elts[i];
151 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
155#ifdef EXPENSIVE_CHECKS
160 "AMDGPU DAG->DAG Pattern Instruction Selection",
false,
181bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(
unsigned Opc)
const {
215 case AMDGPUISD::FRACT:
216 case AMDGPUISD::CLAMP:
217 case AMDGPUISD::COS_HW:
218 case AMDGPUISD::SIN_HW:
219 case AMDGPUISD::FMIN3:
220 case AMDGPUISD::FMAX3:
221 case AMDGPUISD::FMED3:
222 case AMDGPUISD::FMAD_FTZ:
225 case AMDGPUISD::RCP_IFLAG:
235 case AMDGPUISD::DIV_FIXUP:
245#ifdef EXPENSIVE_CHECKS
249 assert(L->isLCSSAForm(DT));
257#ifdef EXPENSIVE_CHECKS
265 assert(Subtarget->d16PreservesUnusedBits());
266 MVT VT =
N->getValueType(0).getSimpleVT();
267 if (VT != MVT::v2i16 && VT != MVT::v2f16)
289 unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
292 AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
298 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdHi), VTList,
311 if (LdLo &&
Lo.hasOneUse()) {
317 unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
320 AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
332 CurDAG->getMemIntrinsicNode(LoadOp,
SDLoc(LdLo), VTList,
345 if (!Subtarget->d16PreservesUnusedBits())
350 bool MadeChange =
false;
351 while (Position !=
CurDAG->allnodes_begin()) {
356 switch (
N->getOpcode()) {
367 CurDAG->RemoveDeadNodes();
373bool AMDGPUDAGToDAGISel::isInlineImmediate(
const SDNode *
N)
const {
379 return TII->isInlineConstant(
C->getAPIntValue());
382 return TII->isInlineConstant(
C->getValueAPF());
392 unsigned OpNo)
const {
393 if (!
N->isMachineOpcode()) {
396 if (
Reg.isVirtual()) {
401 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
402 return TRI->getPhysRegBaseClass(
Reg);
408 switch (
N->getMachineOpcode()) {
410 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
411 const MCInstrDesc &
Desc =
TII->get(
N->getMachineOpcode());
412 unsigned OpIdx =
Desc.getNumDefs() + OpNo;
416 int16_t RegClass =
TII->getOpRegClassID(
Desc.operands()[
OpIdx]);
420 return Subtarget->getRegisterInfo()->getRegClass(RegClass);
422 case AMDGPU::REG_SEQUENCE: {
423 unsigned RCID =
N->getConstantOperandVal(0);
424 const TargetRegisterClass *SuperRC =
425 Subtarget->getRegisterInfo()->getRegClass(RCID);
427 SDValue SubRegOp =
N->getOperand(OpNo + 1);
429 return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
438 Ops.push_back(NewChain);
439 for (
unsigned i = 1, e =
N->getNumOperands(); i != e; ++i)
440 Ops.push_back(
N->getOperand(i));
443 return CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(),
Ops);
450 assert(
N->getOperand(0).getValueType() == MVT::Other &&
"Expected chain");
453 return glueCopyToOp(
N,
M0,
M0.getValue(1));
456SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(
SDNode *
N)
const {
459 if (Subtarget->ldsRequiresM0Init())
461 N,
CurDAG->getSignedTargetConstant(-1, SDLoc(
N), MVT::i32));
463 MachineFunction &
MF =
CurDAG->getMachineFunction();
464 unsigned Value =
MF.getInfo<SIMachineFunctionInfo>()->getGDSSize();
466 glueCopyToM0(
N,
CurDAG->getTargetConstant(
Value, SDLoc(
N), MVT::i32));
473 SDNode *
Lo =
CurDAG->getMachineNode(
474 AMDGPU::S_MOV_B32,
DL, MVT::i32,
476 SDNode *
Hi =
CurDAG->getMachineNode(
477 AMDGPU::S_MOV_B32,
DL, MVT::i32,
480 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
484 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, VT,
Ops);
487SDNode *AMDGPUDAGToDAGISel::packConstantV2I16(
const SDNode *
N,
492 uint32_t LHSVal, RHSVal;
496 uint32_t
K = (LHSVal & 0xffff) | (RHSVal << 16);
498 isVGPRImm(
N) ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32, SL,
506 EVT VT =
N->getValueType(0);
510 SDValue RegClass =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
512 if (NumVectorElts == 1) {
513 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, EltVT,
N->getOperand(0),
518 bool IsGCN =
CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
519 if (IsGCN && Subtarget->has64BitLiterals() && VT.
getSizeInBits() == 64 &&
522 bool AllConst =
true;
524 for (
unsigned I = 0;
I < NumVectorElts; ++
I) {
532 Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
535 C |= Val << (EltSize *
I);
540 CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO,
DL, VT, CV);
541 CurDAG->SelectNodeTo(
N, AMDGPU::COPY_TO_REGCLASS, VT,
SDValue(Copy, 0),
547 assert(NumVectorElts <= 32 &&
"Vectors with more than 32 elements not "
554 RegSeqArgs[0] =
CurDAG->getTargetConstant(RegClassID,
DL, MVT::i32);
555 bool IsRegSeq =
true;
556 unsigned NOps =
N->getNumOperands();
557 for (
unsigned i = 0; i < NOps; i++) {
565 RegSeqArgs[1 + (2 * i)] =
N->getOperand(i);
566 RegSeqArgs[1 + (2 * i) + 1] =
CurDAG->getTargetConstant(
Sub,
DL, MVT::i32);
568 if (NOps != NumVectorElts) {
573 for (
unsigned i = NOps; i < NumVectorElts; ++i) {
576 RegSeqArgs[1 + (2 * i)] =
SDValue(ImpDef, 0);
577 RegSeqArgs[1 + (2 * i) + 1] =
584 CurDAG->SelectNodeTo(
N, AMDGPU::REG_SEQUENCE,
N->getVTList(), RegSeqArgs);
588 EVT VT =
N->getValueType(0);
592 if (!Subtarget->hasPkMovB32() || !EltVT.
bitsEq(MVT::i32) ||
606 Mask[0] < 4 && Mask[1] < 4);
608 SDValue VSrc0 = Mask[0] < 2 ? Src0 : Src1;
609 SDValue VSrc1 = Mask[1] < 2 ? Src0 : Src1;
610 unsigned Src0SubReg = Mask[0] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
611 unsigned Src1SubReg = Mask[1] & 1 ? AMDGPU::sub1 : AMDGPU::sub0;
614 Src0SubReg = Src1SubReg;
616 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
621 Src1SubReg = Src0SubReg;
623 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, VT);
633 if (
N->isDivergent() && Src0SubReg == AMDGPU::sub1 &&
634 Src1SubReg == AMDGPU::sub0) {
650 SDValue Src0OpSelVal =
CurDAG->getTargetConstant(Src0OpSel,
DL, MVT::i32);
651 SDValue Src1OpSelVal =
CurDAG->getTargetConstant(Src1OpSel,
DL, MVT::i32);
654 CurDAG->SelectNodeTo(
N, AMDGPU::V_PK_MOV_B32,
N->getVTList(),
655 {Src0OpSelVal, VSrc0, Src1OpSelVal, VSrc1,
665 CurDAG->getTargetExtractSubreg(Src0SubReg,
DL, EltVT, VSrc0);
667 CurDAG->getTargetExtractSubreg(Src1SubReg,
DL, EltVT, VSrc1);
670 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
671 ResultElt0,
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32),
672 ResultElt1,
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32)};
673 CurDAG->SelectNodeTo(
N, TargetOpcode::REG_SEQUENCE, VT,
Ops);
677 unsigned int Opc =
N->getOpcode();
678 if (
N->isMachineOpcode()) {
686 N = glueCopyToM0LDSInit(
N);
701 if (
N->getValueType(0) != MVT::i64)
704 SelectADD_SUB_I64(
N);
709 if (
N->getValueType(0) != MVT::i32)
716 SelectUADDO_USUBO(
N);
719 case AMDGPUISD::FMUL_W_CHAIN: {
720 SelectFMUL_W_CHAIN(
N);
723 case AMDGPUISD::FMA_W_CHAIN: {
724 SelectFMA_W_CHAIN(
N);
730 EVT VT =
N->getValueType(0);
747 ?
TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32)
759 if (
N->getValueType(0) == MVT::i128) {
760 RC =
CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID,
DL, MVT::i32);
761 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0_sub1,
DL, MVT::i32);
762 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub2_sub3,
DL, MVT::i32);
763 }
else if (
N->getValueType(0) == MVT::i64) {
764 RC =
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32);
765 SubReg0 =
CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
766 SubReg1 =
CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
770 const SDValue Ops[] = { RC,
N->getOperand(0), SubReg0,
771 N->getOperand(1), SubReg1 };
773 N->getValueType(0),
Ops));
779 if (
N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(
N) ||
780 Subtarget->has64BitLiterals())
785 Imm =
FP->getValueAPF().bitcastToAPInt().getZExtValue();
790 Imm =
C->getZExtValue();
799 case AMDGPUISD::BFE_I32:
800 case AMDGPUISD::BFE_U32: {
826 case AMDGPUISD::DIV_SCALE: {
837 return SelectMUL_LOHI(
N);
848 if (
N->getValueType(0) != MVT::i32)
859 case AMDGPUISD::CVT_PKRTZ_F16_F32:
860 case AMDGPUISD::CVT_PKNORM_I16_F32:
861 case AMDGPUISD::CVT_PKNORM_U16_F32:
862 case AMDGPUISD::CVT_PK_U16_U32:
863 case AMDGPUISD::CVT_PK_I16_I32: {
865 if (
N->getValueType(0) == MVT::i32) {
866 MVT NewVT =
Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
868 { N->getOperand(0), N->getOperand(1) });
876 SelectINTRINSIC_W_CHAIN(
N);
880 SelectINTRINSIC_WO_CHAIN(
N);
884 SelectINTRINSIC_VOID(
N);
888 SelectWAVE_ADDRESS(
N);
892 SelectSTACKRESTORE(
N);
901 if (!Subtarget->hasSDWA())
911 return RHS->getZExtValue() == 0xFF || RHS->getZExtValue() == 0xFFFF;
915 return (RHS->getZExtValue() % 8) == 0;
920bool AMDGPUDAGToDAGISel::isUniformBr(
const SDNode *
N)
const {
923 return Term->getMetadata(
"amdgpu.uniform") ||
924 Term->getMetadata(
"structurizecfg.uniform");
927bool AMDGPUDAGToDAGISel::isUnneededShiftMask(
const SDNode *
N,
928 unsigned ShAmtBits)
const {
931 const APInt &
RHS =
N->getConstantOperandAPInt(1);
932 if (
RHS.countr_one() >= ShAmtBits)
962 N1 =
Lo.getOperand(1);
972 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
987 return "AMDGPU DAG->DAG Pattern Instruction Selection";
997#ifdef EXPENSIVE_CHECKS
1003 for (
auto &L : LI.getLoopsInPreorder())
1004 assert(L->isLCSSAForm(DT) &&
"Loop is not in LCSSA form!");
1026 }
else if ((Addr.
getOpcode() == AMDGPUISD::DWORDADDR) &&
1028 Base =
CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
1042SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
1044 SDNode *Mov =
CurDAG->getMachineNode(
1045 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1046 CurDAG->getTargetConstant(Val,
DL, MVT::i32));
1051void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(
SDNode *
N) {
1056 unsigned Opcode =
N->getOpcode();
1065 SDNode *Lo0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1066 DL, MVT::i32,
LHS, Sub0);
1067 SDNode *Hi0 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1068 DL, MVT::i32,
LHS, Sub1);
1070 SDNode *Lo1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1071 DL, MVT::i32,
RHS, Sub0);
1072 SDNode *Hi1 =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1073 DL, MVT::i32,
RHS, Sub1);
1075 SDVTList VTList =
CurDAG->getVTList(MVT::i32, MVT::Glue);
1077 static const unsigned OpcMap[2][2][2] = {
1078 {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
1079 {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
1080 {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
1081 {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};
1083 unsigned Opc = OpcMap[0][
N->isDivergent()][IsAdd];
1084 unsigned CarryOpc = OpcMap[1][
N->isDivergent()][IsAdd];
1087 if (!ConsumeCarry) {
1089 AddLo =
CurDAG->getMachineNode(
Opc,
DL, VTList, Args);
1092 AddLo =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, Args);
1099 SDNode *AddHi =
CurDAG->getMachineNode(CarryOpc,
DL, VTList, AddHiArgs);
1102 CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID,
DL, MVT::i32),
1109 MVT::i64, RegSequenceArgs);
1120void AMDGPUDAGToDAGISel::SelectAddcSubb(
SDNode *
N) {
1125 if (
N->isDivergent()) {
1127 : AMDGPU::V_SUBB_U32_e64;
1129 N,
Opc,
N->getVTList(),
1131 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1134 : AMDGPU::S_SUB_CO_PSEUDO;
1135 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(), {LHS, RHS, CI});
1139void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(
SDNode *
N) {
1144 bool IsVALU =
N->isDivergent();
1146 for (SDNode::user_iterator UI =
N->user_begin(),
E =
N->user_end(); UI !=
E;
1148 if (UI.getUse().getResNo() == 1) {
1149 if (UI->isMachineOpcode()) {
1150 if (UI->getMachineOpcode() !=
1151 (IsAdd ? AMDGPU::S_ADD_CO_PSEUDO : AMDGPU::S_SUB_CO_PSEUDO)) {
1164 unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64;
1167 N,
Opc,
N->getVTList(),
1168 {N->getOperand(0), N->getOperand(1),
1169 CurDAG->getTargetConstant(0, {}, MVT::i1) });
1171 unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;
1173 CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
1174 {N->getOperand(0), N->getOperand(1)});
1178void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(
SDNode *
N) {
1182 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1183 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1184 SelectVOP3Mods(
N->getOperand(3),
Ops[5],
Ops[4]);
1185 Ops[8] =
N->getOperand(0);
1186 Ops[9] =
N->getOperand(4);
1190 bool UseFMAC = Subtarget->hasDLInsts() &&
1194 unsigned Opcode = UseFMAC ? AMDGPU::V_FMAC_F32_e64 : AMDGPU::V_FMA_F32_e64;
1195 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(),
Ops);
1198void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(
SDNode *
N) {
1202 SelectVOP3Mods0(
N->getOperand(1),
Ops[1],
Ops[0],
Ops[4],
Ops[5]);
1203 SelectVOP3Mods(
N->getOperand(2),
Ops[3],
Ops[2]);
1204 Ops[6] =
N->getOperand(0);
1205 Ops[7] =
N->getOperand(3);
1207 CurDAG->SelectNodeTo(
N, AMDGPU::V_MUL_F32_e64,
N->getVTList(),
Ops);
1212void AMDGPUDAGToDAGISel::SelectDIV_SCALE(
SDNode *
N) {
1213 EVT VT =
N->getValueType(0);
1215 assert(VT == MVT::f32 || VT == MVT::f64);
1218 = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;
1223 SelectVOP3BMods0(
N->getOperand(0),
Ops[1],
Ops[0],
Ops[6],
Ops[7]);
1224 SelectVOP3BMods(
N->getOperand(1),
Ops[3],
Ops[2]);
1225 SelectVOP3BMods(
N->getOperand(2),
Ops[5],
Ops[4]);
1231void AMDGPUDAGToDAGISel::SelectMAD_64_32(
SDNode *
N) {
1235 bool UseNoCarry = Subtarget->hasMadNC64_32Insts() && !
N->hasAnyUseOfValue(1);
1236 if (Subtarget->hasMADIntraFwdBug())
1237 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1238 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1239 else if (UseNoCarry)
1240 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1242 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1245 SDValue Ops[] = {
N->getOperand(0),
N->getOperand(1),
N->getOperand(2),
1249 MachineSDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, MVT::i64,
Ops);
1260void AMDGPUDAGToDAGISel::SelectMUL_LOHI(
SDNode *
N) {
1265 if (Subtarget->hasMadNC64_32Insts()) {
1266 VTList =
CurDAG->getVTList(MVT::i64);
1267 Opc =
Signed ? AMDGPU::V_MAD_NC_I64_I32_e64 : AMDGPU::V_MAD_NC_U64_U32_e64;
1269 VTList =
CurDAG->getVTList(MVT::i64, MVT::i1);
1270 if (Subtarget->hasMADIntraFwdBug()) {
1271 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_gfx11_e64
1272 : AMDGPU::V_MAD_U64_U32_gfx11_e64;
1274 Opc =
Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64;
1281 SDNode *Mad =
CurDAG->getMachineNode(
Opc, SL, VTList,
Ops);
1283 SDValue Sub0 =
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32);
1284 SDNode *
Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1285 MVT::i32,
SDValue(Mad, 0), Sub0);
1289 SDValue Sub1 =
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32);
1290 SDNode *
Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SL,
1291 MVT::i32,
SDValue(Mad, 0), Sub1);
1301 if (!
Base || Subtarget->hasUsableDSOffset() ||
1302 Subtarget->unsafeDSOffsetFoldingEnabled())
1313 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1326 int64_t ByteOffset =
C->getSExtValue();
1327 if (isDSOffsetLegal(
SDValue(), ByteOffset)) {
1336 if (isDSOffsetLegal(
Sub, ByteOffset)) {
1342 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1343 if (Subtarget->hasAddNoCarryInsts()) {
1344 SubOp = AMDGPU::V_SUB_U32_e64;
1346 CurDAG->getTargetConstant(0, {}, MVT::i1));
1349 MachineSDNode *MachineSub =
1350 CurDAG->getMachineNode(SubOp,
DL, MVT::i32, Opnds);
1366 if (isDSOffsetLegal(
SDValue(), CAddr->getZExtValue())) {
1368 MachineSDNode *MovZero =
CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
1369 DL, MVT::i32, Zero);
1371 Offset =
CurDAG->getTargetConstant(CAddr->getZExtValue(),
DL, MVT::i16);
1378 Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
1382bool AMDGPUDAGToDAGISel::isDSOffset2Legal(
SDValue Base,
unsigned Offset0,
1384 unsigned Size)
const {
1385 if (Offset0 %
Size != 0 || Offset1 %
Size != 0)
1390 if (!
Base || Subtarget->hasUsableDSOffset() ||
1391 Subtarget->unsafeDSOffsetFoldingEnabled())
1409bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(
SDValue Addr)
const {
1415 if (Subtarget->hasSignedScratchOffsets())
1425 ConstantSDNode *ImmOp =
nullptr;
1436bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(
SDValue Addr)
const {
1442 if (Subtarget->hasSignedScratchOffsets())
1452bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(
SDValue Addr)
const {
1466 (RHSImm->getSExtValue() < 0 && RHSImm->getSExtValue() > -0x40000000)))
1469 auto LHS =
Base.getOperand(0);
1470 auto RHS =
Base.getOperand(1);
1478 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 4);
1484 return SelectDSReadWrite2(Addr,
Base, Offset0, Offset1, 8);
1489 unsigned Size)
const {
1492 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1497 unsigned OffsetValue1 = OffsetValue0 +
Size;
1500 if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1,
Size)) {
1502 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1503 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1508 if (
const ConstantSDNode *
C =
1510 unsigned OffsetValue0 =
C->getZExtValue();
1511 unsigned OffsetValue1 = OffsetValue0 +
Size;
1513 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1523 if (isDSOffset2Legal(
Sub, OffsetValue0, OffsetValue1,
Size)) {
1527 unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
1528 if (Subtarget->hasAddNoCarryInsts()) {
1529 SubOp = AMDGPU::V_SUB_U32_e64;
1531 CurDAG->getTargetConstant(0, {}, MVT::i1));
1534 MachineSDNode *MachineSub =
CurDAG->getMachineNode(
1539 CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1541 CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1547 unsigned OffsetValue0 = CAddr->getZExtValue();
1548 unsigned OffsetValue1 = OffsetValue0 +
Size;
1550 if (isDSOffset2Legal(
SDValue(), OffsetValue0, OffsetValue1,
Size)) {
1552 MachineSDNode *MovZero =
1553 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, Zero);
1555 Offset0 =
CurDAG->getTargetConstant(OffsetValue0 /
Size,
DL, MVT::i32);
1556 Offset1 =
CurDAG->getTargetConstant(OffsetValue1 /
Size,
DL, MVT::i32);
1564 Offset0 =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1565 Offset1 =
CurDAG->getTargetConstant(1,
DL, MVT::i32);
1575 if (Subtarget->useFlatForGlobal())
1580 Idxen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1581 Offen =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1582 Addr64 =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
1583 SOffset = Subtarget->hasRestrictedSOffset()
1584 ?
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32)
1585 :
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1587 ConstantSDNode *C1 =
nullptr;
1589 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1602 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1608 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1624 Ptr =
SDValue(buildSMovImm64(
DL, 0, MVT::v2i32), 0);
1626 Addr64 =
CurDAG->getTargetConstant(1,
DL, MVT::i1);
1630 VAddr =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1640 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1651 AMDGPU::S_MOV_B32,
DL, MVT::i32,
1657bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(
SDValue Addr,
SDValue &SRsrc,
1660 SDValue Ptr, Offen, Idxen, Addr64;
1664 if (!Subtarget->hasAddr64())
1667 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1671 if (
C->getSExtValue()) {
1684std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(
SDValue N)
const {
1689 FI ?
CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) :
N;
1695 return std::pair(TFI,
CurDAG->getTargetConstant(0,
DL, MVT::i32));
1698bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(
SDNode *Parent,
1704 MachineFunction &
MF =
CurDAG->getMachineFunction();
1705 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1707 Rsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1710 int64_t
Imm = CAddr->getSExtValue();
1711 const int64_t NullPtr =
1714 if (Imm != NullPtr) {
1717 CurDAG->getTargetConstant(Imm & ~MaxOffset,
DL, MVT::i32);
1718 MachineSDNode *MovHighBits =
CurDAG->getMachineNode(
1719 AMDGPU::V_MOV_B32_e32,
DL, MVT::i32, HighBits);
1720 VAddr =
SDValue(MovHighBits, 0);
1722 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1723 ImmOffset =
CurDAG->getTargetConstant(Imm & MaxOffset,
DL, MVT::i32);
1728 if (
CurDAG->isBaseWithConstantOffset(Addr)) {
1749 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1750 if (
TII->isLegalMUBUFImmOffset(C1) &&
1751 (!Subtarget->privateMemoryResourceIsRangeChecked() ||
1752 CurDAG->SignBitIsZero(N0))) {
1753 std::tie(VAddr, SOffset) = foldFrameIndex(N0);
1754 ImmOffset =
CurDAG->getTargetConstant(C1,
DL, MVT::i32);
1760 std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
1761 ImmOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1769 if (!
Reg.isPhysical())
1771 const auto *RC =
TRI.getPhysRegBaseClass(
Reg);
1772 return RC &&
TRI.isSGPRClass(RC);
1775bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(
SDNode *Parent,
1780 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
1781 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1782 MachineFunction &
MF =
CurDAG->getMachineFunction();
1783 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
1788 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1794 ConstantSDNode *CAddr;
1807 SOffset =
CurDAG->getTargetConstant(0,
DL, MVT::i32);
1812 SRsrc =
CurDAG->getRegister(
Info->getScratchRSrcReg(), MVT::v4i32);
1818bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(
SDValue Addr,
SDValue &SRsrc,
1821 SDValue Ptr, VAddr, Offen, Idxen, Addr64;
1822 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1824 if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset,
Offset, Offen, Idxen, Addr64))
1830 uint64_t Rsrc =
TII->getDefaultRsrcDataFormat() |
1843bool AMDGPUDAGToDAGISel::SelectBUFSOffset(
SDValue ByteOffsetNode,
1845 if (Subtarget->hasRestrictedSOffset() &&
isNullConstant(ByteOffsetNode)) {
1846 SOffset =
CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
1850 SOffset = ByteOffsetNode;
1868bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(
1872 int64_t OffsetVal = 0;
1876 bool CanHaveFlatSegmentOffsetBug =
1877 Subtarget->hasFlatSegmentOffsetBug() &&
1878 FlatVariant == FlatAddrSpace::FLAT &&
1881 if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
1883 if (isBaseWithConstantOffset64(Addr, N0, N1) &&
1884 (FlatVariant != FlatAddrSpace::FlatScratch ||
1885 isFlatScratchBaseLegal(Addr))) {
1893 if (COffsetVal == 0 || FlatVariant != FlatAddrSpace::FLAT || IsInBounds) {
1894 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
1895 if (
TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
1897 OffsetVal = COffsetVal;
1910 uint64_t RemainderOffset;
1912 std::tie(OffsetVal, RemainderOffset) =
1913 TII->splitFlatOffset(COffsetVal, AS, FlatVariant);
1916 getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL);
1923 unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
1924 if (Subtarget->hasAddNoCarryInsts()) {
1925 AddOp = AMDGPU::V_ADD_U32_e64;
1934 CurDAG->getTargetConstant(AMDGPU::sub0,
DL, MVT::i32);
1936 CurDAG->getTargetConstant(AMDGPU::sub1,
DL, MVT::i32);
1938 SDNode *N0Lo =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1939 DL, MVT::i32, N0, Sub0);
1940 SDNode *N0Hi =
CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
1941 DL, MVT::i32, N0, Sub1);
1944 getMaterializedScalarImm32(
Hi_32(RemainderOffset),
DL);
1946 SDVTList VTs =
CurDAG->getVTList(MVT::i32, MVT::i1);
1949 CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64,
DL, VTs,
1950 {AddOffsetLo,
SDValue(N0Lo, 0), Clamp});
1952 SDNode *Addc =
CurDAG->getMachineNode(
1953 AMDGPU::V_ADDC_U32_e64,
DL, VTs,
1957 CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID,
DL,
1962 MVT::i64, RegSequenceArgs),
1971 Offset =
CurDAG->getSignedTargetConstant(OffsetVal, SDLoc(), MVT::i32);
1975bool AMDGPUDAGToDAGISel::SelectFlatOffset(
SDNode *
N,
SDValue Addr,
1978 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1982bool AMDGPUDAGToDAGISel::SelectGlobalOffset(
SDNode *
N,
SDValue Addr,
1985 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
1989bool AMDGPUDAGToDAGISel::SelectScratchOffset(
SDNode *
N,
SDValue Addr,
1992 return SelectFlatOffsetImpl(
N, Addr, VAddr,
Offset,
2000 if (
Op.getValueType() == MVT::i32)
2015bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2018 bool NeedIOffset)
const {
2020 int64_t ImmOffset = 0;
2021 ScaleOffset =
false;
2027 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2029 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2033 FlatAddrSpace::FlatGlobal)) {
2035 ImmOffset = COffsetVal;
2036 }
else if (!
LHS->isDivergent()) {
2037 if (COffsetVal > 0) {
2042 int64_t SplitImmOffset = 0, RemainderOffset = COffsetVal;
2044 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2048 if (Subtarget->hasSignedGVSOffset() ?
isInt<32>(RemainderOffset)
2050 SDNode *VMov =
CurDAG->getMachineNode(
2051 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2052 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2055 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2065 unsigned NumLiterals =
2066 !
TII->isInlineConstant(APInt(32,
Lo_32(COffsetVal))) +
2067 !
TII->isInlineConstant(APInt(32,
Hi_32(COffsetVal)));
2068 if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
2077 if (!
LHS->isDivergent()) {
2080 ScaleOffset = SelectScaleOffset(
N,
RHS, Subtarget->hasSignedGVSOffset());
2082 RHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2089 if (!SAddr && !
RHS->isDivergent()) {
2091 ScaleOffset = SelectScaleOffset(
N,
LHS, Subtarget->hasSignedGVSOffset());
2093 LHS, Subtarget->hasSignedGVSOffset(),
CurDAG)) {
2100 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2105 if (Subtarget->hasScaleOffset() &&
2106 (Addr.
getOpcode() == (Subtarget->hasSignedGVSOffset()
2121 Offset =
CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2134 CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
2135 CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
2137 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2141bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(
SDNode *
N,
SDValue Addr,
2146 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2154bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPol(
SDNode *
N,
SDValue Addr,
2159 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2164 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2170bool AMDGPUDAGToDAGISel::SelectGlobalSAddrCPolM0(
SDNode *
N,
SDValue Addr,
2176 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2181 N->getConstantOperandVal(
N->getNumOperands() - 2) & ~AMDGPU::CPol::SCAL;
2187bool AMDGPUDAGToDAGISel::SelectGlobalSAddrGLC(
SDNode *
N,
SDValue Addr,
2192 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset,
Offset, ScaleOffset))
2196 CPol =
CurDAG->getTargetConstant(CPolVal, SDLoc(), MVT::i32);
2200bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffset(
SDNode *
N,
SDValue Addr,
2206 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2212 N->getConstantOperandVal(
N->getNumOperands() - 1) & ~AMDGPU::CPol::SCAL;
2218bool AMDGPUDAGToDAGISel::SelectGlobalSAddrNoIOffsetM0(
SDNode *
N,
SDValue Addr,
2224 if (!SelectGlobalSAddr(
N, Addr, SAddr, VOffset, DummyOffset, ScaleOffset,
2245 FI->getValueType(0));
2255bool AMDGPUDAGToDAGISel::SelectScratchSAddr(
SDNode *Parent,
SDValue Addr,
2264 int64_t COffsetVal = 0;
2266 if (
CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
2275 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2278 FlatAddrSpace::FlatScratch)) {
2279 int64_t SplitImmOffset, RemainderOffset;
2280 std::tie(SplitImmOffset, RemainderOffset) =
TII->splitFlatOffset(
2283 COffsetVal = SplitImmOffset;
2287 ? getMaterializedScalarImm32(
Lo_32(RemainderOffset),
DL)
2288 :
CurDAG->getSignedTargetConstant(RemainderOffset,
DL, MVT::i32);
2289 SAddr =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_ADD_I32,
DL, MVT::i32,
2294 Offset =
CurDAG->getSignedTargetConstant(COffsetVal,
DL, MVT::i32);
2300bool AMDGPUDAGToDAGISel::checkFlatScratchSVSSwizzleBug(
2302 if (!Subtarget->hasFlatScratchSVSSwizzleBug())
2308 KnownBits VKnown =
CurDAG->computeKnownBits(VAddr);
2315 return (VMax & 3) + (
SMax & 3) >= 4;
2318bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(
SDNode *
N,
SDValue Addr,
2322 int64_t ImmOffset = 0;
2326 if (isBaseWithConstantOffset64(Addr,
LHS,
RHS)) {
2328 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
2333 ImmOffset = COffsetVal;
2334 }
else if (!
LHS->isDivergent() && COffsetVal > 0) {
2338 int64_t SplitImmOffset, RemainderOffset;
2339 std::tie(SplitImmOffset, RemainderOffset) =
2344 SDNode *VMov =
CurDAG->getMachineNode(
2345 AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
2346 CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
2349 if (!isFlatScratchBaseLegal(Addr))
2351 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
2353 Offset =
CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32);
2354 CPol =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
2366 if (!
LHS->isDivergent() &&
RHS->isDivergent()) {
2369 }
else if (!
RHS->isDivergent() &&
LHS->isDivergent()) {
2376 if (OrigAddr != Addr) {
2377 if (!isFlatScratchBaseLegalSVImm(OrigAddr))
2380 if (!isFlatScratchBaseLegalSV(OrigAddr))
2384 if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
2387 Offset =
CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
2389 bool ScaleOffset = SelectScaleOffset(
N, VAddr,
true );
2398bool AMDGPUDAGToDAGISel::isSOffsetLegalWithImmOffset(
SDValue *SOffset,
2401 int64_t ImmOffset)
const {
2402 if (!IsBuffer && !Imm32Only && ImmOffset < 0 &&
2404 KnownBits SKnown =
CurDAG->computeKnownBits(*SOffset);
2416 bool IsSigned)
const {
2417 bool ScaleOffset =
false;
2418 if (!Subtarget->hasScaleOffset() || !
Offset)
2432 (IsSigned &&
Offset.getOpcode() == AMDGPUISD::MUL_I24) ||
2433 Offset.getOpcode() == AMDGPUISD::MUL_U24 ||
2434 (
Offset.isMachineOpcode() &&
2435 Offset.getMachineOpcode() ==
2436 (IsSigned ? AMDGPU::S_MUL_I64_I32_PSEUDO
2437 : AMDGPU::S_MUL_U64_U32_PSEUDO))) {
2439 ScaleOffset =
C->getZExtValue() ==
Size;
2451bool AMDGPUDAGToDAGISel::SelectSMRDOffset(
SDNode *
N,
SDValue ByteOffsetNode,
2453 bool Imm32Only,
bool IsBuffer,
2454 bool HasSOffset, int64_t ImmOffset,
2455 bool *ScaleOffset)
const {
2457 "Cannot match both soffset and offset at the same time!");
2462 *ScaleOffset = SelectScaleOffset(
N, ByteOffsetNode,
false );
2472 *SOffset = ByteOffsetNode;
2473 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2479 return isSOffsetLegalWithImmOffset(SOffset, Imm32Only, IsBuffer,
2486 SDLoc SL(ByteOffsetNode);
2490 int64_t ByteOffset = IsBuffer ?
C->getZExtValue() :
C->getSExtValue();
2492 *Subtarget, ByteOffset, IsBuffer, HasSOffset);
2493 if (EncodedOffset &&
Offset && !Imm32Only) {
2494 *
Offset =
CurDAG->getSignedTargetConstant(*EncodedOffset, SL, MVT::i32);
2503 if (EncodedOffset &&
Offset && Imm32Only) {
2504 *
Offset =
CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
2512 SDValue C32Bit =
CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
2514 CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);
2521SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(
SDValue Addr)
const {
2528 const MachineFunction &
MF =
CurDAG->getMachineFunction();
2529 const SIMachineFunctionInfo *
Info =
MF.getInfo<SIMachineFunctionInfo>();
2530 unsigned AddrHiVal =
Info->get32BitAddressHighBits();
2531 SDValue AddrHi =
CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);
2534 CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
2536 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
2537 SDValue(
CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
2539 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
2542 return SDValue(
CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
2549bool AMDGPUDAGToDAGISel::SelectSMRDBaseOffset(
SDNode *
N,
SDValue Addr,
2552 bool IsBuffer,
bool HasSOffset,
2554 bool *ScaleOffset)
const {
2556 assert(!Imm32Only && !IsBuffer);
2559 if (!SelectSMRDBaseOffset(
N, Addr,
B,
nullptr,
Offset,
false,
false,
true))
2564 ImmOff =
C->getSExtValue();
2566 return SelectSMRDBaseOffset(
N,
B, SBase, SOffset,
nullptr,
false,
false,
2567 true, ImmOff, ScaleOffset);
2587 if (SelectSMRDOffset(
N, N1, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2588 ImmOffset, ScaleOffset)) {
2592 if (SelectSMRDOffset(
N, N0, SOffset,
Offset, Imm32Only, IsBuffer, HasSOffset,
2593 ImmOffset, ScaleOffset)) {
2602 bool Imm32Only,
bool *ScaleOffset)
const {
2603 if (SelectSMRDBaseOffset(
N, Addr, SBase, SOffset,
Offset, Imm32Only,
2606 SBase = Expand32BitAddress(SBase);
2611 SBase = Expand32BitAddress(Addr);
2612 *
Offset =
CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
2619bool AMDGPUDAGToDAGISel::SelectSMRDImm(
SDValue Addr,
SDValue &SBase,
2621 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2625bool AMDGPUDAGToDAGISel::SelectSMRDImm32(
SDValue Addr,
SDValue &SBase,
2628 return SelectSMRD(
nullptr, Addr, SBase,
nullptr,
2635 if (!SelectSMRD(
N, Addr, SBase, &SOffset,
nullptr,
2636 false, &ScaleOffset))
2640 SDLoc(
N), MVT::i32);
2644bool AMDGPUDAGToDAGISel::SelectSMRDSgprImm(
SDNode *
N,
SDValue Addr,
2649 if (!SelectSMRD(
N, Addr, SBase, &SOffset, &
Offset,
false, &ScaleOffset))
2653 SDLoc(
N), MVT::i32);
2658 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2662bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(
SDValue N,
2665 return SelectSMRDOffset(
nullptr,
N,
nullptr, &
Offset,
2669bool AMDGPUDAGToDAGISel::SelectSMRDBufferSgprImm(
SDValue N,
SDValue &SOffset,
2673 return N.getValueType() == MVT::i32 &&
2674 SelectSMRDBaseOffset(
nullptr,
N, SOffset,
2679bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(
SDValue Index,
2684 if (
CurDAG->isBaseWithConstantOffset(Index)) {
2709SDNode *AMDGPUDAGToDAGISel::getBFE32(
bool IsSigned,
const SDLoc &
DL,
2713 unsigned Opcode = IsSigned ? AMDGPU::V_BFE_I32_e64 : AMDGPU::V_BFE_U32_e64;
2717 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, Off, W);
2719 unsigned Opcode = IsSigned ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;
2723 uint32_t PackedVal =
Offset | (Width << 16);
2724 SDValue PackedConst =
CurDAG->getTargetConstant(PackedVal,
DL, MVT::i32);
2726 return CurDAG->getMachineNode(Opcode,
DL, MVT::i32, Val, PackedConst);
2729void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(
SDNode *
N) {
2734 const SDValue &Shl =
N->getOperand(0);
2739 uint32_t BVal =
B->getZExtValue();
2740 uint32_t CVal =
C->getZExtValue();
2742 if (0 < BVal && BVal <= CVal && CVal < 32) {
2752void AMDGPUDAGToDAGISel::SelectS_BFE(
SDNode *
N) {
2753 switch (
N->getOpcode()) {
2755 if (
N->getOperand(0).getOpcode() ==
ISD::SRL) {
2758 const SDValue &Srl =
N->getOperand(0);
2762 if (Shift && Mask) {
2764 uint32_t MaskVal =
Mask->getZExtValue();
2776 if (
N->getOperand(0).getOpcode() ==
ISD::AND) {
2783 if (Shift && Mask) {
2785 uint32_t MaskVal =
Mask->getZExtValue() >> ShiftVal;
2794 }
else if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2795 SelectS_BFEFromShifts(
N);
2800 if (
N->getOperand(0).getOpcode() ==
ISD::SHL) {
2801 SelectS_BFEFromShifts(
N);
2816 unsigned Width =
cast<VTSDNode>(
N->getOperand(1))->getVT().getSizeInBits();
2826bool AMDGPUDAGToDAGISel::isCBranchSCC(
const SDNode *
N)
const {
2828 if (!
N->hasOneUse())
2838 MVT VT =
Cond.getOperand(0).getSimpleValueType();
2842 if (VT == MVT::i64) {
2845 Subtarget->hasScalarCompareEq64();
2848 if ((VT == MVT::f16 || VT == MVT::f32) && Subtarget->hasSALUFloatInsts())
2881void AMDGPUDAGToDAGISel::SelectBRCOND(
SDNode *
N) {
2884 if (
Cond.isUndef()) {
2885 CurDAG->SelectNodeTo(
N, AMDGPU::SI_BR_UNDEF, MVT::Other,
2886 N->getOperand(2),
N->getOperand(0));
2890 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
2892 bool UseSCCBr = isCBranchSCC(
N) && isUniformBr(
N);
2893 bool AndExec = !UseSCCBr;
2894 bool Negate =
false;
2897 Cond->getOperand(0)->getOpcode() == AMDGPUISD::SETCC) {
2912 bool NegatedBallot =
false;
2915 UseSCCBr = !BallotCond->isDivergent();
2916 Negate = Negate ^ NegatedBallot;
2931 UseSCCBr ? (Negate ? AMDGPU::S_CBRANCH_SCC0 : AMDGPU::S_CBRANCH_SCC1)
2932 : (Negate ? AMDGPU::S_CBRANCH_VCCZ : AMDGPU::S_CBRANCH_VCCNZ);
2933 Register CondReg = UseSCCBr ? AMDGPU::SCC :
TRI->getVCC();
2952 Subtarget->isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64, SL,
2954 CurDAG->getRegister(Subtarget->isWave32() ? AMDGPU::EXEC_LO
2962 CurDAG->SelectNodeTo(
N, BrOp, MVT::Other,
2967void AMDGPUDAGToDAGISel::SelectFP_EXTEND(
SDNode *
N) {
2968 if (Subtarget->hasSALUFloatInsts() &&
N->getValueType(0) == MVT::f32 &&
2969 !
N->isDivergent()) {
2971 if (Src.getValueType() == MVT::f16) {
2973 CurDAG->SelectNodeTo(
N, AMDGPU::S_CVT_HI_F32_F16,
N->getVTList(),
2983void AMDGPUDAGToDAGISel::SelectDSAppendConsume(
SDNode *
N,
unsigned IntrID) {
2986 unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
2987 AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
2992 MachineMemOperand *MMO =
M->getMemOperand();
2996 if (
CurDAG->isBaseWithConstantOffset(Ptr)) {
3001 if (isDSOffsetLegal(PtrBase, OffsetVal.
getZExtValue())) {
3002 N = glueCopyToM0(
N, PtrBase);
3003 Offset =
CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
3008 N = glueCopyToM0(
N, Ptr);
3009 Offset =
CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
3014 CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
3019 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3025void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(
SDNode *
N,
unsigned IntrID) {
3028 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3029 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3030 Opc = AMDGPU::DS_BVH_STACK_RTN_B32;
3032 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3033 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP1_RTN_B32;
3035 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3036 Opc = AMDGPU::DS_BVH_STACK_PUSH8_POP2_RTN_B64;
3039 SDValue Ops[] = {
N->getOperand(2),
N->getOperand(3),
N->getOperand(4),
3040 N->getOperand(5),
N->getOperand(0)};
3043 MachineMemOperand *MMO =
M->getMemOperand();
3044 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3048void AMDGPUDAGToDAGISel::SelectTensorLoadStore(
SDNode *
N,
unsigned IntrID) {
3049 bool IsLoad = IntrID == Intrinsic::amdgcn_tensor_load_to_lds;
3051 IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d4 : AMDGPU::TENSOR_STORE_FROM_LDS_d4;
3063 Opc = IsLoad ? AMDGPU::TENSOR_LOAD_TO_LDS_d2
3064 : AMDGPU::TENSOR_STORE_FROM_LDS_d2;
3076 (void)
CurDAG->SelectNodeTo(
N,
Opc, MVT::Other, TensorOps);
3081 case Intrinsic::amdgcn_ds_gws_init:
3082 return AMDGPU::DS_GWS_INIT;
3083 case Intrinsic::amdgcn_ds_gws_barrier:
3084 return AMDGPU::DS_GWS_BARRIER;
3085 case Intrinsic::amdgcn_ds_gws_sema_v:
3086 return AMDGPU::DS_GWS_SEMA_V;
3087 case Intrinsic::amdgcn_ds_gws_sema_br:
3088 return AMDGPU::DS_GWS_SEMA_BR;
3089 case Intrinsic::amdgcn_ds_gws_sema_p:
3090 return AMDGPU::DS_GWS_SEMA_P;
3091 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3092 return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
3098void AMDGPUDAGToDAGISel::SelectDS_GWS(
SDNode *
N,
unsigned IntrID) {
3099 if (!Subtarget->hasGWS() ||
3100 (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
3101 !Subtarget->hasGWSSemaReleaseAll())) {
3108 const bool HasVSrc =
N->getNumOperands() == 4;
3109 assert(HasVSrc ||
N->getNumOperands() == 3);
3112 SDValue BaseOffset =
N->getOperand(HasVSrc ? 3 : 2);
3115 MachineMemOperand *MMO =
M->getMemOperand();
3128 glueCopyToM0(
N,
CurDAG->getTargetConstant(0, SL, MVT::i32));
3129 ImmOffset = ConstOffset->getZExtValue();
3131 if (
CurDAG->isBaseWithConstantOffset(BaseOffset)) {
3140 =
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
3144 =
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3146 CurDAG->getTargetConstant(16, SL, MVT::i32));
3147 glueCopyToM0(
N,
SDValue(M0Base, 0));
3151 SDValue OffsetField =
CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
3155 const MCInstrDesc &InstrDesc =
TII->get(
Opc);
3156 int Data0Idx = AMDGPU::getNamedOperandIdx(
Opc, AMDGPU::OpName::data0);
3158 const TargetRegisterClass *DataRC =
TII->getRegClass(InstrDesc, Data0Idx);
3162 const SIRegisterInfo *
TRI = Subtarget->getRegisterInfo();
3165 MVT DataVT =
Data.getValueType().getSimpleVT();
3166 if (
TRI->isTypeLegalForClass(*DataRC, DataVT)) {
3168 Ops.push_back(
N->getOperand(2));
3174 CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3176 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, MVT::i32),
3178 CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32)};
3181 SL, MVT::v2i32, RegSeqOps),
3186 Ops.push_back(OffsetField);
3187 Ops.push_back(Chain);
3189 SDNode *Selected =
CurDAG->SelectNodeTo(
N,
Opc,
N->getVTList(),
Ops);
3193void AMDGPUDAGToDAGISel::SelectInterpP1F16(
SDNode *
N) {
3194 if (Subtarget->getLDSBankCount() != 16) {
3224 SDVTList VTs =
CurDAG->getVTList(MVT::f32, MVT::Other);
3227 CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32,
DL, VTs, {
3228 CurDAG->getTargetConstant(2,
DL, MVT::i32),
3234 SDNode *InterpP1LV =
3235 CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16,
DL, MVT::f32, {
3236 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3240 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3243 CurDAG->getTargetConstant(0,
DL, MVT::i1),
3244 CurDAG->getTargetConstant(0,
DL, MVT::i32),
3251void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(
SDNode *
N) {
3252 unsigned IntrID =
N->getConstantOperandVal(1);
3254 case Intrinsic::amdgcn_ds_append:
3255 case Intrinsic::amdgcn_ds_consume: {
3256 if (
N->getValueType(0) != MVT::i32)
3258 SelectDSAppendConsume(
N, IntrID);
3261 case Intrinsic::amdgcn_ds_bvh_stack_rtn:
3262 case Intrinsic::amdgcn_ds_bvh_stack_push4_pop1_rtn:
3263 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop1_rtn:
3264 case Intrinsic::amdgcn_ds_bvh_stack_push8_pop2_rtn:
3265 SelectDSBvhStackIntrinsic(
N, IntrID);
3267 case Intrinsic::amdgcn_init_whole_wave:
3268 CurDAG->getMachineFunction()
3269 .getInfo<SIMachineFunctionInfo>()
3270 ->setInitWholeWave();
3277void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(
SDNode *
N) {
3278 unsigned IntrID =
N->getConstantOperandVal(0);
3279 unsigned Opcode = AMDGPU::INSTRUCTION_LIST_END;
3280 SDNode *ConvGlueNode =
N->getGluedNode();
3286 CurDAG->getMachineNode(TargetOpcode::CONVERGENCECTRL_GLUE, {},
3287 MVT::Glue,
SDValue(ConvGlueNode, 0));
3289 ConvGlueNode =
nullptr;
3292 case Intrinsic::amdgcn_wqm:
3293 Opcode = AMDGPU::WQM;
3295 case Intrinsic::amdgcn_softwqm:
3296 Opcode = AMDGPU::SOFT_WQM;
3298 case Intrinsic::amdgcn_wwm:
3299 case Intrinsic::amdgcn_strict_wwm:
3300 Opcode = AMDGPU::STRICT_WWM;
3302 case Intrinsic::amdgcn_strict_wqm:
3303 Opcode = AMDGPU::STRICT_WQM;
3305 case Intrinsic::amdgcn_interp_p1_f16:
3306 SelectInterpP1F16(
N);
3308 case Intrinsic::amdgcn_permlane16_swap:
3309 case Intrinsic::amdgcn_permlane32_swap: {
3310 if ((IntrID == Intrinsic::amdgcn_permlane16_swap &&
3311 !Subtarget->hasPermlane16Swap()) ||
3312 (IntrID == Intrinsic::amdgcn_permlane32_swap &&
3313 !Subtarget->hasPermlane32Swap())) {
3318 Opcode = IntrID == Intrinsic::amdgcn_permlane16_swap
3319 ? AMDGPU::V_PERMLANE16_SWAP_B32_e64
3320 : AMDGPU::V_PERMLANE32_SWAP_B32_e64;
3324 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3326 bool FI =
N->getConstantOperandVal(3);
3327 NewOps[2] =
CurDAG->getTargetConstant(
3330 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), NewOps);
3338 if (Opcode != AMDGPU::INSTRUCTION_LIST_END) {
3340 CurDAG->SelectNodeTo(
N, Opcode,
N->getVTList(), {Src});
3345 NewOps.push_back(
SDValue(ConvGlueNode, 0));
3346 CurDAG->MorphNodeTo(
N,
N->getOpcode(),
N->getVTList(), NewOps);
3350void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(
SDNode *
N) {
3351 unsigned IntrID =
N->getConstantOperandVal(1);
3353 case Intrinsic::amdgcn_ds_gws_init:
3354 case Intrinsic::amdgcn_ds_gws_barrier:
3355 case Intrinsic::amdgcn_ds_gws_sema_v:
3356 case Intrinsic::amdgcn_ds_gws_sema_br:
3357 case Intrinsic::amdgcn_ds_gws_sema_p:
3358 case Intrinsic::amdgcn_ds_gws_sema_release_all:
3359 SelectDS_GWS(
N, IntrID);
3361 case Intrinsic::amdgcn_tensor_load_to_lds:
3362 case Intrinsic::amdgcn_tensor_store_from_lds:
3363 SelectTensorLoadStore(
N, IntrID);
3372void AMDGPUDAGToDAGISel::SelectWAVE_ADDRESS(
SDNode *
N) {
3374 CurDAG->getTargetConstant(Subtarget->getWavefrontSizeLog2(), SDLoc(
N), MVT::i32);
3375 CurDAG->SelectNodeTo(
N, AMDGPU::S_LSHR_B32,
N->getVTList(),
3376 {N->getOperand(0), Log2WaveSize});
3379void AMDGPUDAGToDAGISel::SelectSTACKRESTORE(
SDNode *
N) {
3394 Subtarget->getWavefrontSizeLog2(), SL, MVT::i32);
3396 if (
N->isDivergent()) {
3397 SrcVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL,
3402 CopyVal =
SDValue(
CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
3403 {SrcVal, Log2WaveSize}),
3407 SDValue CopyToSP =
CurDAG->getCopyToReg(
N->getOperand(0), SL,
SP, CopyVal);
3411bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(
SDValue In,
SDValue &Src,
3413 bool IsCanonicalizing,
3414 bool AllowAbs)
const {
3420 Src = Src.getOperand(0);
3421 }
else if (Src.getOpcode() ==
ISD::FSUB && IsCanonicalizing) {
3425 if (
LHS &&
LHS->isZero()) {
3427 Src = Src.getOperand(1);
3431 if (AllowAbs && Src.getOpcode() ==
ISD::FABS) {
3433 Src = Src.getOperand(0);
3446 if (IsCanonicalizing)
3461 EVT VT = Src.getValueType();
3463 (VT != MVT::i32 && VT != MVT::v2i32 && VT != MVT::i64))
3470 auto ReplaceSrc = [&]() ->
SDValue {
3472 return Src.getOperand(0);
3477 Src.getValueType(),
LHS, Index);
3503 if (SelectVOP3ModsImpl(In, Src, Mods,
true,
3505 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3512bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
3515 if (SelectVOP3ModsImpl(In, Src, Mods,
false,
3517 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3524bool AMDGPUDAGToDAGISel::SelectVOP3BMods(
SDValue In,
SDValue &Src,
3527 if (SelectVOP3ModsImpl(In, Src, Mods,
3530 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3537bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(
SDValue In,
SDValue &Src)
const {
3545bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(
SDValue In,
SDValue &Src,
3549 if (SelectVOP3ModsImpl(In, Src, Mods,
3554 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3561bool AMDGPUDAGToDAGISel::SelectVINTERPMods(
SDValue In,
SDValue &Src,
3563 return SelectVINTERPModsImpl(In, Src, SrcMods,
false);
3566bool AMDGPUDAGToDAGISel::SelectVINTERPModsHi(
SDValue In,
SDValue &Src,
3568 return SelectVINTERPModsImpl(In, Src, SrcMods,
true);
3571bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(
SDValue In,
SDValue &Src,
3575 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3576 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3578 return SelectVOP3Mods(In, Src, SrcMods);
3581bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(
SDValue In,
SDValue &Src,
3585 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3586 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3588 return SelectVOP3BMods(In, Src, SrcMods);
3591bool AMDGPUDAGToDAGISel::SelectVOP3OMods(
SDValue In,
SDValue &Src,
3596 Clamp =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3597 Omod =
CurDAG->getTargetConstant(0,
DL, MVT::i1);
3602bool AMDGPUDAGToDAGISel::SelectVOP3PMods(
SDValue In,
SDValue &Src,
3603 SDValue &SrcMods,
bool IsDOT)
const {
3610 Src = Src.getOperand(0);
3614 (!IsDOT || !Subtarget->hasDOTOpSelHazard())) {
3615 unsigned VecMods = Mods;
3617 SDValue Lo = stripBitcast(Src.getOperand(0));
3618 SDValue Hi = stripBitcast(Src.getOperand(1));
3621 Lo = stripBitcast(
Lo.getOperand(0));
3626 Hi = stripBitcast(
Hi.getOperand(0));
3636 unsigned VecSize = Src.getValueSizeInBits();
3637 Lo = stripExtractLoElt(
Lo);
3638 Hi = stripExtractLoElt(
Hi);
3640 if (
Lo.getValueSizeInBits() > VecSize) {
3641 Lo =
CurDAG->getTargetExtractSubreg(
3642 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3646 if (
Hi.getValueSizeInBits() > VecSize) {
3647 Hi =
CurDAG->getTargetExtractSubreg(
3648 (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In),
3652 assert(
Lo.getValueSizeInBits() <= VecSize &&
3653 Hi.getValueSizeInBits() <= VecSize);
3655 if (
Lo ==
Hi && !isInlineImmediate(
Lo.getNode())) {
3659 if (VecSize ==
Lo.getValueSizeInBits()) {
3661 }
else if (VecSize == 32) {
3662 Src = createVOP3PSrc32FromLo16(
Lo, Src,
CurDAG, Subtarget);
3664 assert(
Lo.getValueSizeInBits() == 32 && VecSize == 64);
3668 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL,
3669 Lo.getValueType()), 0);
3670 auto RC =
Lo->isDivergent() ? AMDGPU::VReg_64RegClassID
3671 : AMDGPU::SReg_64RegClassID;
3673 CurDAG->getTargetConstant(RC, SL, MVT::i32),
3674 Lo,
CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
3675 Undef,
CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) };
3677 Src =
SDValue(
CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL,
3678 Src.getValueType(),
Ops), 0);
3680 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3686 .bitcastToAPInt().getZExtValue();
3688 Src =
CurDAG->getTargetConstant(
Lit, SDLoc(In), MVT::i64);
3689 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3696 Src.getNumOperands() == 2) {
3702 ArrayRef<int>
Mask = SVN->getMask();
3704 if (Mask[0] < 2 && Mask[1] < 2) {
3706 SDValue ShuffleSrc = SVN->getOperand(0);
3719 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3727 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3731bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(
SDValue In,
SDValue &Src,
3733 return SelectVOP3PMods(In, Src, SrcMods,
true);
3736bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsDOT(
SDValue In,
SDValue &Src)
const {
3738 SelectVOP3PMods(In, SrcTmp, SrcModsTmp,
true);
3747bool AMDGPUDAGToDAGISel::SelectVOP3PModsF32(
SDValue In,
SDValue &Src,
3749 SelectVOP3Mods(In, Src, SrcMods);
3752 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3756bool AMDGPUDAGToDAGISel::SelectVOP3PNoModsF32(
SDValue In,
SDValue &Src)
const {
3758 SelectVOP3PModsF32(In, SrcTmp, SrcModsTmp);
3767bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(
SDValue In,
3770 assert(
C->getAPIntValue().getBitWidth() == 1 &&
"expected i1 value");
3773 unsigned SrcVal =
C->getZExtValue();
3777 Src =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3784 unsigned DstRegClass;
3786 switch (Elts.
size()) {
3788 DstRegClass = AMDGPU::VReg_256RegClassID;
3792 DstRegClass = AMDGPU::VReg_128RegClassID;
3796 DstRegClass = AMDGPU::VReg_64RegClassID;
3804 Ops.push_back(
CurDAG->getTargetConstant(DstRegClass,
DL, MVT::i32));
3805 for (
unsigned i = 0; i < Elts.
size(); ++i) {
3806 Ops.push_back(Elts[i]);
3807 Ops.push_back(
CurDAG->getTargetConstant(
3810 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
DL, DstTy,
Ops);
3817 assert(
"unhandled Reg sequence size" &&
3818 (Elts.
size() == 8 || Elts.
size() == 16));
3822 for (
unsigned i = 0; i < Elts.
size(); i += 2) {
3823 SDValue LoSrc = stripExtractLoElt(stripBitcast(Elts[i]));
3828 if (Subtarget->useRealTrue16Insts()) {
3833 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
DL, MVT::i16),
3836 emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID, MVT::i32,
3837 {Elts[i],
Undef}, {AMDGPU::lo16, AMDGPU::hi16},
DL);
3838 Elts[i + 1] = emitRegSequence(*
CurDAG, AMDGPU::VGPR_32RegClassID,
3839 MVT::i32, {Elts[i + 1],
Undef},
3840 {AMDGPU::lo16, AMDGPU::hi16},
DL);
3842 SDValue PackLoLo =
CurDAG->getTargetConstant(0x05040100,
DL, MVT::i32);
3844 CurDAG->getMachineNode(AMDGPU::V_PERM_B32_e64,
DL, MVT::i32,
3845 {Elts[i + 1], Elts[i], PackLoLo});
3849 return buildRegSequence32(PackedElts,
DL);
3855 unsigned ElementSize)
const {
3856 if (ElementSize == 16)
3857 return buildRegSequence16(Elts,
DL);
3858 if (ElementSize == 32)
3859 return buildRegSequence32(Elts,
DL);
3863void AMDGPUDAGToDAGISel::selectWMMAModsNegAbs(
unsigned ModOpcode,
3867 unsigned ElementSize)
const {
3872 for (
auto El : Elts) {
3875 NegAbsElts.
push_back(El->getOperand(0));
3877 if (Elts.size() != NegAbsElts.
size()) {
3879 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3883 Src =
SDValue(buildRegSequence(NegAbsElts,
DL, ElementSize), 0);
3889 Src =
SDValue(buildRegSequence(Elts,
DL, ElementSize), 0);
3897 std::function<
bool(
SDValue)> ModifierCheck) {
3901 for (
unsigned i = 0; i < F16Pair->getNumOperands(); ++i) {
3902 SDValue ElF16 = stripBitcast(F16Pair->getOperand(i));
3903 if (!ModifierCheck(ElF16))
3910bool AMDGPUDAGToDAGISel::SelectWMMAModsF16Neg(
SDValue In,
SDValue &Src,
3928 Src =
SDValue(buildRegSequence16(EltsF16, SDLoc(In)), 0);
3947 Src =
SDValue(buildRegSequence32(EltsV2F16, SDLoc(In)), 0);
3953 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
3957bool AMDGPUDAGToDAGISel::SelectWMMAModsF16NegAbs(
SDValue In,
SDValue &Src,
3968 if (EltsF16.
empty())
3978 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF16, Src, SDLoc(In), 16);
3988 if (EltsV2F16.
empty())
3997 selectWMMAModsNegAbs(ModOpcode, Mods, EltsV2F16, Src, SDLoc(In), 32);
4000 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4004bool AMDGPUDAGToDAGISel::SelectWMMAModsF32NegAbs(
SDValue In,
SDValue &Src,
4014 unsigned ModOpcode =
4025 selectWMMAModsNegAbs(ModOpcode, Mods, EltsF32, Src, SDLoc(In), 32);
4028 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4032bool AMDGPUDAGToDAGISel::SelectWMMAVISrc(
SDValue In,
SDValue &Src)
const {
4034 BitVector UndefElements;
4036 if (isInlineImmediate(
Splat.getNode())) {
4038 unsigned Imm =
C->getAPIntValue().getSExtValue();
4039 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4043 unsigned Imm =
C->getValueAPF().bitcastToAPInt().getSExtValue();
4044 Src =
CurDAG->getTargetConstant(Imm, SDLoc(In), MVT::i32);
4052 SDValue SplatSrc32 = stripBitcast(In);
4054 if (
SDValue Splat32 = SplatSrc32BV->getSplatValue()) {
4055 SDValue SplatSrc16 = stripBitcast(Splat32);
4058 const SIInstrInfo *
TII = Subtarget->getInstrInfo();
4059 std::optional<APInt> RawValue;
4061 RawValue =
C->getValueAPF().bitcastToAPInt();
4063 RawValue =
C->getAPIntValue();
4065 if (RawValue.has_value()) {
4066 EVT VT =
In.getValueType().getScalarType();
4072 if (
TII->isInlineConstant(FloatVal)) {
4073 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4078 if (
TII->isInlineConstant(RawValue.value())) {
4079 Src =
CurDAG->getTargetConstant(RawValue.value(), SDLoc(In),
4092bool AMDGPUDAGToDAGISel::SelectSWMMACIndex8(
SDValue In,
SDValue &Src,
4098 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4107 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4111bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(
SDValue In,
SDValue &Src,
4117 const llvm::SDValue &ShiftSrc =
In.getOperand(0);
4126 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4130bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(
SDValue In,
SDValue &Src,
4138 const SDValue &ExtendSrc =
In.getOperand(0);
4142 const SDValue &CastSrc =
In.getOperand(0);
4146 if (Zero &&
Zero->getZExtValue() == 0)
4157 Src = ExtractVecEltSrc;
4161 IndexKey =
CurDAG->getTargetConstant(
Key, SDLoc(In), MVT::i32);
4165bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(
SDValue In,
SDValue &Src,
4169 SrcMods =
CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
4173bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(
SDValue In,
SDValue &Src,
4176 return SelectVOP3Mods(In, Src, SrcMods);
4188 Op =
Op.getOperand(0);
4190 IsExtractHigh =
false;
4193 if (!Low16 || !Low16->isZero())
4195 Op = stripBitcast(
Op.getOperand(1));
4196 if (
Op.getValueType() != MVT::bf16)
4201 if (
Op.getValueType() != MVT::i32)
4206 if (Mask->getZExtValue() == 0xffff0000) {
4207 IsExtractHigh =
true;
4208 return Op.getOperand(0);
4217 return Op.getOperand(0);
4226bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(
SDValue In,
SDValue &Src,
4230 SelectVOP3ModsImpl(In, Src, Mods);
4232 bool IsExtractHigh =
false;
4234 Src = Src.getOperand(0);
4235 }
else if (VT == MVT::bf16) {
4243 if (Src.getValueType() != VT &&
4244 (VT != MVT::bf16 || Src.getValueType() != MVT::i32))
4247 Src = stripBitcast(Src);
4253 SelectVOP3ModsImpl(Src, Src, ModsTmp);
4268 if (Src.getValueSizeInBits() == 16) {
4277 Src.getOperand(0).getValueType() == MVT::i32) {
4278 Src = Src.getOperand(0);
4282 if (Subtarget->useRealTrue16Insts())
4284 Src = createVOP3PSrc32FromLo16(Src, In,
CurDAG, Subtarget);
4285 }
else if (IsExtractHigh)
4291bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(
SDValue In,
SDValue &Src,
4294 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16))
4296 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4300bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(
SDValue In,
SDValue &Src,
4303 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::f16);
4304 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4308bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16ModsExt(
SDValue In,
SDValue &Src,
4311 if (!SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16))
4313 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4317bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixBF16Mods(
SDValue In,
SDValue &Src,
4320 SelectVOP3PMadMixModsImpl(In, Src, Mods, MVT::bf16);
4321 SrcMods =
CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
4329 unsigned NumOpcodes = 0;
4342 const uint8_t SrcBits[3] = { 0xf0, 0xcc, 0xaa };
4345 if (
C->isAllOnes()) {
4355 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4369 if (Src.size() == 3) {
4375 if (
C->isAllOnes()) {
4377 for (
unsigned I = 0;
I < Src.size(); ++
I) {
4378 if (Src[
I] ==
LHS) {
4390 Bits = SrcBits[Src.size()];
4395 switch (In.getOpcode()) {
4403 if (!getOperandBits(
LHS, LHSBits) ||
4404 !getOperandBits(
RHS, RHSBits)) {
4405 Src = std::move(Backup);
4406 return std::make_pair(0, 0);
4412 NumOpcodes +=
Op.first;
4413 LHSBits =
Op.second;
4418 NumOpcodes +=
Op.first;
4419 RHSBits =
Op.second;
4424 return std::make_pair(0, 0);
4428 switch (In.getOpcode()) {
4430 TTbl = LHSBits & RHSBits;
4433 TTbl = LHSBits | RHSBits;
4436 TTbl = LHSBits ^ RHSBits;
4442 return std::make_pair(NumOpcodes + 1, TTbl);
4449 unsigned NumOpcodes;
4451 std::tie(NumOpcodes, TTbl) =
BitOp3_Op(In, Src);
4455 if (NumOpcodes < 2 || Src.empty())
4461 if (NumOpcodes < 4 && !In->isDivergent())
4464 if (NumOpcodes == 2 &&
In.getValueType() == MVT::i32) {
4469 (
In.getOperand(0).getOpcode() ==
In.getOpcode() ||
4470 In.getOperand(1).getOpcode() ==
In.getOpcode()))
4484 while (Src.size() < 3)
4485 Src.push_back(Src[0]);
4491 Tbl =
CurDAG->getTargetConstant(TTbl, SDLoc(In), MVT::i32);
4497 return CurDAG->getUNDEF(MVT::i32);
4501 return CurDAG->getConstant(
C->getZExtValue() << 16, SL, MVT::i32);
4506 return CurDAG->getConstant(
4507 C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
4517bool AMDGPUDAGToDAGISel::isVGPRImm(
const SDNode *
N)
const {
4518 assert(
CurDAG->getTarget().getTargetTriple().isAMDGCN());
4520 const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
4521 const SIInstrInfo *SII = Subtarget->getInstrInfo();
4524 bool AllUsesAcceptSReg =
true;
4526 Limit < 10 && U !=
E; ++U, ++Limit) {
4527 const TargetRegisterClass *RC =
4528 getOperandRegClass(
U->getUser(),
U->getOperandNo());
4536 if (RC != &AMDGPU::VS_32RegClass && RC != &AMDGPU::VS_64RegClass &&
4537 RC != &AMDGPU::VS_64_Align2RegClass) {
4538 AllUsesAcceptSReg =
false;
4539 SDNode *
User =
U->getUser();
4540 if (
User->isMachineOpcode()) {
4541 unsigned Opc =
User->getMachineOpcode();
4542 const MCInstrDesc &
Desc = SII->get(
Opc);
4543 if (
Desc.isCommutable()) {
4544 unsigned OpIdx =
Desc.getNumDefs() +
U->getOperandNo();
4547 unsigned CommutedOpNo = CommuteIdx1 -
Desc.getNumDefs();
4548 const TargetRegisterClass *CommutedRC =
4549 getOperandRegClass(
U->getUser(), CommutedOpNo);
4550 if (CommutedRC == &AMDGPU::VS_32RegClass ||
4551 CommutedRC == &AMDGPU::VS_64RegClass ||
4552 CommutedRC == &AMDGPU::VS_64_Align2RegClass)
4553 AllUsesAcceptSReg =
true;
4561 if (!AllUsesAcceptSReg)
4565 return !AllUsesAcceptSReg && (Limit < 10);
4568bool AMDGPUDAGToDAGISel::isUniformLoad(
const SDNode *
N)
const {
4570 const MachineMemOperand *MMO = Ld->getMemOperand();
4588 (Subtarget->getScalarizeGlobalBehavior() &&
4592 ->isMemOpHasNoClobberedMemOperand(
N)));
4598 bool IsModified =
false;
4604 while (Position !=
CurDAG->allnodes_end()) {
4611 if (ResNode !=
Node) {
4617 CurDAG->RemoveDeadNodes();
4618 }
while (IsModified);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr, SDValue &N0, SDValue &N1)
static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr)
static SDValue matchExtFromI32orI32(SDValue Op, bool IsSigned, const SelectionDAG *DAG)
static MemSDNode * findMemSDNode(SDNode *N)
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val)
static SDValue combineBallotPattern(SDValue VCMP, bool &Negate)
static SDValue matchBF16FPExtendLike(SDValue Op, bool &IsExtractHigh)
static void checkWMMAElementsModifiersF16(BuildVectorSDNode *BV, std::function< bool(SDValue)> ModifierCheck)
Defines an instruction selector for the AMDGPU target.
Contains the definition of a TargetInstrInfo class that is common to all AMD GPUs.
static bool isNoUnsignedWrap(MachineInstr *Addr)
static bool isExtractHiElt(MachineRegisterInfo &MRI, Register In, Register &Out)
static std::pair< unsigned, uint8_t > BitOp3_Op(Register R, SmallVectorImpl< Register > &Src, const MachineRegisterInfo &MRI)
static unsigned gwsIntrinToOpcode(unsigned IntrID)
Provides AMDGPU specific target descriptions.
Base class for AMDGPU specific classes of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Register const TargetRegisterInfo * TRI
Promote Memory to Register
MachineInstr unsigned OpIdx
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Provides R600 specific target descriptions.
Interface definition for R600RegisterInfo.
const SmallVectorImpl< MachineOperand > & Cond
SI DAG Lowering interface definition.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
AMDGPUDAGToDAGISelLegacy(TargetMachine &TM, CodeGenOptLevel OptLevel)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
AMDGPU specific code to select AMDGPU machine instructions for SelectionDAG operations.
bool isSDWAOperand(const SDNode *N) const
void SelectBuildVector(SDNode *N, unsigned RegClassID)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
bool runOnMachineFunction(MachineFunction &MF) override
void SelectVectorShuffle(SDNode *N)
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
AMDGPUDAGToDAGISel()=delete
void PostprocessISelDAG() override
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
bool matchLoadD16FromBuildVector(SDNode *N) const
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
AMDGPUISelDAGToDAGPass(TargetMachine &TM)
static SDValue stripBitcast(SDValue Val)
static const fltSemantics & BFloat()
static const fltSemantics & IEEEhalf()
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
bool isSignMask() const
Check if the APInt's value is returned by getSignMask.
bool isMaxSignedValue() const
Determine if this is the largest signed value.
int64_t getSExtValue() const
Get sign extended value.
unsigned countr_one() const
Count the number of trailing one bits.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
LLVM Basic Block Representation.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI SDValue getSplatValue(const APInt &DemandedElts, BitVector *UndefElements=nullptr) const
Returns the demanded splatted value or a null value if this is not a splat.
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
FunctionPass class - This class is used to implement most global optimizations.
const SIInstrInfo * getInstrInfo() const override
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Generation getGeneration() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
TypeSize getValue() const
Analysis pass that exposes the LoopInfo for a function.
SmallVector< LoopT *, 4 > getLoopsInPreorder() const
Return all of the loops in the function in preorder across the loop nests, with siblings in forward p...
The legacy pass manager's analysis pass to compute loop information.
static MVT getIntegerVT(unsigned BitWidth)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
LocationSize getSize() const
Return the size in bytes of the memory reference.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This is an abstract virtual class for memory operations.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A set of analyses that are preserved following a run of a transformation pass.
Wrapper class representing virtual and physical registers.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool isAnyAdd() const
Returns true if the node type is ADD or PTRADD.
static use_iterator use_end()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
static unsigned getMaxMUBUFImmOffset(const GCNSubtarget &ST)
bool findCommutedOpIndices(const MachineInstr &MI, unsigned &SrcOpIdx0, unsigned &SrcOpIdx1) const override
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
static bool isSGPRClass(const TargetRegisterClass *RC)
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SelectionDAGISelLegacy(char &ID, std::unique_ptr< SelectionDAGISel > S)
SelectionDAGISelPass(std::unique_ptr< SelectionDAGISel > Selector)
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
std::unique_ptr< FunctionLoweringInfo > FuncInfo
const TargetLowering * TLI
const TargetInstrInfo * TII
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
SelectionDAGISel(TargetMachine &tm, CodeGenOptLevel OL=CodeGenOptLevel::Default)
virtual bool runOnMachineFunction(MachineFunction &mf)
const TargetLowering * getTargetLowering() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
static const unsigned CommuteAnyOperandIndex
Primary interface to the complete machine description for the target machine.
unsigned getID() const
Return the register class ID number.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ REGION_ADDRESS
Address space for region memory. (GDS)
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool isGFX12Plus(const MCSubtargetInfo &STI)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isUniformMMO(const MachineMemOperand *MMO)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
@ STACKRESTORE
STACKRESTORE has two operands: an input chain and a pointer to restore to; it returns an output chain.
@ PTRADD
PTRADD represents pointer arithmetic semantics, for targets that opt in using shouldPreservePtrArith(...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
@ ADD
Simple integer binary arithmetic operators.
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
@ FADD
Simple binary floating point operators.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ CONVERGENCECTRL_GLUE
This does not correspond to any convergence control intrinsic.
@ SIGN_EXTEND
Conversion operators.
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
@ UNDEF
UNDEF - An undefined node.
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
@ SHL
Shift and rotation operations.
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
@ AND
Bitwise operators - logical and, logical or, logical xor.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
@ BRCOND
BRCOND - Conditional branch.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
bool isExtOpcode(unsigned Opcode)
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
bool isBoolSGPR(SDValue V)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool getConstantValue(SDValue N, uint32_t &Out)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionPass * createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel)
This pass converts a legalized DAG into an AMDGPU-specific DAG, ready for instruction scheduling.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Implement std::hash so that hash_code can be used in STL containers.
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
uint64_t getScalarSizeInBits() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
static KnownBits add(const KnownBits &LHS, const KnownBits &RHS, bool NSW=false, bool NUW=false, bool SelfAdd=false)
Compute knownbits resulting from addition of LHS and RHS.
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
APInt getMinValue() const
Return the minimal unsigned value possible given these KnownBits.
static unsigned getSubRegFromChannel(unsigned Channel)
bool hasNoUnsignedWrap() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.