19#define DEBUG_TYPE "si-shrink-instructions"
22 "Number of 64-bit instruction reduced to 32-bit.");
24 "Number of literal constants folded into 32-bit instructions.");
30enum ChangeKind {
None, UpdateHint, UpdateInst };
32class SIShrinkInstructions {
34 MachineRegisterInfo *MRI;
35 const GCNSubtarget *ST;
36 const SIInstrInfo *TII;
37 const SIRegisterInfo *TRI;
40 bool foldImmediates(MachineInstr &
MI,
bool TryToCommute =
true)
const;
41 bool shouldShrinkTrue16(MachineInstr &
MI)
const;
43 bool isKUImmOperand(
const MachineOperand &Src)
const;
44 bool isKImmOrKUImmOperand(
const MachineOperand &Src,
bool &IsUnsigned)
const;
45 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &
MI)
const;
46 bool shrinkScalarCompare(MachineInstr &
MI)
const;
47 bool shrinkMIMG(MachineInstr &
MI)
const;
48 bool shrinkMadFma(MachineInstr &
MI)
const;
49 ChangeKind shrinkScalarLogicOp(MachineInstr &
MI)
const;
50 bool tryReplaceDeadSDST(MachineInstr &
MI)
const;
53 bool instReadsReg(
const MachineInstr *
MI,
unsigned Reg,
54 unsigned SubReg)
const;
55 bool instModifiesReg(
const MachineInstr *
MI,
unsigned Reg,
56 unsigned SubReg)
const;
57 TargetInstrInfo::RegSubRegPair getSubRegForIndex(
Register Reg,
unsigned Sub,
59 void dropInstructionKeepingImpDefs(MachineInstr &
MI)
const;
60 MachineInstr *matchSwap(MachineInstr &MovT)
const;
63 SIShrinkInstructions() =
default;
64 bool run(MachineFunction &MF);
72 SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}
74 bool runOnMachineFunction(MachineFunction &MF)
override;
76 StringRef getPassName()
const override {
return "SI Shrink Instructions"; }
78 void getAnalysisUsage(AnalysisUsage &AU)
const override {
87 "SI Shrink Instructions",
false,
false)
89char SIShrinkInstructionsLegacy::
ID = 0;
92 return new SIShrinkInstructionsLegacy();
99 bool TryToCommute)
const {
102 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
105 MachineOperand &Src0 =
MI.getOperand(Src0Idx);
110 if (Def &&
Def->isMoveImmediate()) {
111 MachineOperand &MovSrc =
Def->getOperand(1);
112 bool ConstantFolded =
false;
114 if (
TII->isOperandLegal(
MI, Src0Idx, &MovSrc)) {
115 if (MovSrc.
isImm()) {
117 ConstantFolded =
true;
118 }
else if (MovSrc.
isFI()) {
120 ConstantFolded =
true;
124 ConstantFolded =
true;
128 if (ConstantFolded) {
130 Def->eraseFromParent();
131 ++NumLiteralConstantsFolded;
139 if (TryToCommute &&
MI.isCommutable()) {
140 if (
TII->commuteInstruction(
MI)) {
141 if (foldImmediates(
MI,
false))
145 TII->commuteInstruction(
MI);
154bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &
MI)
const {
155 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I) {
156 const MachineOperand &MO =
MI.getOperand(
I);
160 "True16 Instructions post-RA");
173bool SIShrinkInstructions::isKImmOperand(
const MachineOperand &Src)
const {
175 !
TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
178bool SIShrinkInstructions::isKUImmOperand(
const MachineOperand &Src)
const {
180 !
TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
183bool SIShrinkInstructions::isKImmOrKUImmOperand(
const MachineOperand &Src,
184 bool &IsUnsigned)
const {
187 return !
TII->isInlineConstant(Src);
192 return !
TII->isInlineConstant(Src);
209 int32_t &ModifiedImm,
bool Scalar) {
210 if (
TII->isInlineConstant(Src))
212 int32_t SrcImm =
static_cast<int32_t
>(Src.getImm());
218 ModifiedImm = ~SrcImm;
219 if (
TII->isInlineConstant(
APInt(32, ModifiedImm,
true)))
220 return AMDGPU::V_NOT_B32_e32;
224 if (
TII->isInlineConstant(
APInt(32, ModifiedImm,
true)))
225 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
232void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
233 MachineInstr &
MI)
const {
234 MachineFunction &MF = *
MI.getMF();
235 for (
unsigned i =
MI.getDesc().getNumOperands() +
236 MI.getDesc().implicit_uses().size() +
237 MI.getDesc().implicit_defs().size(),
238 e =
MI.getNumOperands();
240 const MachineOperand &MO =
MI.getOperand(i);
246bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &
MI)
const {
253 if (!
MI.getOperand(0).isReg()) {
254 if (
TII->commuteInstruction(
MI,
false, 0, 1))
259 const MachineOperand &Src0 =
MI.getOperand(0);
263 MachineOperand &Src1 =
MI.getOperand(1);
273 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
275 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
277 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
278 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
282 MI.setDesc(
TII->get(SOPKOpc));
289 const MCInstrDesc &NewDesc =
TII->get(SOPKOpc);
302bool SIShrinkInstructions::shrinkMIMG(MachineInstr &
MI)
const {
308 switch (
Info->MIMGEncoding) {
309 case AMDGPU::MIMGEncGfx10NSA:
310 NewEncoding = AMDGPU::MIMGEncGfx10Default;
312 case AMDGPU::MIMGEncGfx11NSA:
313 NewEncoding = AMDGPU::MIMGEncGfx11Default;
320 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
321 unsigned NewAddrDwords =
Info->VAddrDwords;
322 const TargetRegisterClass *RC;
324 if (
Info->VAddrDwords == 2) {
325 RC = &AMDGPU::VReg_64RegClass;
326 }
else if (
Info->VAddrDwords == 3) {
327 RC = &AMDGPU::VReg_96RegClass;
328 }
else if (
Info->VAddrDwords == 4) {
329 RC = &AMDGPU::VReg_128RegClass;
330 }
else if (
Info->VAddrDwords == 5) {
331 RC = &AMDGPU::VReg_160RegClass;
332 }
else if (
Info->VAddrDwords == 6) {
333 RC = &AMDGPU::VReg_192RegClass;
334 }
else if (
Info->VAddrDwords == 7) {
335 RC = &AMDGPU::VReg_224RegClass;
336 }
else if (
Info->VAddrDwords == 8) {
337 RC = &AMDGPU::VReg_256RegClass;
338 }
else if (
Info->VAddrDwords == 9) {
339 RC = &AMDGPU::VReg_288RegClass;
340 }
else if (
Info->VAddrDwords == 10) {
341 RC = &AMDGPU::VReg_320RegClass;
342 }
else if (
Info->VAddrDwords == 11) {
343 RC = &AMDGPU::VReg_352RegClass;
344 }
else if (
Info->VAddrDwords == 12) {
345 RC = &AMDGPU::VReg_384RegClass;
347 RC = &AMDGPU::VReg_512RegClass;
351 unsigned VgprBase = 0;
352 unsigned NextVgpr = 0;
354 bool IsKill = NewAddrDwords ==
Info->VAddrDwords;
356 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
357 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize :
Info->VAddrOperands;
358 for (
unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
359 const MachineOperand &
Op =
MI.getOperand(VAddr0Idx + Idx);
360 unsigned Vgpr =
TRI->getHWRegIndex(
Op.getReg());
361 unsigned Dwords =
TRI->getRegSizeInBits(
Op.getReg(), *MRI) / 32;
362 assert(Dwords > 0 &&
"Un-implemented for less than 32 bit regs");
366 NextVgpr = Vgpr + Dwords;
367 }
else if (Vgpr == NextVgpr) {
368 NextVgpr = Vgpr + Dwords;
379 if (VgprBase + NewAddrDwords > 256)
384 int TFEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::tfe);
385 int LWEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::lwe);
386 unsigned TFEVal = (TFEIdx == -1) ? 0 :
MI.getOperand(TFEIdx).
getImm();
387 unsigned LWEVal = (LWEIdx == -1) ? 0 :
MI.getOperand(LWEIdx).
getImm();
389 if (TFEVal || LWEVal) {
391 for (
unsigned i = LWEIdx + 1, e =
MI.getNumOperands(); i != e; ++i) {
392 if (
MI.getOperand(i).isReg() &&
MI.getOperand(i).isTied() &&
393 MI.getOperand(i).isImplicit()) {
397 "found more than one tied implicit operand when expecting only 1");
399 MI.untieRegOperand(ToUntie);
405 Info->VDataDwords, NewAddrDwords);
406 MI.setDesc(
TII->get(NewOpcode));
408 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
409 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
411 for (
unsigned i = 1; i < EndVAddr; ++i)
412 MI.removeOperand(VAddr0Idx + 1);
416 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata),
417 ToUntie - (EndVAddr - 1));
423bool SIShrinkInstructions::shrinkMadFma(MachineInstr &
MI)
const {
426 if (!ST->hasVOP3Literal())
433 if (
TII->hasAnyModifiersSet(
MI))
436 const unsigned Opcode =
MI.getOpcode();
437 MachineOperand &Src0 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
438 MachineOperand &Src1 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
439 MachineOperand &Src2 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
440 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
445 if (Src2.
isImm() && !
TII->isInlineConstant(Src2)) {
456 case AMDGPU::V_MAD_F32_e64:
457 NewOpcode = AMDGPU::V_MADAK_F32;
459 case AMDGPU::V_FMA_F32_e64:
460 NewOpcode = AMDGPU::V_FMAAK_F32;
462 case AMDGPU::V_MAD_F16_e64:
463 NewOpcode = AMDGPU::V_MADAK_F16;
465 case AMDGPU::V_FMA_F16_e64:
466 case AMDGPU::V_FMA_F16_gfx9_e64:
467 NewOpcode = AMDGPU::V_FMAAK_F16;
469 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
470 NewOpcode = AMDGPU::V_FMAAK_F16_t16;
472 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
473 NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
475 case AMDGPU::V_FMA_F64_e64:
477 NewOpcode = AMDGPU::V_FMAAK_F64;
484 if (Src1.
isImm() && !
TII->isInlineConstant(Src1))
486 else if (Src0.
isImm() && !
TII->isInlineConstant(Src0))
494 case AMDGPU::V_MAD_F32_e64:
495 NewOpcode = AMDGPU::V_MADMK_F32;
497 case AMDGPU::V_FMA_F32_e64:
498 NewOpcode = AMDGPU::V_FMAMK_F32;
500 case AMDGPU::V_MAD_F16_e64:
501 NewOpcode = AMDGPU::V_MADMK_F16;
503 case AMDGPU::V_FMA_F16_e64:
504 case AMDGPU::V_FMA_F16_gfx9_e64:
505 NewOpcode = AMDGPU::V_FMAMK_F16;
507 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
508 NewOpcode = AMDGPU::V_FMAMK_F16_t16;
510 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
511 NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
513 case AMDGPU::V_FMA_F64_e64:
515 NewOpcode = AMDGPU::V_FMAMK_F64;
520 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
529 MI.getOperand(0).getReg())
534 MI.eraseFromParent();
536 TII->removeModOperands(
MI);
537 MI.setDesc(
TII->get(NewOpcode));
549ChangeKind SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &
MI)
const {
550 unsigned Opc =
MI.getOpcode();
551 const MachineOperand *Dest = &
MI.getOperand(0);
552 MachineOperand *Src0 = &
MI.getOperand(1);
553 MachineOperand *Src1 = &
MI.getOperand(2);
554 MachineOperand *SrcReg = Src0;
555 MachineOperand *SrcImm = Src1;
557 if (!SrcImm->
isImm() ||
559 return ChangeKind::None;
561 uint32_t
Imm =
static_cast<uint32_t
>(SrcImm->
getImm());
564 if (
Opc == AMDGPU::S_AND_B32) {
566 MI.findRegisterDefOperand(AMDGPU::SCC,
nullptr)->isDead()) {
568 Opc = AMDGPU::S_BITSET0_B32;
571 Opc = AMDGPU::S_ANDN2_B32;
573 }
else if (
Opc == AMDGPU::S_OR_B32) {
575 MI.findRegisterDefOperand(AMDGPU::SCC,
nullptr)->isDead()) {
577 Opc = AMDGPU::S_BITSET1_B32;
580 Opc = AMDGPU::S_ORN2_B32;
582 }
else if (
Opc == AMDGPU::S_XOR_B32) {
585 Opc = AMDGPU::S_XNOR_B32;
595 return ChangeKind::UpdateHint;
599 const bool IsUndef = SrcReg->
isUndef();
600 const bool IsKill = SrcReg->
isKill();
602 if (
Opc == AMDGPU::S_BITSET0_B32 ||
603 Opc == AMDGPU::S_BITSET1_B32) {
606 MI.getOperand(2).ChangeToRegister(Dest->
getReg(),
false,
609 MI.tieOperands(0, 2);
613 return ChangeKind::UpdateInst;
617 return ChangeKind::None;
622bool SIShrinkInstructions::instAccessReg(
624 unsigned SubReg)
const {
625 for (
const MachineOperand &MO : R) {
633 LaneBitmask Overlap =
TRI->getSubRegIndexLaneMask(SubReg) &
642bool SIShrinkInstructions::instReadsReg(
const MachineInstr *
MI,
unsigned Reg,
643 unsigned SubReg)
const {
644 return instAccessReg(
MI->uses(),
Reg, SubReg);
647bool SIShrinkInstructions::instModifiesReg(
const MachineInstr *
MI,
unsigned Reg,
648 unsigned SubReg)
const {
649 return instAccessReg(
MI->defs(),
Reg, SubReg);
652TargetInstrInfo::RegSubRegPair
653SIShrinkInstructions::getSubRegForIndex(
Register Reg,
unsigned Sub,
655 if (
TRI->getRegSizeInBits(
Reg, *MRI) != 32) {
659 Sub =
TRI->getSubRegFromChannel(
I +
TRI->getChannelFromSubReg(
Sub));
662 return TargetInstrInfo::RegSubRegPair(
Reg,
Sub);
665void SIShrinkInstructions::dropInstructionKeepingImpDefs(
666 MachineInstr &
MI)
const {
667 for (
unsigned i =
MI.getDesc().getNumOperands() +
668 MI.getDesc().implicit_uses().size() +
669 MI.getDesc().implicit_defs().size(),
670 e =
MI.getNumOperands();
672 const MachineOperand &
Op =
MI.getOperand(i);
676 TII->get(AMDGPU::IMPLICIT_DEF),
Op.getReg());
679 MI.eraseFromParent();
701MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT)
const {
703 MovT.
getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
717 unsigned Size =
TII->getOpSize(MovT, 0);
721 if (
Size == 2 &&
X.isVirtual())
724 if (!
TRI->isVGPR(*MRI,
X))
727 const unsigned SearchLimit = 16;
730 MachineInstr *MovX =
nullptr;
731 MachineInstr *InsertionPt =
nullptr;
732 MachineInstr *MovY =
nullptr;
736 Iter !=
E &&
Count < SearchLimit; ++Iter) {
737 if (Iter->isDebugInstr())
741 if (instModifiesReg(&*Iter,
T, Tsub))
746 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
747 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
748 Iter->getOpcode() == AMDGPU::COPY) &&
749 Iter->getOperand(0).getReg() ==
X &&
750 Iter->getOperand(0).getSubReg() == Xsub &&
751 Iter->getOperand(1).isReg()) {
755 }
else if (instModifiesReg(&*Iter,
X, Xsub)) {
762 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
763 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
764 Iter->getOpcode() == AMDGPU::COPY) &&
765 Iter->getOperand(0).getReg() ==
Y &&
766 Iter->getOperand(0).getSubReg() == Ysub &&
767 Iter->getOperand(1).isReg() && Iter->getOperand(1).getReg() ==
T &&
768 Iter->getOperand(1).getSubReg() == Tsub) {
777 if (instModifiesReg(&*Iter,
Y, Ysub))
783 (instReadsReg(&*Iter,
X, Xsub) || instModifiesReg(&*Iter,
X, Xsub))) {
784 InsertionPt = &*Iter;
790 if (instReadsReg(&*Iter,
Y, Ysub))
796 LLVM_DEBUG(
dbgs() <<
"Matched v_swap:\n" << MovT << *MovX << *MovY);
799 SmallVector<MachineInstr *, 4> Swaps;
805 TII->get(AMDGPU::V_SWAP_B16))
814 for (
unsigned I = 0;
I <
Size / 4; ++
I) {
815 TargetInstrInfo::RegSubRegPair X1, Y1;
816 X1 = getSubRegForIndex(
X, Xsub,
I);
817 Y1 = getSubRegForIndex(
Y, Ysub,
I);
819 TII->get(AMDGPU::V_SWAP_B32))
830 for (MachineInstr *Swap : Swaps) {
831 Swap->removeOperand(Swap->getNumExplicitOperands());
836 dropInstructionKeepingImpDefs(*MovY);
840 dropInstructionKeepingImpDefs(MovT);
846 if (
Op.isKill() &&
TRI->regsOverlap(
X,
Op.getReg()))
857bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &
MI)
const {
858 if (!ST->hasGFX10_3Insts())
861 MachineOperand *
Op =
TII->getNamedOperand(
MI, AMDGPU::OpName::sdst);
868 Op->setReg(ST->
isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
872bool SIShrinkInstructions::run(MachineFunction &MF) {
881 unsigned VCCReg = ST->
isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
884 for (MachineBasicBlock &
MBB : MF) {
888 MachineInstr &
MI = *
I;
890 if (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
898 MachineOperand &Src =
MI.getOperand(1);
899 if (Src.isImm() && IsPostRA) {
903 if (ModOpcode != 0) {
904 MI.setDesc(
TII->get(ModOpcode));
905 Src.setImm(
static_cast<int64_t
>(ModImm));
912 if (ST->
hasSwap() && (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
913 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
914 MI.getOpcode() == AMDGPU::COPY)) {
915 if (
auto *NextMI = matchSwap(
MI)) {
916 Next = NextMI->getIterator();
923 if (
MI.getOpcode() == AMDGPU::S_AND_B32 ||
924 MI.getOpcode() == AMDGPU::S_OR_B32 ||
925 MI.getOpcode() == AMDGPU::S_XOR_B32) {
926 ChangeKind CK = shrinkScalarLogicOp(
MI);
927 if (CK == ChangeKind::UpdateHint)
929 Changed |= (CK == ChangeKind::UpdateInst);
933 if (
MI.getOpcode() == AMDGPU::S_ADD_I32 ||
934 MI.getOpcode() == AMDGPU::S_MUL_I32 ||
935 (
MI.getOpcode() == AMDGPU::S_OR_B32 &&
936 MI.getFlag(MachineInstr::MIFlag::Disjoint))) {
937 const MachineOperand *Dest = &
MI.getOperand(0);
938 MachineOperand *Src0 = &
MI.getOperand(1);
939 MachineOperand *Src1 = &
MI.getOperand(2);
942 if (
TII->commuteInstruction(
MI,
false, 1, 2)) {
958 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_MUL_I32)
960 : AMDGPU::S_ADDK_I32;
963 MI.tieOperands(0, 1);
970 if (
MI.isCompare() &&
TII->isSOPC(
MI)) {
976 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
977 const MachineOperand &Dst =
MI.getOperand(0);
978 MachineOperand &Src =
MI.getOperand(1);
980 if (Src.isImm() && Dst.getReg().isPhysical()) {
984 MI.setDesc(
TII->get(AMDGPU::S_MOVK_I32));
989 MI.setDesc(
TII->get(ModOpc));
990 Src.setImm(
static_cast<int64_t
>(ModImm));
998 if (IsPostRA &&
TII->isMIMG(
MI.getOpcode()) &&
1004 if (!
TII->isVOP3(
MI))
1007 if (
MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
1008 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
1009 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
1010 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
1011 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
1012 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
1013 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
1014 (
MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
1022 if (
TII->isVOP3(
MI.getOpcode())) {
1024 if (!
TII->hasVALU32BitEncoding(
MI.getOpcode())) {
1029 if (!
TII->canShrink(
MI, *MRI)) {
1032 if (!
MI.isCommutable() || !
TII->commuteInstruction(
MI) ||
1033 !
TII->canShrink(
MI, *MRI)) {
1044 if (
TII->isVOPC(Op32)) {
1045 MachineOperand &Op0 =
MI.getOperand(0);
1063 if (DstReg != VCCReg)
1068 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1071 const MachineOperand *Src2 =
1072 TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
1076 if (
SReg.isVirtual()) {
1085 const MachineOperand *SDst =
TII->getNamedOperand(
MI,
1086 AMDGPU::OpName::sdst);
1091 if (SDst->
getReg() != VCCReg) {
1099 const MachineOperand *Src2 =
TII->getNamedOperand(
MI,
1100 AMDGPU::OpName::src2);
1101 if (Src2 && Src2->
getReg() != VCCReg) {
1117 if (ST->hasVOP3Literal() &&
1123 !shouldShrinkTrue16(
MI))
1129 MachineInstr *Inst32 =
TII->buildShrunkInst(
MI, Op32);
1130 ++NumInstructionsShrunk;
1133 copyExtraImplicitOps(*Inst32,
MI);
1136 if (SDst && SDst->
isDead())
1139 MI.eraseFromParent();
1140 foldImmediates(*Inst32);
1149bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
1153 return SIShrinkInstructions().run(MF);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ModifiedImm, bool Scalar)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not: add or remove basic blocks from the function, or modify terminator instructions in any way.
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptNone() const
Do not optimize this function (-O0).
bool hasFmaakFmamkF64Insts() const
const SIInstrInfo * getInstrInfo() const override
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
const HexagonRegisterInfo & getRegisterInfo() const
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumImplicitOperands() const
Returns the implicit operands number.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI bool hasRegisterImplicitUseOperand(Register Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not considering sub/super-registers).
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an index.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static bool sopkIsZext(unsigned Opcode)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &)
void push_back(const T &Elt)
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY int32_t getSOPKOp(uint32_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isTrue16Inst(unsigned Opc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
FunctionPass * createSIShrinkInstructionsLegacyPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
constexpr bool any() const