19#define DEBUG_TYPE "si-shrink-instructions"
22 "Number of 64-bit instruction reduced to 32-bit.");
24 "Number of literal constants folded into 32-bit instructions.");
30enum ChangeKind {
None, UpdateHint, UpdateInst };
32class SIShrinkInstructions {
34 MachineRegisterInfo *MRI;
35 const GCNSubtarget *ST;
36 const SIInstrInfo *TII;
37 const SIRegisterInfo *TRI;
40 bool foldImmediates(MachineInstr &
MI,
bool TryToCommute =
true)
const;
41 bool shouldShrinkTrue16(MachineInstr &
MI)
const;
43 bool isKUImmOperand(
const MachineOperand &Src)
const;
44 bool isKImmOrKUImmOperand(
const MachineOperand &Src,
bool &IsUnsigned)
const;
45 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &
MI)
const;
46 bool shrinkScalarCompare(MachineInstr &
MI)
const;
47 bool shrinkMIMG(MachineInstr &
MI)
const;
48 bool shrinkMadFma(MachineInstr &
MI)
const;
49 ChangeKind shrinkScalarLogicOp(MachineInstr &
MI)
const;
50 bool tryReplaceDeadSDST(MachineInstr &
MI)
const;
53 bool instReadsReg(
const MachineInstr *
MI,
unsigned Reg,
54 unsigned SubReg)
const;
55 bool instModifiesReg(
const MachineInstr *
MI,
unsigned Reg,
56 unsigned SubReg)
const;
57 TargetInstrInfo::RegSubRegPair getSubRegForIndex(
Register Reg,
unsigned Sub,
59 void dropInstructionKeepingImpDefs(MachineInstr &
MI)
const;
60 MachineInstr *matchSwap(MachineInstr &MovT)
const;
63 SIShrinkInstructions() =
default;
64 bool run(MachineFunction &MF);
72 SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}
74 bool runOnMachineFunction(MachineFunction &MF)
override;
76 StringRef getPassName()
const override {
return "SI Shrink Instructions"; }
78 void getAnalysisUsage(AnalysisUsage &AU)
const override {
87 "SI Shrink Instructions",
false,
false)
89char SIShrinkInstructionsLegacy::
ID = 0;
92 return new SIShrinkInstructionsLegacy();
99 bool TryToCommute)
const {
102 int Src0Idx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::src0);
105 MachineOperand &Src0 =
MI.getOperand(Src0Idx);
110 if (Def &&
Def->isMoveImmediate()) {
111 MachineOperand &MovSrc =
Def->getOperand(1);
112 bool ConstantFolded =
false;
114 if (
TII->isOperandLegal(
MI, Src0Idx, &MovSrc)) {
115 if (MovSrc.
isImm()) {
117 ConstantFolded =
true;
118 }
else if (MovSrc.
isFI()) {
120 ConstantFolded =
true;
124 ConstantFolded =
true;
128 if (ConstantFolded) {
130 Def->eraseFromParent();
131 ++NumLiteralConstantsFolded;
139 if (TryToCommute &&
MI.isCommutable()) {
140 if (
TII->commuteInstruction(
MI)) {
141 if (foldImmediates(
MI,
false))
145 TII->commuteInstruction(
MI);
154bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &
MI)
const {
155 for (
unsigned I = 0,
E =
MI.getNumExplicitOperands();
I !=
E; ++
I) {
156 const MachineOperand &MO =
MI.getOperand(
I);
160 "True16 Instructions post-RA");
173bool SIShrinkInstructions::isKImmOperand(
const MachineOperand &Src)
const {
175 !
TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
178bool SIShrinkInstructions::isKUImmOperand(
const MachineOperand &Src)
const {
180 !
TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
183bool SIShrinkInstructions::isKImmOrKUImmOperand(
const MachineOperand &Src,
184 bool &IsUnsigned)
const {
187 return !
TII->isInlineConstant(Src);
192 return !
TII->isInlineConstant(Src);
209 int32_t &ModifiedImm,
bool Scalar) {
210 if (
TII->isInlineConstant(Src))
212 int32_t SrcImm =
static_cast<int32_t
>(Src.getImm());
218 ModifiedImm = ~SrcImm;
219 if (
TII->isInlineConstant(
APInt(32, ModifiedImm,
true)))
220 return AMDGPU::V_NOT_B32_e32;
224 if (
TII->isInlineConstant(
APInt(32, ModifiedImm,
true)))
225 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
232void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
233 MachineInstr &
MI)
const {
234 MachineFunction &MF = *
MI.getMF();
235 for (
unsigned i =
MI.getDesc().getNumOperands() +
236 MI.getDesc().implicit_uses().size() +
237 MI.getDesc().implicit_defs().size(),
238 e =
MI.getNumOperands();
240 const MachineOperand &MO =
MI.getOperand(i);
246bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &
MI)
const {
253 if (!
MI.getOperand(0).isReg()) {
254 if (
TII->commuteInstruction(
MI,
false, 0, 1))
259 const MachineOperand &Src0 =
MI.getOperand(0);
263 MachineOperand &Src1 =
MI.getOperand(1);
273 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
275 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
277 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
278 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
282 MI.setDesc(
TII->get(SOPKOpc));
289 const MCInstrDesc &NewDesc =
TII->get(SOPKOpc);
302bool SIShrinkInstructions::shrinkMIMG(MachineInstr &
MI)
const {
308 switch (
Info->MIMGEncoding) {
309 case AMDGPU::MIMGEncGfx10NSA:
310 NewEncoding = AMDGPU::MIMGEncGfx10Default;
312 case AMDGPU::MIMGEncGfx11NSA:
313 NewEncoding = AMDGPU::MIMGEncGfx11Default;
320 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vaddr0);
321 unsigned NewAddrDwords =
Info->VAddrDwords;
322 const TargetRegisterClass *RC;
324 if (
Info->VAddrDwords == 2) {
325 RC = &AMDGPU::VReg_64RegClass;
326 }
else if (
Info->VAddrDwords == 3) {
327 RC = &AMDGPU::VReg_96RegClass;
328 }
else if (
Info->VAddrDwords == 4) {
329 RC = &AMDGPU::VReg_128RegClass;
330 }
else if (
Info->VAddrDwords == 5) {
331 RC = &AMDGPU::VReg_160RegClass;
332 }
else if (
Info->VAddrDwords == 6) {
333 RC = &AMDGPU::VReg_192RegClass;
334 }
else if (
Info->VAddrDwords == 7) {
335 RC = &AMDGPU::VReg_224RegClass;
336 }
else if (
Info->VAddrDwords == 8) {
337 RC = &AMDGPU::VReg_256RegClass;
338 }
else if (
Info->VAddrDwords == 9) {
339 RC = &AMDGPU::VReg_288RegClass;
340 }
else if (
Info->VAddrDwords == 10) {
341 RC = &AMDGPU::VReg_320RegClass;
342 }
else if (
Info->VAddrDwords == 11) {
343 RC = &AMDGPU::VReg_352RegClass;
344 }
else if (
Info->VAddrDwords == 12) {
345 RC = &AMDGPU::VReg_384RegClass;
347 RC = &AMDGPU::VReg_512RegClass;
351 unsigned VgprBase = 0;
352 unsigned NextVgpr = 0;
354 bool IsKill = NewAddrDwords ==
Info->VAddrDwords;
356 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
357 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize :
Info->VAddrOperands;
358 for (
unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
359 const MachineOperand &
Op =
MI.getOperand(VAddr0Idx + Idx);
360 unsigned Vgpr =
TRI->getHWRegIndex(
Op.getReg());
361 unsigned Dwords =
TRI->getRegSizeInBits(
Op.getReg(), *MRI) / 32;
362 assert(Dwords > 0 &&
"Un-implemented for less than 32 bit regs");
366 NextVgpr = Vgpr + Dwords;
367 }
else if (Vgpr == NextVgpr) {
368 NextVgpr = Vgpr + Dwords;
379 if (VgprBase + NewAddrDwords > 256)
384 int TFEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::tfe);
385 int LWEIdx = AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::lwe);
386 unsigned TFEVal = (TFEIdx == -1) ? 0 :
MI.getOperand(TFEIdx).
getImm();
387 unsigned LWEVal = (LWEIdx == -1) ? 0 :
MI.getOperand(LWEIdx).
getImm();
389 if (TFEVal || LWEVal) {
391 for (
unsigned i = LWEIdx + 1, e =
MI.getNumOperands(); i != e; ++i) {
392 if (
MI.getOperand(i).isReg() &&
MI.getOperand(i).isTied() &&
393 MI.getOperand(i).isImplicit()) {
397 "found more than one tied implicit operand when expecting only 1");
399 MI.untieRegOperand(ToUntie);
405 Info->VDataDwords, NewAddrDwords);
406 MI.setDesc(
TII->get(NewOpcode));
408 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
409 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
411 for (
unsigned i = 1; i < EndVAddr; ++i)
412 MI.removeOperand(VAddr0Idx + 1);
416 AMDGPU::getNamedOperandIdx(
MI.getOpcode(), AMDGPU::OpName::vdata),
417 ToUntie - (EndVAddr - 1));
423bool SIShrinkInstructions::shrinkMadFma(MachineInstr &
MI)
const {
426 if (!ST->hasVOP3Literal())
433 if (
TII->hasAnyModifiersSet(
MI))
436 const unsigned Opcode =
MI.getOpcode();
437 MachineOperand &Src0 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src0);
438 MachineOperand &Src1 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src1);
439 MachineOperand &Src2 = *
TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
440 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
445 if (Src2.
isImm() && !
TII->isInlineConstant(Src2)) {
456 case AMDGPU::V_MAD_F32_e64:
457 NewOpcode = AMDGPU::V_MADAK_F32;
459 case AMDGPU::V_FMA_F32_e64:
460 NewOpcode = AMDGPU::V_FMAAK_F32;
462 case AMDGPU::V_MAD_F16_e64:
463 NewOpcode = AMDGPU::V_MADAK_F16;
465 case AMDGPU::V_FMA_F16_e64:
466 case AMDGPU::V_FMA_F16_gfx9_e64:
467 NewOpcode = AMDGPU::V_FMAAK_F16;
469 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
470 NewOpcode = AMDGPU::V_FMAAK_F16_t16;
472 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
473 NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
475 case AMDGPU::V_FMA_F64_e64:
477 NewOpcode = AMDGPU::V_FMAAK_F64;
484 if (Src1.
isImm() && !
TII->isInlineConstant(Src1))
486 else if (Src0.
isImm() && !
TII->isInlineConstant(Src0))
494 case AMDGPU::V_MAD_F32_e64:
495 NewOpcode = AMDGPU::V_MADMK_F32;
497 case AMDGPU::V_FMA_F32_e64:
498 NewOpcode = AMDGPU::V_FMAMK_F32;
500 case AMDGPU::V_MAD_F16_e64:
501 NewOpcode = AMDGPU::V_MADMK_F16;
503 case AMDGPU::V_FMA_F16_e64:
504 case AMDGPU::V_FMA_F16_gfx9_e64:
505 NewOpcode = AMDGPU::V_FMAMK_F16;
507 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
508 NewOpcode = AMDGPU::V_FMAMK_F16_t16;
510 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
511 NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
513 case AMDGPU::V_FMA_F64_e64:
515 NewOpcode = AMDGPU::V_FMAMK_F64;
520 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
529 MI.getOperand(0).getReg())
534 MI.eraseFromParent();
536 TII->removeModOperands(
MI);
537 MI.setDesc(
TII->get(NewOpcode));
549ChangeKind SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &
MI)
const {
550 unsigned Opc =
MI.getOpcode();
551 const MachineOperand *Dest = &
MI.getOperand(0);
552 MachineOperand *Src0 = &
MI.getOperand(1);
553 MachineOperand *Src1 = &
MI.getOperand(2);
554 MachineOperand *SrcReg = Src0;
555 MachineOperand *SrcImm = Src1;
557 if (!SrcImm->
isImm() ||
559 return ChangeKind::None;
561 uint32_t
Imm =
static_cast<uint32_t
>(SrcImm->
getImm());
564 if (
Opc == AMDGPU::S_AND_B32) {
566 MI.findRegisterDefOperand(AMDGPU::SCC,
nullptr)->isDead()) {
568 Opc = AMDGPU::S_BITSET0_B32;
571 Opc = AMDGPU::S_ANDN2_B32;
573 }
else if (
Opc == AMDGPU::S_OR_B32) {
575 MI.findRegisterDefOperand(AMDGPU::SCC,
nullptr)->isDead()) {
577 Opc = AMDGPU::S_BITSET1_B32;
580 Opc = AMDGPU::S_ORN2_B32;
582 }
else if (
Opc == AMDGPU::S_XOR_B32) {
585 Opc = AMDGPU::S_XNOR_B32;
595 return ChangeKind::UpdateHint;
599 const bool IsUndef = SrcReg->
isUndef();
600 const bool IsKill = SrcReg->
isKill();
602 if (
Opc == AMDGPU::S_BITSET0_B32 ||
603 Opc == AMDGPU::S_BITSET1_B32) {
606 MI.getOperand(2).ChangeToRegister(Dest->
getReg(),
false,
609 MI.tieOperands(0, 2);
613 return ChangeKind::UpdateInst;
617 return ChangeKind::None;
622bool SIShrinkInstructions::instAccessReg(
624 unsigned SubReg)
const {
625 for (
const MachineOperand &MO : R) {
633 LaneBitmask Overlap =
TRI->getSubRegIndexLaneMask(SubReg) &
642bool SIShrinkInstructions::instReadsReg(
const MachineInstr *
MI,
unsigned Reg,
643 unsigned SubReg)
const {
644 return instAccessReg(
MI->uses(),
Reg, SubReg);
647bool SIShrinkInstructions::instModifiesReg(
const MachineInstr *
MI,
unsigned Reg,
648 unsigned SubReg)
const {
649 return instAccessReg(
MI->defs(),
Reg, SubReg);
652TargetInstrInfo::RegSubRegPair
653SIShrinkInstructions::getSubRegForIndex(
Register Reg,
unsigned Sub,
655 if (
TRI->getRegSizeInBits(
Reg, *MRI) != 32) {
659 Sub =
TRI->getSubRegFromChannel(
I +
TRI->getChannelFromSubReg(
Sub));
662 return TargetInstrInfo::RegSubRegPair(
Reg,
Sub);
665void SIShrinkInstructions::dropInstructionKeepingImpDefs(
666 MachineInstr &
MI)
const {
667 for (
unsigned i =
MI.getDesc().getNumOperands() +
668 MI.getDesc().implicit_uses().size() +
669 MI.getDesc().implicit_defs().size(),
670 e =
MI.getNumOperands();
672 const MachineOperand &
Op =
MI.getOperand(i);
676 TII->get(AMDGPU::IMPLICIT_DEF),
Op.getReg());
679 MI.eraseFromParent();
701MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT)
const {
703 MovT.
getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
717 unsigned Size =
TII->getOpSize(MovT, 0);
721 if (
Size == 2 &&
X.isVirtual())
724 if (!
TRI->isVGPR(*MRI,
X))
727 const unsigned SearchLimit = 16;
730 MachineInstr *MovX =
nullptr;
731 MachineInstr *InsertionPt =
nullptr;
732 MachineInstr *MovY =
nullptr;
736 Iter !=
E &&
Count < SearchLimit; ++Iter) {
737 if (Iter->isDebugInstr())
741 if (instModifiesReg(&*Iter,
T, Tsub))
746 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
747 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
748 Iter->getOpcode() == AMDGPU::COPY) &&
749 Iter->getOperand(0).getReg() ==
X &&
750 Iter->getOperand(0).getSubReg() == Xsub &&
751 Iter->getOperand(1).isReg()) {
755 }
else if (instModifiesReg(&*Iter,
X, Xsub)) {
762 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
763 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
764 Iter->getOpcode() == AMDGPU::COPY) &&
765 Iter->getOperand(0).getReg() ==
Y &&
766 Iter->getOperand(0).getSubReg() == Ysub &&
767 Iter->getOperand(1).isReg() && Iter->getOperand(1).getReg() ==
T &&
768 Iter->getOperand(1).getSubReg() == Tsub) {
777 if (instModifiesReg(&*Iter,
Y, Ysub))
783 (instReadsReg(&*Iter,
X, Xsub) || instModifiesReg(&*Iter,
X, Xsub))) {
784 InsertionPt = &*Iter;
790 if (instReadsReg(&*Iter,
Y, Ysub))
796 LLVM_DEBUG(
dbgs() <<
"Matched v_swap:\n" << MovT << *MovX << *MovY);
799 SmallVector<MachineInstr *, 4> Swaps;
805 TII->get(AMDGPU::V_SWAP_B16))
814 for (
unsigned I = 0;
I <
Size / 4; ++
I) {
815 TargetInstrInfo::RegSubRegPair X1, Y1;
816 X1 = getSubRegForIndex(
X, Xsub,
I);
817 Y1 = getSubRegForIndex(
Y, Ysub,
I);
819 TII->get(AMDGPU::V_SWAP_B32))
830 for (MachineInstr *Swap : Swaps) {
831 Swap->removeOperand(Swap->getNumExplicitOperands());
836 dropInstructionKeepingImpDefs(*MovY);
840 dropInstructionKeepingImpDefs(MovT);
846 if (
Op.isKill() &&
TRI->regsOverlap(
X,
Op.getReg()))
857bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &
MI)
const {
858 if (!ST->hasGFX10_3Insts())
861 MachineOperand *
Op =
TII->getNamedOperand(
MI, AMDGPU::OpName::sdst);
868 Op->setReg(ST->
isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
872bool SIShrinkInstructions::run(MachineFunction &MF) {
881 unsigned VCCReg = ST->
isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
884 for (MachineBasicBlock &
MBB : MF) {
888 MachineInstr &
MI = *
I;
890 if (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
898 MachineOperand &Src =
MI.getOperand(1);
899 if (Src.isImm() && IsPostRA) {
903 if (ModOpcode != 0) {
904 MI.setDesc(
TII->get(ModOpcode));
905 Src.setImm(
static_cast<int64_t
>(ModImm));
912 if (ST->
hasSwap() && (
MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
913 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
914 MI.getOpcode() == AMDGPU::COPY)) {
915 if (
auto *NextMI = matchSwap(
MI)) {
916 Next = NextMI->getIterator();
923 if (
MI.getOpcode() == AMDGPU::S_AND_B32 ||
924 MI.getOpcode() == AMDGPU::S_OR_B32 ||
925 MI.getOpcode() == AMDGPU::S_XOR_B32) {
926 ChangeKind CK = shrinkScalarLogicOp(
MI);
927 if (CK == ChangeKind::UpdateHint)
929 Changed |= (CK == ChangeKind::UpdateInst);
933 if (
MI.getOpcode() == AMDGPU::S_ADD_I32 ||
934 MI.getOpcode() == AMDGPU::S_MUL_I32 ||
935 (
MI.getOpcode() == AMDGPU::S_OR_B32 &&
936 MI.getFlag(MachineInstr::MIFlag::Disjoint))) {
937 const MachineOperand *Dest = &
MI.getOperand(0);
938 MachineOperand *Src0 = &
MI.getOperand(1);
939 MachineOperand *Src1 = &
MI.getOperand(2);
942 if (
TII->commuteInstruction(
MI,
false, 1, 2)) {
958 unsigned Opc = (
MI.getOpcode() == AMDGPU::S_MUL_I32)
960 : AMDGPU::S_ADDK_I32;
963 MI.tieOperands(0, 1);
970 if (
MI.isCompare() &&
TII->isSOPC(
MI)) {
976 if (
MI.getOpcode() == AMDGPU::S_MOV_B32) {
977 const MachineOperand &Dst =
MI.getOperand(0);
978 MachineOperand &Src =
MI.getOperand(1);
980 if (Src.isImm() && Dst.getReg().isPhysical()) {
984 MI.setDesc(
TII->get(AMDGPU::S_MOVK_I32));
989 MI.setDesc(
TII->get(ModOpc));
990 Src.setImm(
static_cast<int64_t
>(ModImm));
998 if (IsPostRA &&
TII->isMIMG(
MI.getOpcode()) &&
1004 if (!
TII->isVOP3(
MI))
1007 if (
MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
1008 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
1009 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
1010 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
1011 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
1012 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
1013 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
1014 (
MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
1022 if (
TII->isVOP3(
MI.getOpcode())) {
1024 if (!
TII->hasVALU32BitEncoding(
MI.getOpcode())) {
1029 if (!
TII->canShrink(
MI, *MRI)) {
1032 if (!
MI.isCommutable() || !
TII->commuteInstruction(
MI) ||
1033 !
TII->canShrink(
MI, *MRI)) {
1044 if (
TII->isVOPC(Op32)) {
1045 MachineOperand &Op0 =
MI.getOperand(0);
1063 if (DstReg != VCCReg)
1068 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1071 const MachineOperand *Src2 =
1072 TII->getNamedOperand(
MI, AMDGPU::OpName::src2);
1076 if (
SReg.isVirtual()) {
1085 const MachineOperand *SDst =
TII->getNamedOperand(
MI,
1086 AMDGPU::OpName::sdst);
1091 if (SDst->
getReg() != VCCReg) {
1099 const MachineOperand *Src2 =
TII->getNamedOperand(
MI,
1100 AMDGPU::OpName::src2);
1101 if (Src2 && Src2->
getReg() != VCCReg) {
1117 if (ST->hasVOP3Literal() &&
1123 !shouldShrinkTrue16(
MI))
1129 MachineInstr *Inst32 =
TII->buildShrunkInst(
MI, Op32);
1130 ++NumInstructionsShrunk;
1133 copyExtraImplicitOps(*Inst32,
MI);
1136 if (SDst && SDst->
isDead())
1139 MI.eraseFromParent();
1140 foldImmediates(*Inst32);
1149bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
1153 return SIShrinkInstructions().run(MF);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ModifiedImm, bool Scalar)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metrics from passes.
#define STATISTIC(VARNAME, DESC)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not: add or remove basic blocks from the function, or modify terminator instructions in any way.
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptNone() const
Do not optimize this function (-O0).
bool hasFmaakFmamkF64Insts() const
const SIInstrInfo * getInstrInfo() const override
unsigned getNSAMaxSize(bool HasSampler=false) const
Generation getGeneration() const
const HexagonRegisterInfo & getRegisterInfo() const
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of passes that operate on the MachineFunction representation.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumImplicitOperands() const
Returns the implicit operands number.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI bool hasRegisterImplicitUseOperand(Register Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not considering sub/super-registers).
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an index.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or null if none is found.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
static bool sopkIsZext(unsigned Opcode)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &)
void push_back(const T &Elt)
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY int32_t getSOPKOp(uint32_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isTrue16Inst(unsigned Opc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
constexpr T reverseBits(T Val)
Reverse the bits in Val.
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
FunctionPass * createSIShrinkInstructionsLegacyPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
constexpr bool any() const