LLVM 23.0.0git
SIFrameLowering.cpp
Go to the documentation of this file.
1//===----------------------- SIFrameLowering.cpp --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//==-----------------------------------------------------------------------===//
8
9#include "SIFrameLowering.h"
10#include "AMDGPU.h"
11#include "AMDGPULaneMaskUtils.h"
12#include "GCNSubtarget.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "frame-info"
23
25 "amdgpu-spill-vgpr-to-agpr",
26 cl::desc("Enable spilling VGPRs to AGPRs"),
28 cl::init(true));
29
30// Find a register matching \p RC from \p LiveUnits which is unused and
31// available throughout the function. On failure, returns AMDGPU::NoRegister.
32// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
33// MCRegisters. This should reduce the number of iterations and avoid redundant
34// checking.
// NOTE(review): this excerpt comes from a rendered listing; the first line of
// the signature (function name and the MRI parameter) is not visible here.
36                                     const LiveRegUnits &LiveUnits,
37                                     const TargetRegisterClass &RC) {
38  for (MCRegister Reg : RC) {
// A candidate must be unused anywhere in the function, have all of its
// register units free in LiveUnits, and not be reserved.
39    if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
40        !MRI.isReserved(Reg))
41      return Reg;
42  }
// Default-constructed MCRegister is the invalid/no-register value.
43  return MCRegister();
44}
45
46// Find a scratch register that we can use in the prologue. We avoid using
47// callee-save registers since they may appear to be free when this is called
48// from canUseAsPrologue (during shrink wrapping), but then no longer be free
49// when this is called from emitPrologue.
// NOTE(review): the opening line of the signature (function name) is missing
// from this rendered excerpt.
51                                                   MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
52                                                   const TargetRegisterClass &RC, bool Unused = false) {
53  // Mark callee saved registers as used so we will not choose them.
54  const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
// CSRegs is a null-terminated array of callee-saved physical registers.
55  for (unsigned i = 0; CSRegs[i]; ++i)
56    LiveUnits.addReg(CSRegs[i]);
57
58  // We are looking for a register that can be used throughout the entire
59  // function, so any use is unacceptable.
60  if (Unused)
61    return findUnusedRegister(MRI, LiveUnits, RC);
62
// Otherwise a register merely free at this point (and not reserved) suffices.
63  for (MCRegister Reg : RC) {
64    if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
65      return Reg;
66  }
67
68  return MCRegister();
69}
70
71/// Query target location for spilling SGPRs
72/// \p IncludeScratchCopy : Also look for free scratch SGPRs
// NOTE(review): rendered excerpt — the signature's first line and several
// statements (e.g. the stack ID argument of CreateStackObject, the calls that
// record the chosen spill method) are missing here.
74    MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
75    const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
76    bool IncludeScratchCopy = true) {
78  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
79
80  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
81  const SIRegisterInfo *TRI = ST.getRegisterInfo();
82  unsigned Size = TRI->getSpillSize(RC);
83  Align Alignment = TRI->getSpillAlign(RC);
84
85  // We need to save and restore the given SGPR.
86
87  Register ScratchSGPR;
88  // 1: Try to save the given register into an unused scratch SGPR. The
89  // LiveUnits should have all the callee saved registers marked as used. For
90  // certain cases we skip copy to scratch SGPR.
91  if (IncludeScratchCopy)
92    ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);
93
94  if (!ScratchSGPR) {
// No free SGPR: fall back to a frame index, preferring a VGPR lane spill.
95    int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
97
98    if (TRI->spillSGPRToVGPR() &&
99        MFI->allocateSGPRSpillToVGPRLane(MF, FI, /*SpillToPhysVGPRLane=*/true,
100                                         /*IsPrologEpilog=*/true)) {
101      // 2: There's no free lane to spill, and no free register to save the
102      // SGPR, so we're forced to take another VGPR to use for the spill.
106
107      LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
108                 dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
109                        << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
110                        << '\n';);
111    } else {
112      // Remove dead <FI> index
114      // 3: If all else fails, spill the register to memory.
115      FI = FrameInfo.CreateSpillStackObject(Size, Alignment);
117          SGPR,
119      LLVM_DEBUG(dbgs() << "Reserved FI " << FI << " for spilling "
120                        << printReg(SGPR, TRI) << '\n');
121    }
122  } else {
// A scratch SGPR was found; mark it live so later queries don't reuse it.
126    LiveUnits.addReg(ScratchSGPR);
127    LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
128                      << printReg(ScratchSGPR, TRI) << '\n');
129  }
130}
131
132// We need to specially emit stack operations here because a different frame
133// register is used than in the rest of the function, as getFrameRegister would
134// use.
// Emits a single-dword VGPR store to the stack for prologue spilling.
// NOTE(review): rendered excerpt — the MBB/iterator/DebugLoc parameters and
// the MachinePointerInfo/getMachineMemOperand lines are missing here.
135static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
136                             const SIMachineFunctionInfo &FuncInfo,
137                             LiveRegUnits &LiveUnits, MachineFunction &MF,
140                             Register SpillReg, int FI, Register FrameReg,
141                             int64_t DwordOff = 0) {
// Opcode choice depends on whether flat scratch is in use.
142  unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
143                                            : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
144
145  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
148      PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
149      FrameInfo.getObjectAlign(FI));
// Temporarily mark the spilled register live around the store; it is killed
// by the store unless it is a block live-in.
150  LiveUnits.addReg(SpillReg);
151  bool IsKill = !MBB.isLiveIn(SpillReg);
152  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
153                          DwordOff, MMO, nullptr, &LiveUnits);
154  if (IsKill)
155    LiveUnits.removeReg(SpillReg);
156}
157
// Counterpart of buildPrologSpill: reloads a single dword from the stack into
// SpillReg in the epilogue, choosing the scratch/buffer opcode by subtarget.
// NOTE(review): rendered excerpt — the MBB/iterator parameters and the
// MachinePointerInfo/getMachineMemOperand lines are missing here.
158static void buildEpilogRestore(const GCNSubtarget &ST,
159                               const SIRegisterInfo &TRI,
160                               const SIMachineFunctionInfo &FuncInfo,
161                               LiveRegUnits &LiveUnits, MachineFunction &MF,
164                               const DebugLoc &DL, Register SpillReg, int FI,
165                               Register FrameReg, int64_t DwordOff = 0) {
166  unsigned Opc = ST.hasFlatScratchEnabled() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
167                                            : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
168
169  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
172      PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
173      FrameInfo.getObjectAlign(FI));
// IsKill is false for a load: the destination is defined, not consumed.
174  TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
175                          DwordOff, MMO, nullptr, &LiveUnits);
176}
177
// Materializes the Global Information Table (GIT) pointer into TargetReg:
// the high half comes from the amdgpu-git-ptr-high attribute if set
// (i.e. not the 0xffffffff sentinel), otherwise from S_GETPC_B64; the low
// half is copied from the SGPR carrying the GIT pointer argument.
// NOTE(review): rendered excerpt — the signature's first line and the MFI
// declaration are missing here.
179                        const DebugLoc &DL, const SIInstrInfo *TII,
180                        Register TargetReg) {
181  MachineFunction *MF = MBB.getParent();
183  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
184  const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
185  Register TargetLo = TRI->getSubReg(TargetReg, AMDGPU::sub0);
186  Register TargetHi = TRI->getSubReg(TargetReg, AMDGPU::sub1);
187
188  if (MFI->getGITPtrHigh() != 0xffffffff) {
189    BuildMI(MBB, I, DL, SMovB32, TargetHi)
190        .addImm(MFI->getGITPtrHigh())
191        .addReg(TargetReg, RegState::ImplicitDefine);
192  } else {
193    const MCInstrDesc &GetPC64 = TII->get(AMDGPU::S_GETPC_B64_pseudo);
194    BuildMI(MBB, I, DL, GetPC64, TargetReg);
195  }
// The incoming GIT-pointer-low SGPR must be a live-in to be read here.
196  Register GitPtrLo = MFI->getGITPtrLoReg(*MF);
197  MF->getRegInfo().addLiveIn(GitPtrLo);
198  MBB.addLiveIn(GitPtrLo);
199  BuildMI(MBB, I, DL, SMovB32, TargetLo)
200      .addReg(GitPtrLo);
201}
202
// Lazily initializes LiveUnits at the current insertion point: block live-ins
// for a prologue, or live-outs stepped backward across MBBI for an epilogue.
// A non-empty LiveUnits is left untouched.
// NOTE(review): rendered excerpt — one signature line (the MF/MBB parameters)
// is missing here.
203static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
204                          const SIMachineFunctionInfo *FuncInfo,
206                          MachineBasicBlock::iterator MBBI, bool IsProlog) {
207  if (LiveUnits.empty()) {
208    LiveUnits.init(TRI);
209    if (IsProlog) {
210      LiveUnits.addLiveIns(MBB);
211    } else {
212      // In epilog.
213      LiveUnits.addLiveOuts(MBB);
214      LiveUnits.stepBackward(*MBBI);
215    }
216  }
217}
218
219namespace llvm {
220
221// SpillBuilder to save/restore special SGPR spills like the one needed for FP,
222// BP, etc. These spills are delayed until the current function's frame is
223// finalized. For a given register, the builder uses the
224// PrologEpilogSGPRSaveRestoreInfo to decide the spill method.
// NOTE(review): rendered excerpt — the class head, the MI/MBB/SI member
// declarations, several statements (e.g. the TmpVGPR declarations preceding
// the findScratchNonCalleeSaveRegister calls), and the case labels of the
// save()/restore() switches are missing here.
228  MachineFunction &MF;
229  const GCNSubtarget &ST;
230  MachineFrameInfo &MFI;
231  SIMachineFunctionInfo *FuncInfo;
232  const SIInstrInfo *TII;
233  const SIRegisterInfo &TRI;
234  Register SuperReg;
236  LiveRegUnits &LiveUnits;
237  const DebugLoc &DL;
238  Register FrameReg;
239  ArrayRef<int16_t> SplitParts;
240  unsigned NumSubRegs;
// Each SGPR is spilled in 4-byte (one dword) elements.
241  unsigned EltSize = 4;
242
// Spills SuperReg to the stack object FI, one dword at a time, bouncing each
// sub-register through a scratch VGPR.
243  void saveToMemory(const int FI) const {
244    MachineRegisterInfo &MRI = MF.getRegInfo();
245    assert(!MFI.isDeadObjectIndex(FI));
246
247    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
248
250        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
251    if (!TmpVGPR)
252      report_fatal_error("failed to find free scratch register");
253
254    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
255      Register SubReg = NumSubRegs == 1
256                            ? SuperReg
257                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
258      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
259          .addReg(SubReg);
260
261      buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
262                       FI, FrameReg, DwordOff);
263      DwordOff += 4;
264    }
265  }
266
// Writes each sub-register of SuperReg into its pre-allocated physical VGPR
// lane recorded for FI.
267  void saveToVGPRLane(const int FI) const {
268    assert(!MFI.isDeadObjectIndex(FI));
269
270    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
272        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
273    assert(Spill.size() == NumSubRegs);
274
275    for (unsigned I = 0; I < NumSubRegs; ++I) {
276      Register SubReg = NumSubRegs == 1
277                            ? SuperReg
278                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
279      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_S32_TO_VGPR),
280              Spill[I].VGPR)
281          .addReg(SubReg)
282          .addImm(Spill[I].Lane)
283          .addReg(Spill[I].VGPR, RegState::Undef);
284    }
285  }
286
// Saves SuperReg by copying it into a free scratch SGPR.
287  void copyToScratchSGPR(Register DstReg) const {
288    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), DstReg)
289        .addReg(SuperReg)
291  }
292
// Reverse of saveToMemory: reload each dword into a scratch VGPR and read it
// back into the SGPR sub-register with v_readfirstlane.
293  void restoreFromMemory(const int FI) {
294    MachineRegisterInfo &MRI = MF.getRegInfo();
295
296    initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
298        MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
299    if (!TmpVGPR)
300      report_fatal_error("failed to find free scratch register");
301
302    for (unsigned I = 0, DwordOff = 0; I < NumSubRegs; ++I) {
303      Register SubReg = NumSubRegs == 1
304                            ? SuperReg
305                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
306
307      buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
308                         TmpVGPR, FI, FrameReg, DwordOff);
309      assert(SubReg.isPhysical());
310
311      BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
312          .addReg(TmpVGPR, RegState::Kill);
313      DwordOff += 4;
314    }
315  }
316
// Reverse of saveToVGPRLane: read each sub-register back out of its VGPR lane.
317  void restoreFromVGPRLane(const int FI) {
318    assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
320        FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
321    assert(Spill.size() == NumSubRegs);
322
323    for (unsigned I = 0; I < NumSubRegs; ++I) {
324      Register SubReg = NumSubRegs == 1
325                            ? SuperReg
326                            : Register(TRI.getSubReg(SuperReg, SplitParts[I]));
327      BuildMI(MBB, MI, DL, TII->get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
328          .addReg(Spill[I].VGPR)
329          .addImm(Spill[I].Lane);
330    }
331  }
332
// Restores SuperReg from the scratch SGPR it was copied into.
333  void copyFromScratchSGPR(Register SrcReg) const {
334    BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), SuperReg)
335        .addReg(SrcReg)
337  }
338
339public:
344                                 const DebugLoc &DL, const SIInstrInfo *TII,
345                                 const SIRegisterInfo &TRI,
346                                 LiveRegUnits &LiveUnits, Register FrameReg)
347      : MI(MI), MBB(MBB), MF(*MBB.getParent()),
348        ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
349        FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
350        SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
351        FrameReg(FrameReg) {
// Split the (possibly multi-dword) register into 32-bit pieces.
352    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
353    SplitParts = TRI.getRegSplitParts(RC, EltSize);
354    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
355
356    assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
357  }
358
// Dispatches on the recorded spill method for this register.
359  void save() {
360    switch (SI.getKind()) {
362      return saveToMemory(SI.getIndex());
364      return saveToVGPRLane(SI.getIndex());
366      return copyToScratchSGPR(SI.getReg());
367    }
368  }
369
370  void restore() {
371    switch (SI.getKind()) {
373      return restoreFromMemory(SI.getIndex());
375      return restoreFromVGPRLane(SI.getIndex());
377      return copyFromScratchSGPR(SI.getReg());
378    }
379  }
380};
381
382} // namespace llvm
383
384// Emit flat scratch setup code, assuming `MFI->hasFlatScratchInit()`
// Initializes the FLAT_SCR register pair (or the flat-scratch hwregs on
// GFX10+) for an entry function. On PAL the flat scratch base is loaded from
// the scratch descriptor in the GIT; otherwise it comes from the preloaded
// flat-scratch-init user SGPR pair.
// NOTE(review): rendered excerpt — the signature's MF/MBB/iterator line, the
// MMO flag arguments, the Offset initializer, and the getPreloadedValue line
// are missing here.
385void SIFrameLowering::emitEntryFunctionFlatScratchInit(
387    const DebugLoc &DL, Register ScratchWaveOffsetReg) const {
388  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
389  const SIInstrInfo *TII = ST.getInstrInfo();
390  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
391  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
392
393  // We don't need this if we only have spills since there is no user facing
394  // scratch.
395
396  // TODO: If we know we don't have flat instructions earlier, we can omit
397  // this from the input registers.
398  //
399  // TODO: We only need to know if we access scratch space through a flat
400  // pointer. Because we only detect if flat instructions are used at all,
401  // this will be used more often than necessary on VI.
402
403  Register FlatScrInitLo;
404  Register FlatScrInitHi;
405
406  if (ST.isAmdPalOS()) {
407    // Extract the scratch offset from the descriptor in the GIT
408    LiveRegUnits LiveUnits;
409    LiveUnits.init(*TRI);
410    LiveUnits.addLiveIns(MBB);
411
412    // Find unused reg to load flat scratch init into
413    MachineRegisterInfo &MRI = MF.getRegInfo();
414    Register FlatScrInit = AMDGPU::NoRegister;
415    ArrayRef<MCPhysReg> AllSGPR64s = TRI->getAllSGPR64(MF);
// Skip past the SGPR pairs occupied by preloaded arguments.
416    unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 1) / 2;
417    AllSGPR64s = AllSGPR64s.slice(
418        std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
419    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
420    for (MCPhysReg Reg : AllSGPR64s) {
421      if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
422          MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
423        FlatScrInit = Reg;
424        break;
425      }
426    }
427    assert(FlatScrInit && "Failed to find free register for scratch init");
428
429    FlatScrInitLo = TRI->getSubReg(FlatScrInit, AMDGPU::sub0);
430    FlatScrInitHi = TRI->getSubReg(FlatScrInit, AMDGPU::sub1);
431
432    buildGitPtr(MBB, I, DL, TII, FlatScrInit);
433
434    // We now have the GIT ptr - now get the scratch descriptor from the entry
435    // at offset 0 (or offset 16 for a compute shader).
436    MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
437    const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
438    auto *MMO = MF.getMachineMemOperand(
439        PtrInfo,
442        8, Align(4));
443    unsigned Offset =
445    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
446    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
447    BuildMI(MBB, I, DL, LoadDwordX2, FlatScrInit)
448        .addReg(FlatScrInit)
449        .addImm(EncodedOffset) // offset
450        .addImm(0) // cpol
451        .addMemOperand(MMO);
452
453    // Mask the offset in [47:0] of the descriptor
454    const MCInstrDesc &SAndB32 = TII->get(AMDGPU::S_AND_B32);
455    auto And = BuildMI(MBB, I, DL, SAndB32, FlatScrInitHi)
456        .addReg(FlatScrInitHi)
457        .addImm(0xffff);
458    And->getOperand(3).setIsDead(); // Mark SCC as dead.
459  } else {
460    Register FlatScratchInitReg =
462    assert(FlatScratchInitReg);
463
464    MachineRegisterInfo &MRI = MF.getRegInfo();
465    MRI.addLiveIn(FlatScratchInitReg);
466    MBB.addLiveIn(FlatScratchInitReg);
467
468    FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
469    FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
470  }
471
472  // Do a 64-bit pointer add.
473  if (ST.flatScratchIsPointer()) {
474    if (ST.getGeneration() >= AMDGPUSubtarget::GFX10) {
475      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
476          .addReg(FlatScrInitLo)
477          .addReg(ScratchWaveOffsetReg);
478      auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
479                          FlatScrInitHi)
480          .addReg(FlatScrInitHi)
481          .addImm(0);
482      Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
483
// On GFX10+ FLAT_SCR is not a real register pair; write via s_setreg.
484      using namespace AMDGPU::Hwreg;
485      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
486          .addReg(FlatScrInitLo)
487          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_LO, 0, 32)));
488      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_B32))
489          .addReg(FlatScrInitHi)
490          .addImm(int16_t(HwregEncoding::encode(ID_FLAT_SCR_HI, 0, 32)));
491      return;
492    }
493
494    // For GFX9.
495    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
496        .addReg(FlatScrInitLo)
497        .addReg(ScratchWaveOffsetReg);
498    auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32),
499                        AMDGPU::FLAT_SCR_HI)
500        .addReg(FlatScrInitHi)
501        .addImm(0);
502    Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
503
504    return;
505  }
506
507  assert(ST.getGeneration() < AMDGPUSubtarget::GFX9);
508
509  // Copy the size in bytes.
510  BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
511      .addReg(FlatScrInitHi, RegState::Kill);
512
513  // Add wave offset in bytes to private base offset.
514  // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
515  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), FlatScrInitLo)
516      .addReg(FlatScrInitLo)
517      .addReg(ScratchWaveOffsetReg);
518
519  // Convert offset to 256-byte units.
520  auto LShr = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32),
521                      AMDGPU::FLAT_SCR_HI)
522      .addReg(FlatScrInitLo, RegState::Kill)
523      .addImm(8);
524  LShr->getOperand(3).setIsDead(); // Mark SCC as dead.
525}
526
527// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
528// memory. They should have been removed by now.
// Returns true iff every stack object in the frame is dead.
// NOTE(review): rendered excerpt — the signature line (presumably taking the
// MachineFrameInfo as MFI) is missing here.
530  for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
531       I != E; ++I) {
532    if (!MFI.isDeadObjectIndex(I))
533      return false;
534  }
535
536  return true;
537}
538
539// Shift down registers reserved for the scratch RSRC.
// Returns the (possibly re-assigned) SGPR128 holding the scratch resource
// descriptor for an entry function, or a null Register when it is unused.
// NOTE(review): rendered excerpt — the second half of the early-exit
// condition and one statement in the replacement loop are missing here.
540Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
541    MachineFunction &MF) const {
542
543  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
544  const SIInstrInfo *TII = ST.getInstrInfo();
545  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
546  MachineRegisterInfo &MRI = MF.getRegInfo();
547  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
548
549  assert(MFI->isEntryFunction());
550
551  Register ScratchRsrcReg = MFI->getScratchRSrcReg();
552
553  if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
555    return Register();
556
// With the SGPR init bug, or when a non-default register was chosen, keep it.
557  if (ST.hasSGPRInitBug() ||
558      ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
559    return ScratchRsrcReg;
560
561  // We reserved the last registers for this. Shift it down to the end of those
562  // which were actually used.
563  //
564  // FIXME: It might be safer to use a pseudoregister before replacement.
565
566  // FIXME: We should be able to eliminate unused input registers. We only
567  // cannot do this for the resources required for scratch access. For now we
568  // skip over user SGPRs and may leave unused holes.
569
// Round preloaded SGPR count up to SGPR128 granularity (4 dwords).
570  unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
571  ArrayRef<MCPhysReg> AllSGPR128s = TRI->getAllSGPR128(MF);
572  AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
573
574  // Skip the last N reserved elements because they should have already been
575  // reserved for VCC etc.
576  Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
577  for (MCPhysReg Reg : AllSGPR128s) {
578    // Pick the first unallocated one. Make sure we don't clobber the other
579    // reserved input we needed. Also for PAL, make sure we don't clobber
580    // the GIT pointer passed in SGPR0 or SGPR8.
581    if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
582        (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
583      MRI.replaceRegWith(ScratchRsrcReg, Reg);
585      MRI.reserveReg(Reg, TRI);
586      return Reg;
587    }
588  }
589
590  return ScratchRsrcReg;
591}
592
593static unsigned getScratchScaleFactor(const GCNSubtarget &ST) {
594 return ST.hasFlatScratchEnabled() ? 1 : ST.getWavefrontSize();
595}
596
// Entry-function prologue: pins down the scratch RSRC register, locates the
// preloaded scratch wave offset, initializes SP/FP, flat scratch, and the
// scratch descriptor, and (when supported) enables XNACK replay mode.
// NOTE(review): rendered excerpt — the function's first signature line, the
// MRI/MFI/I declarations, the getPreloadedValue arguments, the FPReg/SPReg
// declarations, and a few other lines are missing here.
598                                                MachineBasicBlock &MBB) const {
599  assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
600
601  // FIXME: If we only have SGPR spills, we won't actually be using scratch
602  // memory since these spill to VGPRs. We should be cleaning up these unused
603  // SGPR spill frame indices somewhere.
604
605  // FIXME: We still have implicit uses on SGPR spill instructions in case they
606  // need to spill to vector memory. It's likely that will not happen, but at
607  // this point it appears we need the setup. This part of the prolog should be
608  // emitted after frame indices are eliminated.
609
610  // FIXME: Remove all of the isPhysRegUsed checks
611
613  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
614  const SIInstrInfo *TII = ST.getInstrInfo();
615  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
617  const Function &F = MF.getFunction();
618  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
619
620  assert(MFI->isEntryFunction());
621
622  Register PreloadedScratchWaveOffsetReg = MFI->getPreloadedReg(
624
625  // We need to do the replacement of the private segment buffer register even
626  // if there are no stack objects. There could be stores to undef or a
627  // constant without an associated object.
628  //
629  // This will return `Register()` in cases where there are no actual
630  // uses of the SRSRC.
631  Register ScratchRsrcReg;
632  if (!ST.hasFlatScratchEnabled())
633    ScratchRsrcReg = getEntryFunctionReservedScratchRsrcReg(MF);
634
635  // Make the selected register live throughout the function.
636  if (ScratchRsrcReg) {
637    for (MachineBasicBlock &OtherBB : MF) {
638      if (&OtherBB != &MBB) {
639        OtherBB.addLiveIn(ScratchRsrcReg);
640      }
641    }
642  }
643
644  // Now that we have fixed the reserved SRSRC we need to locate the
645  // (potentially) preloaded SRSRC.
646  Register PreloadedScratchRsrcReg;
647  if (ST.isAmdHsaOrMesa(F)) {
648    PreloadedScratchRsrcReg =
650    if (ScratchRsrcReg && PreloadedScratchRsrcReg) {
651      // We added live-ins during argument lowering, but since they were not
652      // used they were deleted. We're adding the uses now, so add them back.
653      MRI.addLiveIn(PreloadedScratchRsrcReg);
654      MBB.addLiveIn(PreloadedScratchRsrcReg);
655    }
656  }
657
658  // Debug location must be unknown since the first debug location is used to
659  // determine the end of the prologue.
660  DebugLoc DL;
662
663  // We found the SRSRC first because it needs four registers and has an
664  // alignment requirement. If the SRSRC that we found is clobbering with
665  // the scratch wave offset, which may be in a fixed SGPR or a free SGPR
666  // chosen by SITargetLowering::allocateSystemSGPRs, COPY the scratch
667  // wave offset to a free SGPR.
668  Register ScratchWaveOffsetReg;
669  if (PreloadedScratchWaveOffsetReg &&
670      TRI->isSubRegisterEq(ScratchRsrcReg, PreloadedScratchWaveOffsetReg)) {
671    ArrayRef<MCPhysReg> AllSGPRs = TRI->getAllSGPR32(MF);
672    unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
673    AllSGPRs = AllSGPRs.slice(
674        std::min(static_cast<unsigned>(AllSGPRs.size()), NumPreloaded));
675    Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
676    for (MCPhysReg Reg : AllSGPRs) {
677      if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
678          !TRI->isSubRegisterEq(ScratchRsrcReg, Reg) && GITPtrLoReg != Reg) {
679        ScratchWaveOffsetReg = Reg;
680        BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
681            .addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
682        break;
683      }
684    }
685
686    // FIXME: We can spill incoming arguments and restore at the end of the
687    // prolog.
688    if (!ScratchWaveOffsetReg)
690          "could not find temporary scratch offset register in prolog");
691  } else {
692    ScratchWaveOffsetReg = PreloadedScratchWaveOffsetReg;
693  }
694  assert(ScratchWaveOffsetReg || !PreloadedScratchWaveOffsetReg);
695
// Per-wave scratch size: frame size scaled when not using flat scratch.
696  unsigned Offset = FrameInfo.getStackSize() * getScratchScaleFactor(ST);
697  if (!mayReserveScratchForCWSR(MF)) {
698    if (hasFP(MF)) {
700      assert(FPReg != AMDGPU::FP_REG);
701      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), FPReg).addImm(0);
702    }
703
706      assert(SPReg != AMDGPU::SP_REG);
707      BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
708    }
709  } else {
710    // We need to check if we're on a compute queue - if we are, then the CWSR
711    // trap handler may need to store some VGPRs on the stack. The first VGPR
712    // block is saved separately, so we only need to allocate space for any
713    // additional VGPR blocks used. For now, we will make sure there's enough
714    // room for the theoretical maximum number of VGPRs that can be allocated.
715    // FIXME: Figure out if the shader uses fewer VGPRs in practice.
716    assert(hasFP(MF));
718    assert(FPReg != AMDGPU::FP_REG);
719    unsigned VGPRSize = llvm::alignTo(
720        (ST.getAddressableNumVGPRs(MFI->getDynamicVGPRBlockSize()) -
722             MFI->getDynamicVGPRBlockSize())) *
723            4,
724        FrameInfo.getMaxAlign());
726
727    BuildMI(MBB, I, DL, TII->get(AMDGPU::GET_STACK_BASE), FPReg);
730      assert(SPReg != AMDGPU::SP_REG);
731
732      // If at least one of the constants can be inlined, then we can use
733      // s_cselect. Otherwise, use a mov and cmovk.
734      if (AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm()) ||
736                                       ST.hasInv2PiInlineImm())) {
737        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CSELECT_B32), SPReg)
738            .addImm(Offset + VGPRSize)
739            .addImm(Offset);
740      } else {
741        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), SPReg).addImm(Offset);
742        BuildMI(MBB, I, DL, TII->get(AMDGPU::S_CMOVK_I32), SPReg)
743            .addImm(Offset + VGPRSize);
744      }
745    }
746  }
747
748  bool NeedsFlatScratchInit =
750      (MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
751       (!allStackObjectsAreDead(FrameInfo) && ST.hasFlatScratchEnabled()));
752
753  if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
754      PreloadedScratchWaveOffsetReg && !ST.hasArchitectedFlatScratch()) {
755    MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
756    MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
757  }
758
759  if (NeedsFlatScratchInit) {
760    emitEntryFunctionFlatScratchInit(MF, MBB, I, DL, ScratchWaveOffsetReg);
761  }
762
763  if (ScratchRsrcReg) {
764    emitEntryFunctionScratchRsrcRegSetup(MF, MBB, I, DL,
765                                         PreloadedScratchRsrcReg,
766                                         ScratchRsrcReg, ScratchWaveOffsetReg);
767  }
768
769  if (ST.hasWaitXcnt()) {
770    // Set REPLAY_MODE (bit 25) in MODE register to enable multi-group XNACK
771    // replay. This aligns hardware behavior with the compiler's s_wait_xcnt
772    // insertion logic, which assumes multi-group mode by default.
773    unsigned RegEncoding =
775    BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SETREG_IMM32_B32))
776        .addImm(1)
777        .addImm(RegEncoding);
778  }
779}
780
781// Emit scratch RSRC setup code, assuming `ScratchRsrcReg != AMDGPU::NoReg`
// Builds the 128-bit scratch resource descriptor: loaded from the GIT on PAL,
// assembled from relocations/constants for Mesa GFX shaders or when no
// preloaded RSRC exists, otherwise copied from the preloaded RSRC; finally the
// scratch wave offset is added into the 48-bit base address.
// NOTE(review): rendered excerpt — the signature's MF/MBB/iterator line, the
// PtrInfo declarations, MMO flag arguments, and several branch-internal lines
// are missing here.
782void SIFrameLowering::emitEntryFunctionScratchRsrcRegSetup(
784    const DebugLoc &DL, Register PreloadedScratchRsrcReg,
785    Register ScratchRsrcReg, Register ScratchWaveOffsetReg) const {
786
787  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
788  const SIInstrInfo *TII = ST.getInstrInfo();
789  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
791  const Function &Fn = MF.getFunction();
792
793  if (ST.isAmdPalOS()) {
794    // The pointer to the GIT is formed from the offset passed in and either
795    // the amdgpu-git-ptr-high function attribute or the top part of the PC
796    Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
797    Register Rsrc03 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
798
799    buildGitPtr(MBB, I, DL, TII, Rsrc01);
800
801    // We now have the GIT ptr - now get the scratch descriptor from the entry
802    // at offset 0 (or offset 16 for a compute shader).
804    const MCInstrDesc &LoadDwordX4 = TII->get(AMDGPU::S_LOAD_DWORDX4_IMM);
805    auto *MMO = MF.getMachineMemOperand(
806        PtrInfo,
809        16, Align(4));
810    unsigned Offset = Fn.getCallingConv() == CallingConv::AMDGPU_CS ? 16 : 0;
811    const GCNSubtarget &Subtarget = MF.getSubtarget<GCNSubtarget>();
812    unsigned EncodedOffset = AMDGPU::convertSMRDOffsetUnits(Subtarget, Offset);
813    BuildMI(MBB, I, DL, LoadDwordX4, ScratchRsrcReg)
814        .addReg(Rsrc01)
815        .addImm(EncodedOffset) // offset
816        .addImm(0) // cpol
817        .addReg(ScratchRsrcReg, RegState::ImplicitDefine)
818        .addMemOperand(MMO);
819
820    // The driver will always set the SRD for wave 64 (bits 118:117 of
821    // descriptor / bits 22:21 of third sub-reg will be 0b11)
822    // If the shader is actually wave32 we have to modify the const_index_stride
823    // field of the descriptor 3rd sub-reg (bits 22:21) to 0b10 (stride=32). The
824    // reason the driver does this is that there can be cases where it presents
825    // 2 shaders with different wave size (e.g. VsFs).
826    // TODO: convert to using SCRATCH instructions or multiple SRD buffers
827    if (ST.isWave32()) {
828      const MCInstrDesc &SBitsetB32 = TII->get(AMDGPU::S_BITSET0_B32);
829      BuildMI(MBB, I, DL, SBitsetB32, Rsrc03)
830          .addImm(21)
831          .addReg(Rsrc03);
832    }
833  } else if (ST.isMesaGfxShader(Fn) || !PreloadedScratchRsrcReg) {
834    assert(!ST.isAmdHsaOrMesa(Fn));
835    const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
836
837    Register Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
838    Register Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
839
840    // Use relocations to get the pointer, and setup the other bits manually.
841    uint64_t Rsrc23 = TII->getScratchRsrcWords23();
842
844      Register Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
845
847        const MCInstrDesc &Mov64 = TII->get(AMDGPU::S_MOV_B64);
848
849        BuildMI(MBB, I, DL, Mov64, Rsrc01)
851            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
852      } else {
853        const MCInstrDesc &LoadDwordX2 = TII->get(AMDGPU::S_LOAD_DWORDX2_IMM);
854
855        MachinePointerInfo PtrInfo(AMDGPUAS::CONSTANT_ADDRESS);
856        auto *MMO = MF.getMachineMemOperand(
857            PtrInfo,
860            8, Align(4));
861        BuildMI(MBB, I, DL, LoadDwordX2, Rsrc01)
863            .addImm(0) // offset
864            .addImm(0) // cpol
865            .addMemOperand(MMO)
866            .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
867
870      }
871    } else {
872      Register Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
873      Register Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
874
875      BuildMI(MBB, I, DL, SMovB32, Rsrc0)
876          .addExternalSymbol("SCRATCH_RSRC_DWORD0")
877          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
878
879      BuildMI(MBB, I, DL, SMovB32, Rsrc1)
880          .addExternalSymbol("SCRATCH_RSRC_DWORD1")
881          .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
882    }
883
884    BuildMI(MBB, I, DL, SMovB32, Rsrc2)
885        .addImm(Lo_32(Rsrc23))
886        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
887
888    BuildMI(MBB, I, DL, SMovB32, Rsrc3)
889        .addImm(Hi_32(Rsrc23))
890        .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
891  } else if (ST.isAmdHsaOrMesa(Fn)) {
892    assert(PreloadedScratchRsrcReg);
893
894    if (ScratchRsrcReg != PreloadedScratchRsrcReg) {
895      BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
896          .addReg(PreloadedScratchRsrcReg, RegState::Kill);
897    }
898  }
899
900  // Add the scratch wave offset into the scratch RSRC.
901  //
902  // We only want to update the first 48 bits, which is the base address
903  // pointer, without touching the adjacent 16 bits of flags. We know this add
904  // cannot carry-out from bit 47, otherwise the scratch allocation would be
905  // impossible to fit in the 48-bit global address space.
906  //
907  // TODO: Evaluate if it is better to just construct an SRD using the flat
908  // scratch init and some constants rather than update the one we are passed.
909  Register ScratchRsrcSub0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
910  Register ScratchRsrcSub1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
911
912  // We cannot Kill ScratchWaveOffsetReg here because we allow it to be used in
913  // the kernel body via inreg arguments.
914  BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), ScratchRsrcSub0)
915      .addReg(ScratchRsrcSub0)
916      .addReg(ScratchWaveOffsetReg)
917      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
918  auto Addc = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), ScratchRsrcSub1)
919      .addReg(ScratchRsrcSub1)
920      .addImm(0)
921      .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
922  Addc->getOperand(3).setIsDead(); // Mark SCC as dead.
923}
924
// Classifies a TargetStackID::Value as supported or not by this frame
// lowering, with an unreachable default for invalid enumerators.
// NOTE(review): rendered excerpt — the function signature and the case labels
// selecting the true/false returns are missing here.
926  switch (ID) {
930    return true;
934    return false;
935  }
936  llvm_unreachable("Invalid TargetStackID::Value");
937}
938
939// Activate only the inactive lanes when \p EnableInactiveLanes is true.
940// Otherwise, activate all lanes. It returns the saved exec.
// NOTE(review): rendered excerpt — the function name line and the
// FuncInfo/MRI declarations are missing here.
942                                     MachineFunction &MF,
945                                     const DebugLoc &DL, bool IsProlog,
946                                     bool EnableInactiveLanes) {
947  Register ScratchExecCopy;
949  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
950  const SIInstrInfo *TII = ST.getInstrInfo();
951  const SIRegisterInfo &TRI = TII->getRegisterInfo();
953
954  initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
955
956  if (FuncInfo->isWholeWaveFunction()) {
957    // Whole wave functions already have a copy of the original EXEC mask that
958    // we can use.
959    assert(IsProlog && "Epilog should look at return, not setup");
960    ScratchExecCopy =
961        TII->getWholeWaveFunctionSetup(MF)->getOperand(0).getReg();
962    assert(ScratchExecCopy && "Couldn't find copy of EXEC");
963  } else {
964    ScratchExecCopy = findScratchNonCalleeSaveRegister(
965        MRI, LiveUnits, *TRI.getWaveMaskRegClass());
966  }
967
968  if (!ScratchExecCopy)
969    report_fatal_error("failed to find free scratch register");
970
971  LiveUnits.addReg(ScratchExecCopy);
972
// XOR_SAVEEXEC flips to the inactive lanes; OR_SAVEEXEC with -1 enables all
// lanes. The 32/64-bit form follows the wave size.
973  const unsigned SaveExecOpc =
974      ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
975                                           : AMDGPU::S_OR_SAVEEXEC_B32)
976                    : (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B64
977                                           : AMDGPU::S_OR_SAVEEXEC_B64);
978  auto SaveExec =
979      BuildMI(MBB, MBBI, DL, TII->get(SaveExecOpc), ScratchExecCopy).addImm(-1);
980  SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
981
982  return ScratchExecCopy;
983}
984
// NOTE(review): the leading signature lines (source lines 985-987, containing
// the function name and the MF/MBB/MBBI/DL/LiveUnits parameters) are missing
// from this extract. Emits the prolog-side callee-save spill stores (WWM VGPRs
// and prolog/epilog SGPR spills) relative to \p FrameReg.
988 Register FrameReg, Register FramePtrRegScratchCopy) const {
990 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
991 const SIInstrInfo *TII = ST.getInstrInfo();
992 const SIRegisterInfo &TRI = TII->getRegisterInfo();
995
996 // Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
997 // registers. However, save all lanes of callee-saved VGPRs. Due to this, we
998 // might end up flipping the EXEC bits twice.
999 Register ScratchExecCopy;
1000 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1001 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
// For non-CSR WWM scratch regs only the inactive lanes need saving, so flip
// EXEC to the inactive lanes first (EnableInactiveLanes=true).
1002 if (!WWMScratchRegs.empty())
1003 ScratchExecCopy =
1004 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1005 /*IsProlog*/ true, /*EnableInactiveLanes*/ true);
1006
// Helper: store each (VGPR, frame-index) pair to its stack slot.
// NOTE(review): the lambda's parameter list (source line 1008) is missing
// from this extract.
1007 auto StoreWWMRegisters =
1009 for (const auto &Reg : WWMRegs) {
1010 Register VGPR = Reg.first;
1011 int FI = Reg.second;
1012 buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1013 VGPR, FI, FrameReg);
1014 }
1015 };
1016
// Make the WWM scratch registers live-in so their incoming (inactive-lane)
// values are not considered undefined.
1017 for (const Register Reg : make_first_range(WWMScratchRegs)) {
1018 if (!MRI.isReserved(Reg)) {
1019 MRI.addLiveIn(Reg);
1020 MBB.addLiveIn(Reg);
1021 }
1022 }
1023 StoreWWMRegisters(WWMScratchRegs);
1024
1025 auto EnableAllLanes = [&]() {
1026 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1027 };
1028
// Callee-saved WWM VGPRs need all lanes saved: either widen EXEC to -1 (if
// we already saved it) or save it now without the inactive-lane flip.
1029 if (!WWMCalleeSavedRegs.empty()) {
1030 if (ScratchExecCopy) {
1031 EnableAllLanes();
1032 } else {
1033 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1034 /*IsProlog*/ true,
1035 /*EnableInactiveLanes*/ false);
1036 }
1037 }
1038
1039 StoreWWMRegisters(WWMCalleeSavedRegs);
1040 if (FuncInfo->isWholeWaveFunction()) {
1041 // If we have already saved some WWM CSR registers, then the EXEC is already
1042 // -1 and we don't need to do anything else. Otherwise, set EXEC to -1 here.
1043 if (!ScratchExecCopy)
1044 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL, /*IsProlog*/ true,
1045 /*EnableInactiveLanes*/ true);
1046 else if (WWMCalleeSavedRegs.empty())
1047 EnableAllLanes();
1048 } else if (ScratchExecCopy) {
1049 // FIXME: Split block and make terminator.
// Restore the original EXEC mask now that all WWM stores are done.
1050 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1051 .addReg(ScratchExecCopy, RegState::Kill);
1052 LiveUnits.addReg(ScratchExecCopy);
1053 }
1054
1055 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1056
1057 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1058 // Special handle FP spill:
1059 // Skip if FP is saved to a scratch SGPR, the save has already been emitted.
1060 // Otherwise, FP has been moved to a temporary register and spill it
1061 // instead.
1062 Register Reg =
1063 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1064 if (!Reg)
1065 continue;
1066
1067 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1068 LiveUnits, FrameReg);
1069 SB.save();
1070 }
1071
1072 // If a copy to scratch SGPR has been chosen for any of the SGPR spills, make
1073 // such scratch registers live throughout the function.
1074 SmallVector<Register, 1> ScratchSGPRs;
1075 FuncInfo->getAllScratchSGPRCopyDstRegs(ScratchSGPRs);
1076 if (!ScratchSGPRs.empty()) {
1077 for (MachineBasicBlock &MBB : MF) {
1078 for (MCPhysReg Reg : ScratchSGPRs)
1079 MBB.addLiveIn(Reg);
1080
1081 MBB.sortUniqueLiveIns();
1082 }
1083 if (!LiveUnits.empty()) {
1084 for (MCPhysReg Reg : ScratchSGPRs)
1085 LiveUnits.addReg(Reg);
1086 }
1087 }
1088}
1089
// NOTE(review): the leading signature lines (source lines 1090-1092, with the
// function name and MF/MBB/MBBI/DL/LiveUnits parameters) are missing from this
// extract. Mirror of emitCSRSpillStores: reloads prolog/epilog SGPR spills and
// WWM VGPRs in the epilog, using \p FrameReg as the base register.
1093 Register FrameReg, Register FramePtrRegScratchCopy) const {
1094 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1095 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1096 const SIInstrInfo *TII = ST.getInstrInfo();
1097 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1099 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1100
1101 for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills()) {
1102 // Special handle FP restore:
1103 // Skip if FP needs to be restored from the scratch SGPR. Otherwise, restore
1104 // the FP value to a temporary register. The frame pointer should be
1105 // overwritten only at the end when all other spills are restored from
1106 // current frame.
1107 Register Reg =
1108 Spill.first == FramePtrReg ? FramePtrRegScratchCopy : Spill.first;
1109 if (!Reg)
1110 continue;
1111
1112 PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
1113 LiveUnits, FrameReg);
1114 SB.restore();
1115 }
1116
1117 // Restore Whole-Wave Mode VGPRs. Restore only the inactive lanes of the
1118 // scratch registers. However, restore all lanes of callee-saved VGPRs. Due to
1119 // this, we might end up flipping the EXEC bits twice.
1120 Register ScratchExecCopy;
1121 SmallVector<std::pair<Register, int>, 2> WWMCalleeSavedRegs, WWMScratchRegs;
1122 FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
// Helper: reload each (VGPR, frame-index) pair from its stack slot.
// NOTE(review): the lambda's parameter list (source line 1124) is missing
// from this extract.
1123 auto RestoreWWMRegisters =
1125 for (const auto &Reg : WWMRegs) {
1126 Register VGPR = Reg.first;
1127 int FI = Reg.second;
1128 buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
1129 VGPR, FI, FrameReg);
1130 }
1131 };
1132
1133 if (FuncInfo->isWholeWaveFunction()) {
1134 // For whole wave functions, the EXEC is already -1 at this point.
1135 // Therefore, we can restore the CSR WWM registers right away.
1136 RestoreWWMRegisters(WWMCalleeSavedRegs);
1137
1138 // The original EXEC is the first operand of the return instruction.
1139 MachineInstr &Return = MBB.instr_back();
1140 unsigned Opcode = Return.getOpcode();
1141 switch (Opcode) {
1142 case AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN:
1143 Opcode = AMDGPU::SI_RETURN;
1144 break;
1145 case AMDGPU::SI_TCRETURN_GFX_WholeWave:
1146 Opcode = AMDGPU::SI_TCRETURN_GFX;
1147 break;
1148 default:
1149 llvm_unreachable("Unexpected return inst");
1150 }
1151 Register OrigExec = Return.getOperand(0).getReg();
1152
// EXEC := ~OrigExec selects exactly the inactive lanes so the scratch WWM
// registers' inactive-lane contents can be reloaded.
1153 if (!WWMScratchRegs.empty()) {
1154 BuildMI(MBB, MBBI, DL, TII->get(LMC.XorOpc), LMC.ExecReg)
1155 .addReg(OrigExec)
1156 .addImm(-1);
1157 RestoreWWMRegisters(WWMScratchRegs);
1158 }
1159
1160 // Restore original EXEC.
1161 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addReg(OrigExec);
1162
1163 // Drop the first operand and update the opcode.
1164 Return.removeOperand(0);
1165 Return.setDesc(TII->get(Opcode));
1166
1167 return;
1168 }
1169
// Non-whole-wave path: flip to the inactive lanes, reload scratch WWM regs,
// then (with all lanes on) reload the callee-saved WWM regs.
1170 if (!WWMScratchRegs.empty()) {
1171 ScratchExecCopy =
1172 buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1173 /*IsProlog=*/false, /*EnableInactiveLanes=*/true);
1174 }
1175 RestoreWWMRegisters(WWMScratchRegs);
1176 if (!WWMCalleeSavedRegs.empty()) {
1177 if (ScratchExecCopy) {
1178 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg).addImm(-1);
1179 } else {
1180 ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
1181 /*IsProlog*/ false,
1182 /*EnableInactiveLanes*/ false);
1183 }
1184 }
1185
1186 RestoreWWMRegisters(WWMCalleeSavedRegs);
1187 if (ScratchExecCopy) {
1188 // FIXME: Split block and make terminator.
// Put the original EXEC mask back.
1189 BuildMI(MBB, MBBI, DL, TII->get(LMC.MovOpc), LMC.ExecReg)
1190 .addReg(ScratchExecCopy, RegState::Kill);
1191 }
1192}
1193
// NOTE(review): the signature line (source line 1194, function name and the
// MachineFunction parameter) is missing from this extract. Emits the function
// prologue for non-entry functions: CSR spills, stack realignment, FP/BP
// setup, and the SP increment for the local frame.
1195 MachineBasicBlock &MBB) const {
// Entry functions have a separate prologue path (emitted elsewhere).
1197 if (FuncInfo->isEntryFunction()) {
1199 return;
1200 }
1201
1202 MachineFrameInfo &MFI = MF.getFrameInfo();
1203 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1204 const SIInstrInfo *TII = ST.getInstrInfo();
1205 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1206 MachineRegisterInfo &MRI = MF.getRegInfo();
1207
1208 Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1209 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1210 Register BasePtrReg =
1211 TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
1212 LiveRegUnits LiveUnits;
1213
1215 // DebugLoc must be unknown since the first instruction with DebugLoc is used
1216 // to determine the end of the prologue.
1217 DebugLoc DL;
1218
1219 bool HasFP = false;
1220 bool HasBP = false;
1221 uint32_t NumBytes = MFI.getStackSize();
1222 uint32_t RoundedSize = NumBytes;
1223
1224 // Chain functions never return, so there's no need to save and restore the FP
1225 // or BP.
1226 bool SavesStackRegs = !FuncInfo->isChainFunction();
1227
// Stack realignment forces use of a frame pointer.
1228 if (TRI.hasStackRealignment(MF))
1229 HasFP = true;
1230
1231 Register FramePtrRegScratchCopy;
1232 if (!HasFP && !hasFP(MF)) {
1233 // Emit the CSR spill stores with SP base register.
1234 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1235 FramePtrRegScratchCopy);
1236 } else if (SavesStackRegs) {
1237 // CSR spill stores will use FP as base register.
1238 Register SGPRForFPSaveRestoreCopy =
1239 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1240
1241 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
1242 if (SGPRForFPSaveRestoreCopy) {
1243 // Copy FP to the scratch register now and emit the CFI entry. It avoids
1244 // the extra FP copy needed in the other two cases when FP is spilled to
1245 // memory or to a VGPR lane.
// NOTE(review): the builder's declaration line (source line 1246) is
// missing from this extract; SB below is a PrologEpilogSGPRSpillBuilder.
1247 FramePtrReg,
1248 FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
1249 DL, TII, TRI, LiveUnits, FramePtrReg);
1250 SB.save();
1251 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1252 } else {
1253 // Copy FP into a new scratch register so that its previous value can be
1254 // spilled after setting up the new frame.
1255 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1256 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1257 if (!FramePtrRegScratchCopy)
1258 report_fatal_error("failed to find free scratch register");
1259
1260 LiveUnits.addReg(FramePtrRegScratchCopy);
1261 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
1262 .addReg(FramePtrReg);
1263 }
1264 }
1265
// Realignment: over-allocate by the max alignment and round FP up to an
// aligned boundary with an add/and pair.
1266 if (HasFP) {
1267 const unsigned Alignment = MFI.getMaxAlign().value();
1268
1269 RoundedSize += Alignment;
1270 if (LiveUnits.empty()) {
1271 LiveUnits.init(TRI);
1272 LiveUnits.addLiveIns(MBB);
1273 }
1274
1275 // s_add_i32 s33, s32, NumBytes
1276 // s_and_b32 s33, s33, 0b111...0000
1277 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), FramePtrReg)
1278 .addReg(StackPtrReg)
1279 .addImm((Alignment - 1) * getScratchScaleFactor(ST))
1281 auto And = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_AND_B32), FramePtrReg)
1282 .addReg(FramePtrReg, RegState::Kill)
1283 .addImm(-Alignment * getScratchScaleFactor(ST))
1285 And->getOperand(3).setIsDead(); // Mark SCC as dead.
1286 FuncInfo->setIsStackRealigned(true);
1287 } else if ((HasFP = hasFP(MF))) {
// No realignment needed: FP is simply the incoming SP.
1288 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1289 .addReg(StackPtrReg)
1291 }
1292
1293 // If FP is used, emit the CSR spills with FP base register.
1294 if (HasFP) {
1295 emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1296 FramePtrRegScratchCopy);
1297 if (FramePtrRegScratchCopy)
1298 LiveUnits.removeReg(FramePtrRegScratchCopy);
1299 }
1300
1301 // If we need a base pointer, set it up here. It's whatever the value of
1302 // the stack pointer is at this point. Any variable size objects will be
1303 // allocated after this, so we can still use the base pointer to reference
1304 // the incoming arguments.
1305 if ((HasBP = TRI.hasBasePointer(MF))) {
1306 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), BasePtrReg)
1307 .addReg(StackPtrReg)
1309 }
1310
// Bump SP past the (possibly alignment-padded) local frame. The scale factor
// converts bytes to the SP's per-lane scratch units.
1311 if (HasFP && RoundedSize != 0) {
1312 auto Add = BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::S_ADD_I32), StackPtrReg)
1313 .addReg(StackPtrReg)
1314 .addImm(RoundedSize * getScratchScaleFactor(ST))
1316 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1317 }
1318
// Consistency checks: FP/BP must be saved iff they are actually needed
// (modulo chain functions and the VGPR-to-AGPR spill special case).
1319 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1320 (void)FPSaved;
1321 assert((!HasFP || FPSaved || !SavesStackRegs) &&
1322 "Needed to save FP but didn't save it anywhere");
1323
1324 // If we allow spilling to AGPRs we may have saved FP but then spill
1325 // everything into AGPRs instead of the stack.
1326 assert((HasFP || !FPSaved || !SavesStackRegs || EnableSpillVGPRToAGPR) &&
1327 "Saved FP but didn't need it");
1328
1329 bool BPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(BasePtrReg);
1330 (void)BPSaved;
1331 assert((!HasBP || BPSaved || !SavesStackRegs) &&
1332 "Needed to save BP but didn't save it anywhere");
1333
1334 assert((HasBP || !BPSaved) && "Saved BP but didn't need it");
1335
1336 if (FuncInfo->isWholeWaveFunction()) {
1337 // SI_WHOLE_WAVE_FUNC_SETUP has outlived its purpose.
1338 TII->getWholeWaveFunctionSetup(MF)->eraseFromParent();
1339 }
1340}
1341
// NOTE(review): the signature line (source line 1342, function name and the
// MachineFunction parameter) is missing from this extract. Emits the epilogue
// for non-entry functions: restores SP, CSRs, and finally the frame pointer.
1343 MachineBasicBlock &MBB) const {
1344 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1345 if (FuncInfo->isEntryFunction())
1346 return;
1347
// Chain functions without a tail call never return, so no epilogue needed.
1348 const MachineFrameInfo &MFI = MF.getFrameInfo();
1349 if (FuncInfo->isChainFunction() && !MFI.hasTailCall())
1350 return;
1351
1352 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1353 const SIInstrInfo *TII = ST.getInstrInfo();
1354 const SIRegisterInfo &TRI = TII->getRegisterInfo();
1355 MachineRegisterInfo &MRI = MF.getRegInfo();
1356 LiveRegUnits LiveUnits;
1357 // Get the insert location for the epilogue. If there were no terminators in
1358 // the block, get the last instruction.
1360 DebugLoc DL;
1361 if (!MBB.empty()) {
1362 MBBI = MBB.getLastNonDebugInstr();
1363 if (MBBI != MBB.end())
1364 DL = MBBI->getDebugLoc();
1365
1366 MBBI = MBB.getFirstTerminator();
1367 }
1368
// Account for the extra alignment padding added in the prologue, if any.
1369 uint32_t NumBytes = MFI.getStackSize();
1370 uint32_t RoundedSize = FuncInfo->isStackRealigned()
1371 ? NumBytes + MFI.getMaxAlign().value()
1372 : NumBytes;
1373 const Register StackPtrReg = FuncInfo->getStackPtrOffsetReg();
1374 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1375 bool FPSaved = FuncInfo->hasPrologEpilogSGPRSpillEntry(FramePtrReg);
1376
// Rewind SP to its incoming value: prefer the base pointer (pre-variable-
// sized-object SP), else the frame pointer.
1377 if (RoundedSize != 0) {
1378 if (TRI.hasBasePointer(MF)) {
1379 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1380 .addReg(TRI.getBaseRegister())
1382 } else if (hasFP(MF)) {
1383 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), StackPtrReg)
1384 .addReg(FramePtrReg)
1386 }
1387 }
1388
1389 Register FramePtrRegScratchCopy;
1390 Register SGPRForFPSaveRestoreCopy =
1391 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1392 if (FPSaved) {
1393 // CSR spill restores should use FP as base register. If
1394 // SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
1395 // into a new scratch register and copy to FP later when other registers are
1396 // restored from the current stack frame.
1397 initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
1398 if (SGPRForFPSaveRestoreCopy) {
1399 LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
1400 } else {
1401 FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
1402 MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
1403 if (!FramePtrRegScratchCopy)
1404 report_fatal_error("failed to find free scratch register");
1405
1406 LiveUnits.addReg(FramePtrRegScratchCopy);
1407 }
1408
1409 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1410 FramePtrRegScratchCopy);
1411 }
1412
1413 if (FPSaved) {
1414 // Insert the copy to restore FP.
1415 Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
1416 : FramePtrRegScratchCopy;
// NOTE(review): the builder variable's declaration (source line 1417) is
// missing from this extract.
1418 BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrReg)
1419 .addReg(SrcReg);
// NOTE(review): the statement guarded here (source line 1421) is missing
// from this extract.
1420 if (SGPRForFPSaveRestoreCopy)
1422 } else {
1423 // Insert the CSR spill restores with SP as the base register.
1424 emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
1425 FramePtrRegScratchCopy);
1426 }
1427}
1428
1429#ifndef NDEBUG
// NOTE(review): the signature line (source line 1430) is missing from this
// extract; from the callers below, this is an assert-only helper returning
// true iff every stack object in \p MFI (meeting a condition on source lines
// 1436-1437, also missing here) is dead.
1431 const MachineFrameInfo &MFI = MF.getFrameInfo();
1432 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1433 for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
1434 I != E; ++I) {
// A live object that matches the (not visible) condition disproves the claim.
1435 if (!MFI.isDeadObjectIndex(I) &&
1438 return false;
1439 }
1440 }
1441
1442 return true;
1443}
1444#endif
1445
// NOTE(review): the leading signature line (source line 1446, function name
// and MF parameter) and the return statement (source line 1452) are missing
// from this extract. Reports the register + offset through which frame index
// \p FI should be referenced; the frame register is taken from SIRegisterInfo.
1447 int FI,
1448 Register &FrameReg) const {
1449 const SIRegisterInfo *RI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
1450
// Out-parameter: the register callers must use as the access base.
1451 FrameReg = RI->getFrameRegister(MF);
1453}
1454
// NOTE(review): the first signature line (source line 1455, with the function
// name) is missing from this extract. Runs before frame finalization: folds
// VGPR spills into AGPRs where possible, drops dead spill slots, and reserves
// emergency scavenging slots.
1456 MachineFunction &MF,
1457 RegScavenger *RS) const {
1458 MachineFrameInfo &MFI = MF.getFrameInfo();
1459
1460 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1461 const SIInstrInfo *TII = ST.getInstrInfo();
1462 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1463 MachineRegisterInfo &MRI = MF.getRegInfo();
1465
// AGPR spilling requires MAI instructions; the condition's trailing clause
// (source line 1467) is missing from this extract.
1466 const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
1468
1469 if (SpillVGPRToAGPR) {
1470 // To track the spill frame indices handled in this pass.
1471 BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
1472 BitVector NonVGPRSpillFIs(MFI.getObjectIndexEnd(), false);
1473
1474 bool SeenDbgInstr = false;
1475
1476 for (MachineBasicBlock &MBB : MF) {
// NOTE(review): the inner instruction-loop header (source line 1477) is
// missing from this extract.
1478 int FrameIndex;
1479 if (MI.isDebugInstr())
1480 SeenDbgInstr = true;
1481
1482 if (TII->isVGPRSpill(MI)) {
1483 // Try to eliminate stack used by VGPR spills before frame
1484 // finalization.
1485 unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1486 AMDGPU::OpName::vaddr);
1487 int FI = MI.getOperand(FIOp).getIndex();
1488 Register VReg =
1489 TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
1490 if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
1491 TRI->isAGPR(MRI, VReg))) {
// Rewrite the spill in place to target the allocated A/VGPR; the
// scavenger is positioned just past MI for the rewrite.
1492 assert(RS != nullptr);
1493 RS->enterBasicBlockEnd(MBB);
1494 RS->backward(std::next(MI.getIterator()));
1495 TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
1496 SpillFIs.set(FI);
1497 continue;
1498 }
1499 } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
1500 TII->isLoadFromStackSlot(MI, FrameIndex))
1501 if (!MFI.isFixedObjectIndex(FrameIndex))
1502 NonVGPRSpillFIs.set(FrameIndex);
1503 }
1504 }
1505
1506 // Stack slot coloring may assign different objects to the same stack slot.
1507 // If not, then the VGPR to AGPR spill slot is dead.
1508 for (unsigned FI : SpillFIs.set_bits())
1509 if (!NonVGPRSpillFIs.test(FI))
1510 FuncInfo->setVGPRToAGPRSpillDead(FI);
1511
// The AGPRs/VGPRs now used for spilling carry values across the whole
// function, so mark them live-in everywhere.
1512 for (MachineBasicBlock &MBB : MF) {
1513 for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
1514 MBB.addLiveIn(Reg);
1515
1516 for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
1517 MBB.addLiveIn(Reg);
1518
1519 MBB.sortUniqueLiveIns();
1520
1521 if (!SpillFIs.empty() && SeenDbgInstr) {
1522 // FIXME: The dead frame indices are replaced with a null register from
1523 // the debug value instructions. We should instead, update it with the
1524 // correct register value. But not sure the register value alone is
1525 for (MachineInstr &MI : MBB) {
1526 if (MI.isDebugValue()) {
1527 uint32_t StackOperandIdx = MI.isDebugValueList() ? 2 : 0;
1528 if (MI.getOperand(StackOperandIdx).isFI() &&
1529 !MFI.isFixedObjectIndex(
1530 MI.getOperand(StackOperandIdx).getIndex()) &&
1531 SpillFIs[MI.getOperand(StackOperandIdx).getIndex()]) {
1532 MI.getOperand(StackOperandIdx)
1533 .ChangeToRegister(Register(), false /*isDef*/);
1534 }
1535 }
1536 }
1537 }
1538 }
1539 }
1540
1541 // At this point we've already allocated all spilled SGPRs to VGPRs if we
1542 // can. Any remaining SGPR spills will go to memory, so move them back to the
1543 // default stack.
1544 bool HaveSGPRToVMemSpill =
1545 FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
// NOTE(review): the assert's condition line (source line 1546) is missing
// from this extract.
1547 "SGPR spill should have been removed in SILowerSGPRSpills");
1548
1549 // FIXME: The other checks should be redundant with allStackObjectsAreDead,
1550 // but currently hasNonSpillStackObjects is set only from source
1551 // allocas. Stack temps produced from legalization are not counted currently.
1552 if (!allStackObjectsAreDead(MFI)) {
1553 assert(RS && "RegScavenger required if spilling");
1554
1555 // Add an emergency spill slot
1556 RS->addScavengingFrameIndex(FuncInfo->getScavengeFI(MFI, *TRI));
1557
1558 // If we are spilling SGPRs to memory with a large frame, we may need a
1559 // second VGPR emergency frame index.
// NOTE(review): the second conjunct of this condition (source line 1561)
// is missing from this extract.
1560 if (HaveSGPRToVMemSpill &&
1562 RS->addScavengingFrameIndex(MFI.CreateSpillStackObject(4, Align(4)));
1563 }
1564 }
1565}
1566
// NOTE(review): the first signature line (source line 1567, with the function
// name) is missing from this extract. Post-RA tuning pass hook: shifts the
// reserved AGPR-copy VGPR and the long-branch SGPR pair down to lower unused
// registers when possible, reducing register pressure at the top of the file.
1568 MachineFunction &MF, RegScavenger *RS) const {
1569 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1570 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1571 MachineRegisterInfo &MRI = MF.getRegInfo();
1573
1574 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
1575 // On gfx908, we had initially reserved highest available VGPR for AGPR
1576 // copy. Now since we are done with RA, check if there exist an unused VGPR
1577 // which is lower than the eariler reserved VGPR before RA. If one exist,
1578 // use it for AGPR copy instead of one reserved before RA.
1579 Register VGPRForAGPRCopy = FuncInfo->getVGPRForAGPRCopy();
1580 Register UnusedLowVGPR =
1581 TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
1582 if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
1583 TRI->getHWRegIndex(VGPRForAGPRCopy))) {
1584 // Reserve this newly identified VGPR (for AGPR copy)
1585 // reserved registers should already be frozen at this point
1586 // so we can avoid calling MRI.freezeReservedRegs and just use
1587 // MRI.reserveReg
1588 FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
1589 MRI.reserveReg(UnusedLowVGPR, TRI);
1590 }
1591 }
1592 // We initally reserved the highest available SGPR pair for long branches
1593 // now, after RA, we shift down to a lower unused one if one exists
1594 Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
1595 Register UnusedLowSGPR =
1596 TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
1597 // If LongBranchReservedReg is null then we didn't find a long branch
1598 // and never reserved a register to begin with so there is nothing to
1599 // shift down. Then if UnusedLowSGPR is null, there isn't available lower
1600 // register to use so just keep the original one we set.
1601 if (LongBranchReservedReg && UnusedLowSGPR) {
1602 FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
1603 MRI.reserveReg(UnusedLowSGPR, TRI);
1604 }
1605}
1606
1607// The special SGPR spills like the one needed for FP, BP or any reserved
1608// registers delayed until frame lowering.
// NOTE(review): the first signature line (source line 1609, with the function
// name) is missing from this extract.
1610 MachineFunction &MF, BitVector &SavedVGPRs,
1611 bool NeedExecCopyReservedReg) const {
1612 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1613 MachineRegisterInfo &MRI = MF.getRegInfo();
1615 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1616 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1617 LiveRegUnits LiveUnits;
1618 LiveUnits.init(*TRI);
1619 // Initially mark callee saved registers as used so we will not choose them
1620 // while looking for scratch SGPRs.
1621 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
1622 for (unsigned I = 0; CSRegs[I]; ++I)
1623 LiveUnits.addReg(CSRegs[I]);
1624
1625 const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
1626
// Decide the fate of the register reserved for the EXEC copy: keep it,
// replace it with a cheaper unused SGPR, spill it, or drop it entirely.
1627 Register ReservedRegForExecCopy = MFI->getSGPRForEXECCopy();
1628 if (NeedExecCopyReservedReg ||
1629 (ReservedRegForExecCopy &&
1630 MRI.isPhysRegUsed(ReservedRegForExecCopy, /*SkipRegMaskTest=*/true))) {
1631 MRI.reserveReg(ReservedRegForExecCopy, TRI);
1632 Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
1633 if (UnusedScratchReg) {
1634 // If found any unused scratch SGPR, reserve the register itself for Exec
1635 // copy and there is no need for any spills in that case.
1636 MFI->setSGPRForEXECCopy(UnusedScratchReg);
1637 MRI.replaceRegWith(ReservedRegForExecCopy, UnusedScratchReg);
1638 LiveUnits.addReg(UnusedScratchReg);
1639 } else {
1640 // Needs spill.
1641 assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedRegForExecCopy) &&
1642 "Re-reserving spill slot for EXEC copy register");
1643 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedRegForExecCopy, RC,
1644 /*IncludeScratchCopy=*/false);
1645 }
1646 } else if (ReservedRegForExecCopy) {
1647 // Reset it at this point. There are no whole-wave copies and spills
1648 // encountered.
1649 MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
1650 }
1651
1652 // Chain functions don't return to the caller, so they don't need to preserve
1653 // the FP and BP.
1654 if (MFI->isChainFunction())
1655 return;
1656
1657 // hasFP only knows about stack objects that already exist. We're now
1658 // determining the stack slots that will be created, so we have to predict
1659 // them. Stack objects force FP usage with calls.
1660 //
1661 // Note a new VGPR CSR may be introduced if one is used for the spill, but we
1662 // don't want to report it here.
1663 //
1664 // FIXME: Is this really hasReservedCallFrame?
1665 const bool WillHaveFP =
1666 FrameInfo.hasCalls() &&
1667 (SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1668
// Reserve a save location (VGPR lane or temp SGPR) for FP and, if present,
// the base pointer.
1669 if (WillHaveFP || hasFP(MF)) {
1670 Register FramePtrReg = MFI->getFrameOffsetReg();
1671 assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
1672 "Re-reserving spill slot for FP");
1673 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
1674 }
1675
1676 if (TRI->hasBasePointer(MF)) {
1677 Register BasePtrReg = TRI->getBaseRegister();
1678 assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
1679 "Re-reserving spill slot for BP");
1680 getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
1681 }
1682}
1683
1684// Only report VGPRs to generic code.
// NOTE(review): the signature line (source line 1685, with the function name)
// is missing from this extract; SavedVGPRs is the callee-save BitVector that
// generic PEI code consumes.
1686 BitVector &SavedVGPRs,
1687 RegScavenger *RS) const {
1689
1690 // If this is a function with the amdgpu_cs_chain[_preserve] calling
1691 // convention and it doesn't contain any calls to llvm.amdgcn.cs.chain, then
1692 // we don't need to save and restore anything.
1693 if (MFI->isChainFunction() && !MF.getFrameInfo().hasTailCall())
1694 return;
1695
1697
1698 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1699 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1700 const SIInstrInfo *TII = ST.getInstrInfo();
1701 bool NeedExecCopyReservedReg = false;
1702
// Scan for WWM spill opcodes (which need an EXEC-copy register) and find the
// function's return instruction (used below to exclude return-value VGPRs).
1703 MachineInstr *ReturnMI = nullptr;
1704 for (MachineBasicBlock &MBB : MF) {
1705 for (MachineInstr &MI : MBB) {
1706 // TODO: Walking through all MBBs here would be a bad heuristic. Better
1707 // handle them elsewhere.
1708 if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
1709 NeedExecCopyReservedReg = true;
1710 else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
1711 MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
1712 MI.getOpcode() == AMDGPU::SI_WHOLE_WAVE_FUNC_RETURN ||
1713 (MFI->isChainFunction() &&
1714 TII->isChainCallOpcode(MI.getOpcode()))) {
1715 // We expect all return to be the same size.
1716 assert(!ReturnMI ||
1717 (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
1718 count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
1719 ReturnMI = &MI;
1720 }
1721 }
1722 }
1723
1724 SmallVector<Register> SortedWWMVGPRs;
1725 for (Register Reg : MFI->getWWMReservedRegs()) {
1726 // The shift-back is needed only for the VGPRs used for SGPR spills and they
1727 // are of 32-bit size. SIPreAllocateWWMRegs pass can add tuples into WWM
1728 // reserved registers.
1729 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1730 if (TRI->getRegSizeInBits(*RC) != 32)
1731 continue;
1732 SortedWWMVGPRs.push_back(Reg);
1733 }
1734
// Descending order so the shift-down packs them into the lowest range.
1735 sort(SortedWWMVGPRs, std::greater<Register>());
1736 MFI->shiftWwmVGPRsToLowestRange(MF, SortedWWMVGPRs, SavedVGPRs);
1737
1738 if (MFI->isEntryFunction())
1739 return;
1740
1741 if (MFI->isWholeWaveFunction()) {
1742 // In practice, all the VGPRs are WWM registers, and we will need to save at
1743 // least their inactive lanes. Add them to WWMReservedRegs.
1744 assert(!NeedExecCopyReservedReg &&
1745 "Whole wave functions can use the reg mapped for their i1 argument");
1746
// Reserve every modified addressable arch VGPR as WWM and make it live-in
// to the entry block (its inactive lanes hold caller state).
1747 unsigned NumArchVGPRs = ST.getAddressableNumArchVGPRs();
1748 for (MCRegister Reg :
1749 AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
1750 if (MF.getRegInfo().isPhysRegModified(Reg)) {
1751 MFI->reserveWWMRegister(Reg);
1752 MF.begin()->addLiveIn(Reg);
1753 }
1754 MF.begin()->sortUniqueLiveIns();
1755 }
1756
1757 // Remove any VGPRs used in the return value because these do not need to be saved.
1758 // This prevents CSR restore from clobbering return VGPRs.
1759 if (ReturnMI) {
1760 for (auto &Op : ReturnMI->operands()) {
1761 if (Op.isReg())
1762 SavedVGPRs.reset(Op.getReg());
1763 }
1764 }
1765
1766 // Create the stack objects for WWM registers now.
1767 for (Register Reg : MFI->getWWMReservedRegs()) {
1768 const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
1769 MFI->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
1770 TRI->getSpillAlign(*RC));
1771 }
1772
1773 // Ignore the SGPRs the default implementation found.
1774 SavedVGPRs.clearBitsNotInMask(TRI->getAllVectorRegMask());
1775
1776 // Do not save AGPRs prior to GFX90A because there was no easy way to do so.
1777 // In gfx908 there was do AGPR loads and stores and thus spilling also
1778 // require a temporary VGPR.
1779 if (!ST.hasGFX90AInsts())
1780 SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
1781
1782 determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
1783
1784 // The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
1785 // allow the default insertion to handle them.
1786 for (auto &Reg : MFI->getWWMSpills())
1787 SavedVGPRs.reset(Reg.first);
1788}
1789
// NOTE(review): the signature line (source line 1790, with the function name)
// is missing from this extract. SGPR counterpart of determineCalleeSaves:
// prunes specially-managed registers (SP, FP, vector regs) from the generic
// callee-save set and forces the return-address pair to be saved when needed.
1791 BitVector &SavedRegs,
1792 RegScavenger *RS) const {
1795 if (MFI->isEntryFunction())
1796 return;
1797
1798 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1799 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1800
1801 // The SP is specifically managed and we don't want extra spills of it.
1802 SavedRegs.reset(MFI->getStackPtrOffsetReg());
1803
// Snapshot before dropping vector registers: the FP decision below depends
// on whether ANY register (vector included) was marked for saving.
1804 const BitVector AllSavedRegs = SavedRegs;
1805 SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
1806
1807 // We have to anticipate introducing CSR VGPR spills or spill of caller
1808 // save VGPR reserved for SGPR spills as we now always create stack entry
1809 // for it, if we don't have any stack objects already, since we require a FP
1810 // if there is a call and stack. We will allocate a VGPR for SGPR spills if
1811 // there are any SGPR spills. Whether they are CSR spills or otherwise.
1812 MachineFrameInfo &FrameInfo = MF.getFrameInfo();
1813 const bool WillHaveFP =
1814 FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
1815
1816 // FP will be specially managed like SP.
1817 if (WillHaveFP || hasFP(MF))
1818 SavedRegs.reset(MFI->getFrameOffsetReg());
1819
1820 // Return address use with return instruction is hidden through the SI_RETURN
1821 // pseudo. Given that and since the IPRA computes actual register usage and
1822 // does not use CSR list, the clobbering of return address by function calls
1823 // (D117243) or otherwise (D120922) is ignored/not seen by the IPRA's register
1824 // usage collection. This will ensure save/restore of return address happens
1825 // in those scenarios.
1826 const MachineRegisterInfo &MRI = MF.getRegInfo();
1827 Register RetAddrReg = TRI->getReturnAddressReg(MF);
1828 if (!MFI->isEntryFunction() &&
1829 (FrameInfo.hasCalls() || MRI.isPhysRegModified(RetAddrReg))) {
1830 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub0));
1831 SavedRegs.set(TRI->getSubReg(RetAddrReg, AMDGPU::sub1));
1832 }
1833}
1834
1836 const GCNSubtarget &ST,
1837 std::vector<CalleeSavedInfo> &CSI) {
1839 MachineFrameInfo &MFI = MF.getFrameInfo();
1840 const SIRegisterInfo *TRI = ST.getRegisterInfo();
1841
1842 assert(
1843 llvm::is_sorted(CSI,
1844 [](const CalleeSavedInfo &A, const CalleeSavedInfo &B) {
1845 return A.getReg() < B.getReg();
1846 }) &&
1847 "Callee saved registers not sorted");
1848
1849 auto CanUseBlockOps = [&](const CalleeSavedInfo &CSI) {
1850 return !CSI.isSpilledToReg() &&
1851 TRI->getPhysRegBaseClass(CSI.getReg()) == &AMDGPU::VGPR_32RegClass &&
1852 !FuncInfo->isWWMReservedRegister(CSI.getReg());
1853 };
1854
1855 auto CSEnd = CSI.end();
1856 for (auto CSIt = CSI.begin(); CSIt != CSEnd; ++CSIt) {
1857 Register Reg = CSIt->getReg();
1858 if (!CanUseBlockOps(*CSIt))
1859 continue;
1860
1861 // Find all the regs that will fit in a 32-bit mask starting at the current
1862 // reg and build said mask. It should have 1 for every register that's
1863 // included, with the current register as the least significant bit.
1864 uint32_t Mask = 1;
1865 CSEnd = std::remove_if(
1866 CSIt + 1, CSEnd, [&](const CalleeSavedInfo &CSI) -> bool {
1867 if (CanUseBlockOps(CSI) && CSI.getReg() < Reg + 32) {
1868 Mask |= 1 << (CSI.getReg() - Reg);
1869 return true;
1870 } else {
1871 return false;
1872 }
1873 });
1874
1875 const TargetRegisterClass *BlockRegClass = TRI->getRegClassForBlockOp(MF);
1876 Register RegBlock =
1877 TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, BlockRegClass);
1878 if (!RegBlock) {
1879 // We couldn't find a super register for the block. This can happen if
1880 // the register we started with is too high (e.g. v232 if the maximum is
1881 // v255). We therefore try to get the last register block and figure out
1882 // the mask from there.
1883 Register LastBlockStart =
1884 AMDGPU::VGPR0 + alignDown(Reg - AMDGPU::VGPR0, 32);
1885 RegBlock =
1886 TRI->getMatchingSuperReg(LastBlockStart, AMDGPU::sub0, BlockRegClass);
1887 assert(RegBlock && TRI->isSubRegister(RegBlock, Reg) &&
1888 "Couldn't find super register");
1889 int RegDelta = Reg - LastBlockStart;
1890 assert(RegDelta > 0 && llvm::countl_zero(Mask) >= RegDelta &&
1891 "Bad shift amount");
1892 Mask <<= RegDelta;
1893 }
1894
1895 FuncInfo->setMaskForVGPRBlockOps(RegBlock, Mask);
1896
1897 // The stack objects can be a bit smaller than the register block if we know
1898 // some of the high bits of Mask are 0. This may happen often with calling
1899 // conventions where the caller and callee-saved VGPRs are interleaved at
1900 // a small boundary (e.g. 8 or 16).
1901 int UnusedBits = llvm::countl_zero(Mask);
1902 unsigned BlockSize = TRI->getSpillSize(*BlockRegClass) - UnusedBits * 4;
1903 int FrameIdx =
1904 MFI.CreateStackObject(BlockSize, TRI->getSpillAlign(*BlockRegClass),
1905 /*isSpillSlot=*/true);
1906 MFI.setIsCalleeSavedObjectIndex(FrameIdx, true);
1907
1908 CSIt->setFrameIdx(FrameIdx);
1909 CSIt->setReg(RegBlock);
1910 }
1911 CSI.erase(CSEnd, CSI.end());
1912}
1913
1916 std::vector<CalleeSavedInfo> &CSI) const {
1917 if (CSI.empty())
1918 return true; // Early exit if no callee saved registers are modified!
1919
1920 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1921 bool UseVGPRBlocks = ST.useVGPRBlockOpsForCSR();
1922
1923 if (UseVGPRBlocks)
1924 assignSlotsUsingVGPRBlocks(MF, ST, CSI);
1925
1926 return assignCalleeSavedSpillSlotsImpl(MF, TRI, CSI) || UseVGPRBlocks;
1927}
1928
1931 std::vector<CalleeSavedInfo> &CSI) const {
1932 if (CSI.empty())
1933 return true; // Early exit if no callee saved registers are modified!
1934
1935 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
1936 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1937 const SIRegisterInfo *RI = ST.getRegisterInfo();
1938 Register FramePtrReg = FuncInfo->getFrameOffsetReg();
1939 Register BasePtrReg = RI->getBaseRegister();
1940 Register SGPRForFPSaveRestoreCopy =
1941 FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);
1942 Register SGPRForBPSaveRestoreCopy =
1943 FuncInfo->getScratchSGPRCopyDstReg(BasePtrReg);
1944 if (!SGPRForFPSaveRestoreCopy && !SGPRForBPSaveRestoreCopy)
1945 return false;
1946
1947 unsigned NumModifiedRegs = 0;
1948
1949 if (SGPRForFPSaveRestoreCopy)
1950 NumModifiedRegs++;
1951 if (SGPRForBPSaveRestoreCopy)
1952 NumModifiedRegs++;
1953
1954 for (auto &CS : CSI) {
1955 if (CS.getReg() == FramePtrReg.asMCReg() && SGPRForFPSaveRestoreCopy) {
1956 CS.setDstReg(SGPRForFPSaveRestoreCopy);
1957 if (--NumModifiedRegs)
1958 break;
1959 } else if (CS.getReg() == BasePtrReg.asMCReg() &&
1960 SGPRForBPSaveRestoreCopy) {
1961 CS.setDstReg(SGPRForBPSaveRestoreCopy);
1962 if (--NumModifiedRegs)
1963 break;
1964 }
1965 }
1966
1967 return false;
1968}
1969
1971 const MachineFunction &MF) const {
1972
1973 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1974 const MachineFrameInfo &MFI = MF.getFrameInfo();
1975 const SIInstrInfo *TII = ST.getInstrInfo();
1976 uint64_t EstStackSize = MFI.estimateStackSize(MF);
1977 uint64_t MaxOffset = EstStackSize - 1;
1978
1979 // We need the emergency stack slots to be allocated in range of the
1980 // MUBUF/flat scratch immediate offset from the base register, so assign these
1981 // first at the incoming SP position.
1982 //
1983 // TODO: We could try sorting the objects to find a hole in the first bytes
1984 // rather than allocating as close to possible. This could save a lot of space
1985 // on frames with alignment requirements.
1986 if (ST.hasFlatScratchEnabled()) {
1987 if (TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1989 return false;
1990 } else {
1991 if (TII->isLegalMUBUFImmOffset(MaxOffset))
1992 return false;
1993 }
1994
1995 return true;
1996}
1997
2001 MachineFunction *MF = MBB.getParent();
2002 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2003 if (!ST.useVGPRBlockOpsForCSR())
2004 return false;
2005
2006 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2008 const SIInstrInfo *TII = ST.getInstrInfo();
2010
2011 const TargetRegisterClass *BlockRegClass =
2012 static_cast<const SIRegisterInfo *>(TRI)->getRegClassForBlockOp(*MF);
2013 for (const CalleeSavedInfo &CS : CSI) {
2014 Register Reg = CS.getReg();
2015 if (!BlockRegClass->contains(Reg) ||
2016 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2018 continue;
2019 }
2020
2021 // Build a scratch block store.
2022 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2023 int FrameIndex = CS.getFrameIdx();
2024 MachinePointerInfo PtrInfo =
2025 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2026 MachineMemOperand *MMO =
2028 FrameInfo.getObjectSize(FrameIndex),
2029 FrameInfo.getObjectAlign(FrameIndex));
2030
2031 BuildMI(MBB, MI, MI->getDebugLoc(),
2032 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
2033 .addReg(Reg, getKillRegState(false))
2034 .addFrameIndex(FrameIndex)
2036 .addImm(0)
2037 .addImm(Mask)
2038 .addMemOperand(MMO);
2039
2040 FuncInfo->setHasSpilledVGPRs();
2041
2042 // Add the register to the liveins. This is necessary because if any of the
2043 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2044 // then the whole block will be marked as reserved and `updateLiveness` will
2045 // skip it.
2046 MBB.addLiveIn(Reg);
2047 }
2048 MBB.sortUniqueLiveIns();
2049
2050 return true;
2051}
2052
2056 MachineFunction *MF = MBB.getParent();
2057 const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
2058 if (!ST.useVGPRBlockOpsForCSR())
2059 return false;
2060
2062 MachineFrameInfo &MFI = MF->getFrameInfo();
2063 const SIInstrInfo *TII = ST.getInstrInfo();
2064 const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);
2065 const TargetRegisterClass *BlockRegClass = SITRI->getRegClassForBlockOp(*MF);
2066 for (const CalleeSavedInfo &CS : reverse(CSI)) {
2067 Register Reg = CS.getReg();
2068 if (!BlockRegClass->contains(Reg) ||
2069 !FuncInfo->hasMaskForVGPRBlockOps(Reg)) {
2071 continue;
2072 }
2073
2074 // Build a scratch block load.
2075 uint32_t Mask = FuncInfo->getMaskForVGPRBlockOps(Reg);
2076 int FrameIndex = CS.getFrameIdx();
2077 MachinePointerInfo PtrInfo =
2078 MachinePointerInfo::getFixedStack(*MF, FrameIndex);
2080 PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
2081 MFI.getObjectAlign(FrameIndex));
2082
2083 auto MIB = BuildMI(MBB, MI, MI->getDebugLoc(),
2084 TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_RESTORE), Reg)
2085 .addFrameIndex(FrameIndex)
2086 .addReg(FuncInfo->getStackPtrOffsetReg())
2087 .addImm(0)
2088 .addImm(Mask)
2089 .addMemOperand(MMO);
2090 SITRI->addImplicitUsesForBlockCSRLoad(MIB, Reg);
2091
2092 // Add the register to the liveins. This is necessary because if any of the
2093 // VGPRs in the register block is reserved (e.g. if it's a WWM register),
2094 // then the whole block will be marked as reserved and `updateLiveness` will
2095 // skip it.
2096 MBB.addLiveIn(Reg);
2097 }
2098
2099 MBB.sortUniqueLiveIns();
2100 return true;
2101}
2102
2104 MachineFunction &MF,
2107 int64_t Amount = I->getOperand(0).getImm();
2108 if (Amount == 0)
2109 return MBB.erase(I);
2110
2111 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
2112 const SIInstrInfo *TII = ST.getInstrInfo();
2113 const DebugLoc &DL = I->getDebugLoc();
2114 unsigned Opc = I->getOpcode();
2115 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
2116 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2117
2118 if (!hasReservedCallFrame(MF)) {
2119 Amount = alignTo(Amount, getStackAlign());
2120 assert(isUInt<32>(Amount) && "exceeded stack address space size");
2123
2124 Amount *= getScratchScaleFactor(ST);
2125 if (IsDestroy)
2126 Amount = -Amount;
2127 auto Add = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), SPReg)
2128 .addReg(SPReg)
2129 .addImm(Amount);
2130 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
2131 } else if (CalleePopAmount != 0) {
2132 llvm_unreachable("is this used?");
2133 }
2134
2135 return MBB.erase(I);
2136}
2137
2138/// Returns true if the frame will require a reference to the stack pointer.
2139///
2140/// This is the set of conditions common to setting up the stack pointer in a
2141/// kernel, and for using a frame pointer in a callable function.
2142///
2143/// FIXME: Should also check hasOpaqueSPAdjustment and if any inline asm
2144/// references SP.
2146 return MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint();
2147}
2148
2149// The FP for kernels is always known 0, so we never really need to setup an
2150// explicit register for it. However, DisableFramePointerElim will force us to
2151// use a register for it.
2153 const MachineFrameInfo &MFI = MF.getFrameInfo();
2154
2155 // For entry functions we can use an immediate offset in most cases,
2156 // so the presence of calls doesn't imply we need a distinct frame pointer.
2157 if (MFI.hasCalls() &&
2159 // All offsets are unsigned, so need to be addressed in the same direction
2160 // as stack growth.
2161
2162 // FIXME: This function is pretty broken, since it can be called before the
2163 // frame layout is determined or CSR spills are inserted.
2164 return MFI.getStackSize() != 0;
2165 }
2166
2167 return frameTriviallyRequiresSP(MFI) || MFI.isFrameAddressTaken() ||
2168 MF.getSubtarget<GCNSubtarget>().getRegisterInfo()->hasStackRealignment(
2169 MF) ||
2172}
2173
2175 const MachineFunction &MF) const {
2176 return MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
2179}
2180
2181// This is essentially a reduced version of hasFP for entry functions. Since the
2182// stack pointer is known 0 on entry to kernels, we never really need an FP
2183// register. We may need to initialize the stack pointer depending on the frame
2184// properties, which logically overlaps many of the cases where an ordinary
2185// function would require an FP.
2187 const MachineFunction &MF) const {
2188 // Callable functions always require a stack pointer reference.
2190 "only expected to call this for entry points functions");
2191
2192 const MachineFrameInfo &MFI = MF.getFrameInfo();
2193
2194 // Entry points ordinarily don't need to initialize SP. We have to set it up
2195 // for callees if there are any. Also note tail calls are only possible via
2196 // the `llvm.amdgcn.cs.chain` intrinsic.
2197 if (MFI.hasCalls() || MFI.hasTailCall())
2198 return true;
2199
2200 // We still need to initialize the SP if we're doing anything weird that
2201 // references the SP, like variable sized stack objects.
2202 return frameTriviallyRequiresSP(MFI);
2203}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr MCPhysReg FPReg
static constexpr MCPhysReg SPReg
This file declares the machine register scavenger class.
static void buildEpilogRestore(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static cl::opt< bool > EnableSpillVGPRToAGPR("amdgpu-spill-vgpr-to-agpr", cl::desc("Enable spilling VGPRs to AGPRs"), cl::ReallyHidden, cl::init(true))
static void getVGPRSpillLaneOrTempRegister(MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR, const TargetRegisterClass &RC=AMDGPU::SReg_32_XM0_XEXECRegClass, bool IncludeScratchCopy=true)
Query target location for spilling SGPRs IncludeScratchCopy : Also look for free scratch SGPRs.
static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, const SIInstrInfo *TII, Register TargetReg)
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI)
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI, const SIMachineFunctionInfo &FuncInfo, LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register SpillReg, int FI, Register FrameReg, int64_t DwordOff=0)
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool IsProlog, bool EnableInactiveLanes)
static bool frameTriviallyRequiresSP(const MachineFrameInfo &MFI)
Returns true if the frame will require a reference to the stack pointer.
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI, const SIMachineFunctionInfo *FuncInfo, MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsProlog)
static bool allSGPRSpillsAreDead(const MachineFunction &MF)
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits, const TargetRegisterClass &RC, bool Unused=false)
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI, const LiveRegUnits &LiveUnits, const TargetRegisterClass &RC)
static void assignSlotsUsingVGPRBlocks(MachineFunction &MF, const GCNSubtarget &ST, std::vector< CalleeSavedInfo > &CSI)
static unsigned getScratchScaleFactor(const GCNSubtarget &ST)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static const int BlockSize
Definition TarWriter.cpp:33
static const LaneMaskConstants & get(const GCNSubtarget &ST)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
bool test(unsigned Idx) const
Definition BitVector.h:480
BitVector & reset()
Definition BitVector.h:411
void clearBitsNotInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsNotInMask - Clear a bit in this vector for every '0' bit in Mask.
Definition BitVector.h:744
BitVector & set()
Definition BitVector.h:370
bool any() const
any - Returns true if any bit is set.
Definition BitVector.h:189
void clearBitsInMask(const uint32_t *Mask, unsigned MaskWords=~0u)
clearBitsInMask - Clear any bits in this vector that are set in Mask.
Definition BitVector.h:732
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
bool empty() const
empty - Tests whether there are no bits in this bitvector.
Definition BitVector.h:175
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
MCRegister getReg() const
A debug info location.
Definition DebugLoc.h:123
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
const HexagonRegisterInfo & getRegisterInfo() const
A set of register units used to track register liveness.
bool available(MCRegister Reg) const
Returns true if no part of physical register Reg is live.
void init(const TargetRegisterInfo &TRI)
Initialize and clear the set.
void addReg(MCRegister Reg)
Adds register units covered by physical register Reg.
LLVM_ABI void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
LLVM_ABI void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
void removeReg(MCRegister Reg)
Removes all register units covered by physical register Reg.
bool empty() const
Returns true if the set is empty.
LLVM_ABI void addLiveIns(const MachineBasicBlock &MBB)
Adds registers living into block MBB.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasPatchPoint() const
This method may be called any time after instruction selection is complete to determine if there is a...
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
bool hasTailCall() const
Returns true if the function contains a tail call.
bool hasStackMap() const
This method may be called any time after instruction selection is complete to determine if there is a...
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
bool isDeadObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a dead object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
mop_range operands()
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
void setIsDead(bool Val=true)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
bool isAllocatable(MCRegister PhysReg) const
isAllocatable - Returns true when PhysReg belongs to an allocatable register class and it hasn't been...
LLVM_ABI const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
void reserveReg(MCRegister PhysReg, const TargetRegisterInfo *TRI)
reserveReg – Mark a register as reserved so checks like isAllocatable will not suggest using it.
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
LLVM_ABI bool isPhysRegModified(MCRegister PhysReg, bool SkipNoReturnDef=false) const
Return true if the specified register is modified in this function.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
PrologEpilogSGPRSpillBuilder(Register Reg, const PrologEpilogSGPRSaveRestoreInfo SI, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, const SIInstrInfo *TII, const SIRegisterInfo &TRI, LiveRegUnits &LiveUnits, Register FrameReg)
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs, bool NeedExecCopyReservedReg) const
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
bool mayReserveScratchForCWSR(const MachineFunction &MF) const
bool allocateScavengingFrameIndexesNearIncomingSP(const MachineFunction &MF) const override
Control the placement of special register scavenging spill slots when allocating a stack frame.
bool requiresStackPointerReference(const MachineFunction &MF) const
void emitEntryFunctionPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
bool hasFPImpl(const MachineFunction &MF) const override
bool assignCalleeSavedSpillSlotsImpl(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits, Register FrameReg, Register FramePtrRegScratchCopy) const
void processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameIndicesReplaced - This method is called immediately before MO_FrameIndex op...
bool isSupportedStackID(TargetStackID::Value ID) const override
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
ArrayRef< PrologEpilogSGPRSpill > getPrologEpilogSGPRSpills() const
const WWMSpillsMap & getWWMSpills() const
void getAllScratchSGPRCopyDstRegs(SmallVectorImpl< Register > &Regs) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
void shiftWwmVGPRsToLowestRange(MachineFunction &MF, SmallVectorImpl< Register > &WWMVGPRs, BitVector &SavedVGPRs)
void setMaskForVGPRBlockOps(Register RegisterBlock, uint32_t Mask)
GCNUserSGPRUsageInfo & getUserSGPRInfo()
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size=4, Align Alignment=Align(4))
void setVGPRToAGPRSpillDead(int FrameIndex)
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
uint32_t getMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasMaskForVGPRBlockOps(Register RegisterBlock) const
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const
Register getGITPtrLoReg(const MachineFunction &MF) const
void setVGPRForAGPRCopy(Register NewVGPRForAGPRCopy)
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR)
Reserve AGPRs or VGPRs to support spilling for FrameIndex FI.
void splitWWMSpillRegisters(MachineFunction &MF, SmallVectorImpl< std::pair< Register, int > > &CalleeSavedRegs, SmallVectorImpl< std::pair< Register, int > > &ScratchRegs) const
bool isWWMReservedRegister(Register Reg) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
bool allocateSGPRSpillToVGPRLane(MachineFunction &MF, int FI, bool SpillToPhysVGPRLane=false, bool IsPrologEpilog=false)
void setLongBranchReservedReg(Register Reg)
void setHasSpilledVGPRs(bool Spill=true)
bool removeDeadFrameIndices(MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs)
If ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill to the default stack.
void setScratchReservedForDynamicVGPRs(unsigned SizeInBytes)
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const
bool checkIndexInPrologEpilogSGPRSpills(int FI) const
const ReservedRegSet & getWWMReservedRegs() const
const PrologEpilogSGPRSaveRestoreInfo & getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const
void setIsStackRealigned(bool Realigned=true)
void addToPrologEpilogSGPRSpills(Register Reg, PrologEpilogSGPRSaveRestoreInfo SI)
Register getScratchSGPRCopyDstReg(Register Reg) const
Register getFrameRegister(const MachineFunction &MF) const override
const TargetRegisterClass * getRegClassForBlockOp(const MachineFunction &MF) const
void addImplicitUsesForBlockCSRLoad(MachineInstrBuilder &MIB, Register BlockReg) const
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual bool hasReservedCallFrame(const MachineFunction &MF) const
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void restoreCalleeSavedRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
void spillCalleeSavedRegister(MachineBasicBlock &SaveBlock, MachineBasicBlock::iterator MI, const CalleeSavedInfo &CS, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const
spillCalleeSavedRegister - Default implementation for spilling a single callee saved register.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetOptions Options
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ PRIVATE_ADDRESS
Address space for private memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
constexpr RegState getKillRegState(bool B)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value and is Skew mod Align.
Definition MathExtras.h:546
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
static constexpr uint64_t encode(Fields... Values)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.