doxygen/GCNSubtarget_8h_source.html

//=====-- GCNSubtarget.h - Define GCN Subtarget for AMDGPU ------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//==-----------------------------------------------------------------------===//

//

/// \file

/// AMD GCN specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H


#include "AMDGPUCallLowering.h"

#include "AMDGPURegisterBankInfo.h"

#include "AMDGPUSubtarget.h"

#include "SIFrameLowering.h"

#include "SIISelLowering.h"

#include "SIInstrInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/Support/AMDHSAKernelDescriptor.h"

#include "llvm/Support/ErrorHandling.h"


#define GET_SUBTARGETINFO_HEADER

#include "AMDGPUGenSubtargetInfo.inc"


namespace llvm {


class GCNTargetMachine;


/// Module flag names controlling out-of-bounds buffer access semantics.

/// Each flag is an i32 with Module::Max merge behaviour and tri-state values:

///   0 = any (absent/default - backend currently treats as strict)

///   1 = relaxed

///   2 = strict


namespace AMDGPUOOBMode {

/// Name of the module flag that carries the OOB mode for buffer accesses.
inline constexpr StringLiteral BufferFlag("amdgpu.buffer.oob.mode");

/// Name of the module flag that carries the OOB mode for tbuffer accesses.
inline constexpr StringLiteral TBufferFlag("amdgpu.tbuffer.oob.mode");

} // namespace AMDGPUOOBMode


class GCNSubtarget final : public AMDGPUGenSubtargetInfo,

                           public AMDGPUSubtarget {

public:

  using AMDGPUSubtarget::getMaxWavesPerEU;


  // Following 2 enums are documented at:

  //   - https://llvm.org/docs/AMDGPUUsage.html#trap-handler-abi


  /// Trap handler ABI selected for this subtarget; see getTrapHandlerAbi().
  enum class TrapHandlerAbi {

    /// No trap handler ABI; reported when the OS is not AMDHSA.
    NONE   = 0x00,

    /// AMDHSA trap handler ABI; reported when isAmdHsaOS() is true.
    AMDHSA = 0x01,

  };


  /// Numeric trap identifiers.
  /// NOTE(review): presumably the trap IDs reserved by the AMDHSA trap handler
  /// ABI described in AMDGPUUsage - confirm against that document.
  enum class TrapID {

    /// ID used for the LLVM trap.
    LLVMAMDHSATrap      = 0x02,

    /// ID used for the LLVM debug trap.
    LLVMAMDHSADebugTrap = 0x03,

  };


private:

  /// SelectionDAGISel related APIs.

  std::unique_ptr<const SelectionDAGTargetInfo> TSInfo;


  /// GlobalISel related APIs.

  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;

  std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;

  std::unique_ptr<InstructionSelector> InstSelector;

  std::unique_ptr<LegalizerInfo> Legalizer;

  std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;


protected:

  // Basic subtarget description.

  AMDGPU::IsaInfo::AMDGPUTargetID TargetID;

  unsigned Gen = INVALID;

  InstrItineraryData InstrItins;

  int LDSBankCount = 0;

  unsigned MaxPrivateElementSize = 0;


  // Instruction cache line size in bytes; set from TableGen subtarget features.

  unsigned InstCacheLineSize = 0;


  // Dynamically set bits that enable features.

  bool DynamicVGPR = false;

  bool DynamicVGPRBlockSize32 = false;

  bool ScalarizeGlobal = false;

  const bool BufferOOBRelaxed;

  const bool TBufferOOBRelaxed;


  /// The maximum number of instructions that may be placed within an S_CLAUSE,

  /// which is one greater than the maximum argument to S_CLAUSE. A value of 0

  /// indicates a lack of S_CLAUSE support.

  unsigned MaxHardClauseLength = 0;


// X-macro expanded by the AMDGPUGenSubtargetInfo.inc include that follows.
// Each expansion declares one bool data member named ATTRIBUTE, initialised
// to DEFAULT. GETTER is unused in this expansion.
//
// The macro body must sit on the line directly after the trailing backslash:
// a blank line there ends the macro early, leaving it empty and turning the
// body into a stray member declaration.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool ATTRIBUTE = DEFAULT;


#include "AMDGPUGenSubtargetInfo.inc"


private:

  SIInstrInfo InstrInfo;

  SITargetLowering TLInfo;

  SIFrameLowering FrameLowering;


  /// Get the register that represents the actual dependency between the

  /// definition and the use. The definition might only affect a subregister

  /// that is not actually used. Works for both virtual and physical registers.

  /// Note: Currently supports VOP3P instructions (without WMMA and SWMMAC).

  /// Returns the definition register if there is a real dependency and no

  /// better match is found.

  Register getRealSchedDependency(const MachineInstr &DefI, int DefOpIdx,

                                  const MachineInstr &UseI, int UseOpIdx) const;


public:

  GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,

               const GCNTargetMachine &TM, bool BufferOOBRelaxed = false,

               bool TBufferOOBRelaxed = false);

  ~GCNSubtarget() override;


  GCNSubtarget &initializeSubtargetDependencies(const Triple &TT, StringRef GPU,

                                                StringRef FS);


  /// Diagnose inconsistent subtarget features before attempting to codegen

  /// function \p F.

  void checkSubtargetFeatures(const Function &F) const;


  const SIInstrInfo *getInstrInfo() const override { return &InstrInfo; }


  const SIFrameLowering *getFrameLowering() const override {

    return &FrameLowering;

  }


  const SITargetLowering *getTargetLowering() const override { return &TLInfo; }


  const SIRegisterInfo *getRegisterInfo() const override {

    return &InstrInfo.getRegisterInfo();

  }


  const SelectionDAGTargetInfo *getSelectionDAGInfo() const override;


  const CallLowering *getCallLowering() const override {

    return CallLoweringInfo.get();

  }


  const InlineAsmLowering *getInlineAsmLowering() const override {

    return InlineAsmLoweringInfo.get();

  }


  InstructionSelector *getInstructionSelector() const override {

    return InstSelector.get();

  }


  const LegalizerInfo *getLegalizerInfo() const override {

    return Legalizer.get();

  }


  const AMDGPURegisterBankInfo *getRegBankInfo() const override {

    return RegBankInfo.get();

  }


  const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const {

    return TargetID;

  }


  const InstrItineraryData *getInstrItineraryData() const override {

    return &InstrItins;

  }


  void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);


  /// \returns the subtarget generation, i.e. the stored Gen value converted to
  /// the Generation enum.
  Generation getGeneration() const { return static_cast<Generation>(Gen); }


  /// \returns true if the subtarget generation is GFX11 or later.
  bool isGFX11Plus() const { return getGeneration() >= GFX11; }


// X-macro expanded by the AMDGPUGenSubtargetInfo.inc include that follows.
// Each expansion defines the const getter GETTER(), which overrides a virtual
// of the same name and returns the bool member ATTRIBUTE. DEFAULT is unused in
// this expansion.
//
// The macro body must sit on the line directly after the trailing backslash:
// a blank line there ends the macro early, leaving it empty and turning the
// body into a stray member function referring to undeclared names.
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
  bool GETTER() const override { return ATTRIBUTE; }

#include "AMDGPUGenSubtargetInfo.inc"


  unsigned getMaxWaveScratchSize() const {

    // See COMPUTE_TMPRING_SIZE.WAVESIZE.

    if (getGeneration() >= GFX12) {

      // 18-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 18) - 1);

    }

    if (getGeneration() == GFX11) {

      // 15-bit field in units of 64-dword.

      return (64 * 4) * ((1 << 15) - 1);

    }

    // 13-bit field in units of 256-dword.

    return (256 * 4) * ((1 << 13) - 1);

  }


  /// Return the number of high bits known to be zero for a frame index.


  unsigned getKnownHighZeroBitsForFrameIndex() const {

    return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();

  }


  int getLDSBankCount() const { return LDSBankCount; }


  /// Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).

  unsigned getInstCacheLineSize() const { return InstCacheLineSize; }


  /// \returns the maximum private element size. MaxPrivateElementSize applies
  /// when \p ForBufferRSrc is set or flat scratch is not enabled; otherwise
  /// the result is fixed at 16.
  unsigned getMaxPrivateElementSize(bool ForBufferRSrc = false) const {
    if (ForBufferRSrc || !hasFlatScratchEnabled())
      return MaxPrivateElementSize;
    return 16;
  }


  unsigned getConstantBusLimit(unsigned Opcode) const;


  /// Returns if the result of this instruction with a 16-bit result returned in

  /// a 32-bit register implicitly zeroes the high 16-bits, rather than preserve

  /// the original value.

  bool zeroesHigh16BitsOfDest(unsigned Opcode) const;


  /// \returns true on GFX10 and later generations, except for targets with
  /// GFX1250 instructions, which always report false.
  bool supportsWGP() const {
    return !HasGFX1250Insts && getGeneration() >= GFX10;
  }


  bool hasHWFP64() const { return HasFP64; }


  bool hasAddr64() const {

    return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);

  }


  bool hasFlat() const {

    return (getGeneration() > AMDGPUSubtarget::SOUTHERN_ISLANDS);

  }


  // Return true if the target only has the reverse operand versions of VALU

  // shift instructions (e.g. v_lshrrev_b32, and no v_lshr_b32).


  bool hasOnlyRevVALUShifts() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasFractBug() const { return getGeneration() == SOUTHERN_ISLANDS; }


  bool hasMed3_16() const { return getGeneration() >= AMDGPUSubtarget::GFX9; }


  bool hasMin3Max3_16() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  bool hasSwap() const { return HasGFX9Insts; }


  bool hasScalarPackInsts() const { return HasGFX9Insts; }


  bool hasScalarMulHiInsts() const { return HasGFX9Insts; }


  bool hasScalarSubwordLoads() const { return getGeneration() >= GFX12; }


  bool hasAsyncMark() const { return hasVMemToLDSLoad() || HasAsynccnt; }


  /// \returns TrapHandlerAbi::AMDHSA when isAmdHsaOS() holds, and
  /// TrapHandlerAbi::NONE otherwise.
  TrapHandlerAbi getTrapHandlerAbi() const {
    if (isAmdHsaOS())
      return TrapHandlerAbi::AMDHSA;
    return TrapHandlerAbi::NONE;
  }


  /// \returns true if S_GETREG DOORBELL_ID is supported, which is the case for
  /// all targets from GFX9 onward.
  bool supportsGetDoorbellID() const { return getGeneration() >= GFX9; }


  /// True if the offset field of DS instructions works as expected. On SI, the

  /// offset uses a 16-bit adder and does not always wrap properly.

  bool hasUsableDSOffset() const { return getGeneration() >= SEA_ISLANDS; }


  bool unsafeDSOffsetFoldingEnabled() const {

    return EnableUnsafeDSOffsetFolding;

  }


  /// Condition output from div_scale is usable.


  bool hasUsableDivScaleConditionOutput() const {

    return getGeneration() != SOUTHERN_ISLANDS;

  }


  /// Extra wait hazard is needed in some cases before

  /// s_cbranch_vccnz/s_cbranch_vccz.

  bool hasReadVCCZBug() const { return getGeneration() <= SEA_ISLANDS; }


  /// Writes to VCC_LO/VCC_HI update the VCCZ flag.

  bool partialVCCWritesUpdateVCCZ() const { return getGeneration() >= GFX10; }


  /// A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR

  /// was written by a VALU instruction.


  bool hasSMRDReadVALUDefHazard() const {

    return getGeneration() == SOUTHERN_ISLANDS;

  }


  /// A read of an SGPR by a VMEM instruction requires 5 wait states when the

  /// SGPR was written by a VALU Instruction.


  bool hasVMEMReadSGPRVALUDefHazard() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasRFEHazards() const { return getGeneration() >= VOLCANIC_ISLANDS; }


  /// Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32: 1 up to
  /// and including SEA_ISLANDS, 2 on every later generation.
  unsigned getSetRegWaitStates() const {
    if (getGeneration() <= SEA_ISLANDS)
      return 1;
    return 2;
  }


  /// Return the amount of LDS that can be used that will not restrict the

  /// occupancy lower than WaveCount.

  unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,

                                           const Function &) const;


  bool supportsMinMaxDenormModes() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns If target supports S_DENORM_MODE.


  bool hasDenormModeInst() const {

    return getGeneration() >= AMDGPUSubtarget::GFX10;

  }


  /// \returns If target supports ds_read/write_b128 and user enables generation

  /// of ds_read/write_b128.

  bool useDS128() const { return HasCIInsts && EnableDS128; }


  /// \return If target supports ds_read/write_b96/128.

  bool hasDS96AndDS128() const { return HasCIInsts; }


  /// Have v_trunc_f64, v_ceil_f64, v_rndne_f64

  bool haveRoundOpsF64() const { return HasCIInsts; }


  /// \returns If MUBUF instructions always perform range checking, even for

  /// buffer resources used for private memory access.


  bool privateMemoryResourceIsRangeChecked() const {

    return getGeneration() < AMDGPUSubtarget::GFX9;

  }


  /// \returns If target requires PRT Strict NULL support (zero result registers

  /// for sparse texture support).

  bool usePRTStrictNull() const { return EnablePRTStrictNull; }


  /// \returns true only when the subtarget has unaligned buffer access and
  /// unaligned access mode is also set.
  bool hasUnalignedBufferAccessEnabled() const {
    if (!HasUnalignedAccessMode)
      return false;
    return HasUnalignedBufferAccess;
  }


  /// \returns true only when the subtarget has unaligned DS access and
  /// unaligned access mode is also set.
  bool hasUnalignedDSAccessEnabled() const {
    if (!HasUnalignedAccessMode)
      return false;
    return HasUnalignedDSAccess;
  }


  /// \returns true only when the subtarget has unaligned scratch access and
  /// unaligned access mode is also set.
  bool hasUnalignedScratchAccessEnabled() const {
    if (!HasUnalignedAccessMode)
      return false;
    return HasUnalignedScratchAccess;
  }


  bool isXNACKEnabled() const { return TargetID.isXnackOnOrAny(); }


  bool isTgSplitEnabled() const { return EnableTgSplit; }


  bool hasRelaxedBufferOOBMode() const { return BufferOOBRelaxed; }

  bool hasRelaxedTBufferOOBMode() const { return TBufferOOBRelaxed; }


  bool isCuModeEnabled() const { return EnableCuMode; }


  bool isPreciseMemoryEnabled() const { return EnablePreciseMemory; }


  bool hasFlatScrRegister() const { return hasFlatAddressSpace(); }


  /// Check if target supports ST addressing mode with FLAT scratch
  /// instructions. The ST addressing mode means no registers are used, either
  /// VGPR or SGPR, but only immediate offset is swizzled and added to the FLAT
  /// scratch base.
  ///
  /// Requires the flat scratch instructions plus either the GFX10.3 or the
  /// GFX940 instruction set.
  bool hasFlatScratchSTMode() const {
    if (!hasFlatScratchInsts())
      return false;
    return hasGFX10_3Insts() || hasGFX940Insts();
  }


  bool hasFlatScratchSVSMode() const { return HasGFX940Insts || HasGFX11Insts; }


  /// \returns true if flat scratch is in use: either it is architected, or
  /// EnableFlatScratch is set and the flat scratch instructions exist.
  bool hasFlatScratchEnabled() const {
    if (hasArchitectedFlatScratch())
      return true;
    return EnableFlatScratch && hasFlatScratchInsts();
  }


  bool hasGlobalAddTidInsts() const { return HasGFX10_BEncoding; }


  bool hasAtomicCSub() const { return HasGFX10_BEncoding; }


  /// \returns false for targets with GFX940 or GFX1250 instructions, and true
  /// for every other target.
  bool hasExportInsts() const {
    return !(hasGFX940Insts() || hasGFX1250Insts());
  }


  /// \returns true for targets with GFX11 instructions that do not also have
  /// GFX1250 instructions.
  bool hasVINTERPEncoding() const {
    if (hasGFX1250Insts())
      return false;
    return HasGFX11Insts;
  }


  /// DS_ADD_F64/DS_ADD_RTN_F64: reported for targets with GFX90A or GFX1250
  /// instructions.
  bool hasLdsAtomicAddF64() const {
    if (hasGFX90AInsts())
      return true;
    return hasGFX1250Insts();
  }


  bool hasMultiDwordFlatScratchAddressing() const {

    return getGeneration() >= GFX9;

  }


  bool hasFlatLgkmVMemCountInOrder() const { return getGeneration() > GFX9; }


  bool hasD16LoadStore() const { return getGeneration() >= GFX9; }


  /// \returns true if D16 loads/stores exist and the target ID does not report
  /// SRAM ECC as on or "any".
  bool d16PreservesUnusedBits() const {
    if (!hasD16LoadStore())
      return false;
    return !TargetID.isSramEccOnOrAny();
  }


  bool hasD16Images() const { return getGeneration() >= VOLCANIC_ISLANDS; }


  /// Return if most LDS instructions have an m0 use that require m0 to be

  /// initialized.

  bool ldsRequiresM0Init() const { return getGeneration() < GFX9; }


  // True if the hardware rewinds and replays GWS operations if a wave is

  // preempted.

  //

  // If this is false, a GWS operation requires testing if a nack set the

  // MEM_VIOL bit, and repeating if so.

  bool hasGWSAutoReplay() const { return getGeneration() >= GFX9; }


  /// \returns if target has ds_gws_sema_release_all instruction.

  bool hasGWSSemaReleaseAll() const { return HasCIInsts; }


  bool hasScalarAddSub64() const { return getGeneration() >= GFX12; }


  bool hasScalarSMulU64() const { return getGeneration() >= GFX12; }


  /// Covers VS/PS/CS graphics shaders: true when the OS is Mesa3D and the
  /// calling convention of \p F is a shader calling convention.
  bool isMesaGfxShader(const Function &F) const {
    if (!isMesa3DOS())
      return false;
    return AMDGPU::isShader(F.getCallingConv());
  }


  bool hasMad64_32() const { return getGeneration() >= SEA_ISLANDS; }


  /// \returns true if either the returning or the non-returning atomic fadd
  /// instructions are available.
  bool hasAtomicFaddInsts() const {
    return HasAtomicFaddNoRtnInsts || HasAtomicFaddRtnInsts;
  }


  bool vmemWriteNeedsExpWaitcnt() const {

    return getGeneration() < SEA_ISLANDS;

  }


  bool hasInstPrefetch() const {

    return getGeneration() == GFX10 || getGeneration() == GFX11;

  }


  bool hasPrefetch() const { return HasGFX12Insts; }


  bool hasInstPrefSize() const { return isGFX11Plus(); }


  /// Describe the COMPUTE_PGM_RSRC3 INST_PREF_SIZE field for this generation.
  ///
  /// \param [out] Mask  Mask of the field.
  /// \param [out] Shift Shift of the field.
  /// \param [out] Width Width of the field.
  /// \param [out] CacheLineSize Value of getInstCacheLineSize().
  ///
  /// Must only be called on GFX11+ (asserted).
  void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width,
                           uint32_t &CacheLineSize) const {
    assert(isGFX11Plus());
    CacheLineSize = getInstCacheLineSize();

    // Everything other than GFX11 itself uses the GFX12_PLUS field layout.
    if (getGeneration() != GFX11) {
      Mask = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
      Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
      Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
      return;
    }

    Mask = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
    Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
    Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
  }


  // Has s_cmpk_* instructions.

  bool hasSCmpK() const { return getGeneration() < GFX12; }


  // Scratch is allocated in 256 dword per wave blocks for the entire

  // wavefront. When viewed from the perspective of an arbitrary workitem, this

  // is 4-byte aligned.

  //

  // Only 4-byte alignment is really needed to access anything. Transformations

  // on the pointer value itself may rely on the alignment / known low bits of

  // the pointer. Set this to something above the minimum to avoid needing

  // dynamic realignment in common cases.

  Align getStackAlignment() const { return Align(16); }


  bool enableMachineScheduler() const override { return true; }


  bool useAA() const override;


  bool enableSubRegLiveness() const override { return true; }


  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }

  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }


  // static wrappers

  static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);


  // XXX - Why is this here if it isn't in the default pass set?

  bool enableEarlyIfConversion() const override { return true; }


  void overrideSchedPolicy(MachineSchedPolicy &Policy,

                           const SchedRegion &Region) const override;


  void overridePostRASchedPolicy(MachineSchedPolicy &Policy,

                                 const SchedRegion &Region) const override;


  void mirFileLoaded(MachineFunction &MF) const override;


  unsigned getMaxNumUserSGPRs() const {

    return AMDGPU::getMaxNumUserSGPRs(*this);

  }


  bool useVGPRIndexMode() const;


  bool hasScalarCompareEq64() const {

    return getGeneration() >= VOLCANIC_ISLANDS;

  }


  bool hasLDSFPAtomicAddF32() const { return HasGFX8Insts; }


  /// \returns true for targets with GFX90A or GFX1250 instructions.
  bool hasLDSFPAtomicAddF64() const {
    return HasGFX1250Insts || HasGFX90AInsts;
  }


  /// \returns true if the subtarget has the v_permlane64_b32 instruction.

  bool hasPermLane64() const { return getGeneration() >= GFX11; }


  /// \returns true if the subtarget supports the ds_swizzle rotate and FFT

  /// swizzle modes (GFX9+).

  bool hasDsSwizzleRotateMode() const { return getGeneration() >= GFX9; }


  /// \returns true if the target has DPP and either has GFX90A instructions or
  /// is GFX10 or later.
  bool hasDPPRowShare() const {
    if (!HasDPP)
      return false;
    return HasGFX90AInsts || getGeneration() >= GFX10;
  }


  // Has V_PK_MOV_B32 opcode

  bool hasPkMovB32() const { return HasGFX90AInsts; }


  /// \returns true on GFX10 and later, and on targets with GFX940
  /// instructions.
  bool hasFmaakFmamkF32Insts() const {
    if (getGeneration() >= GFX10)
      return true;
    return hasGFX940Insts();
  }


  bool hasFmaakFmamkF64Insts() const { return hasGFX1250Insts(); }


  bool hasNonNSAEncoding() const { return getGeneration() < GFX12; }


  unsigned getNSAMaxSize(bool HasSampler = false) const {

    return AMDGPU::getNSAMaxSize(*this, HasSampler);

  }


  bool hasMadF16() const;


  // Scalar and global loads support scale_offset bit.

  bool hasScaleOffset() const { return HasGFX1250Insts; }


  // FLAT GLOBAL VOffset is signed

  bool hasSignedGVSOffset() const { return HasGFX1250Insts; }


  bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }


  /// \returns true if the target has the user SGPR init16 bug and is running
  /// in wave32.
  bool hasUserSGPRInit16BugInWave32() const {
    if (!HasUserSGPRInit16Bug)
      return false;
    return isWave32();
  }


  bool has12DWordStoreHazard() const {

    return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;

  }


  // \returns true if the subtarget supports DWORDX3 load/store instructions.

  bool hasDwordx3LoadStores() const { return HasCIInsts; }


  bool hasReadM0MovRelInterpHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0SendMsgHazard() const {

    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&

           getGeneration() <= AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0LdsDmaHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  bool hasReadM0LdsDirectHazard() const {

    return getGeneration() == AMDGPUSubtarget::GFX9;

  }


  /// \returns true if the target has the LDS misaligned bug and CU mode is not
  /// enabled.
  bool hasLDSMisalignedBugInWGPMode() const {
    if (EnableCuMode)
      return false;
    return HasLDSMisalignedBug;
  }


  // Shift amount of a 64 bit shift cannot be a highest allocated register

  // if also at the end of the allocation block.

  bool hasShift64HighRegBug() const { return HasGFX90AInsts; }


  // Has one cycle hazard on transcendental instruction feeding a

  // non transcendental VALU.

  bool hasTransForwardingHazard() const { return HasGFX940Insts; }


  // Has one cycle hazard on a VALU instruction partially writing dst with

  // a shift of result bits feeding another VALU instruction.

  bool hasDstSelForwardingHazard() const { return HasGFX940Insts; }


  // Cannot use op_sel with v_dot instructions.

  bool hasDOTOpSelHazard() const { return HasGFX940Insts || HasGFX11Insts; }


  // Does not have HW interlocks for VALU writing and then reading SGPRs.

  bool hasVDecCoExecHazard() const { return HasGFX940Insts; }


  bool hasHardClauses() const { return MaxHardClauseLength > 0; }


  bool hasFPAtomicToDenormModeHazard() const {

    return getGeneration() == GFX10;

  }


  bool hasVOP3DPP() const { return getGeneration() >= GFX11; }


  bool hasLdsDirect() const { return getGeneration() >= GFX11; }


  bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }


  bool hasVALUPartialForwardingHazard() const {

    return getGeneration() == GFX11;

  }


  bool hasCvtScaleForwardingHazard() const { return HasGFX950Insts; }


  // All GFX9 targets experience a fetch delay when an instruction at the start

  // of a loop header is split by a 32-byte fetch window boundary, but GFX950

  // is uniquely sensitive to this: the delay triggers further performance

  // degradation beyond the fetch latency itself.

  bool hasLoopHeadInstSplitSensitivity() const { return HasGFX950Insts; }


  bool requiresCodeObjectV6() const { return RequiresCOV6; }


  bool useVGPRBlockOpsForCSR() const { return UseBlockVGPROpsForCSR; }


  bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }


  /// \returns true for targets with GFX12 instructions that do not also have
  /// GFX1250 instructions.
  bool hasVALUReadSGPRHazard() const {
    if (HasGFX1250Insts)
      return false;
    return HasGFX12Insts;
  }


  /// \returns true for GFX12-generation targets that have GFX1250
  /// instructions.
  bool setRegModeNeedsVNOPs() const {
    if (!HasGFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  /// Return if operations acting on VGPR tuples require even alignment.

  bool needsAlignedVGPRs() const { return RequiresAlignVGPR; }


  /// Return true if the target has the S_PACK_HL_B32_B16 instruction.

  bool hasSPackHL() const { return HasGFX11Insts; }


  /// Return true if the target's EXP instruction has the COMPR flag, which

  /// affects the meaning of the EN (enable) bits.

  bool hasCompressedExport() const { return !HasGFX11Insts; }


  /// Return true if the target's EXP instruction supports the NULL export

  /// target.

  bool hasNullExportTarget() const { return !HasGFX11Insts; }


  bool hasFlatScratchSVSSwizzleBug() const { return getGeneration() == GFX11; }


  /// Return true if the target has the S_DELAY_ALU instruction.

  bool hasDelayAlu() const { return HasGFX11Insts; }


  /// Returns true if the target supports

  /// global_load_lds_dwordx3/global_load_lds_dwordx4 or

  /// buffer_load_dwordx3/buffer_load_dwordx4 with the lds bit.

  bool hasLDSLoadB96_B128() const { return hasGFX950Insts(); }


  /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt

  /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.

  bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }


  /// \returns true if the target has packed f32 instructions that only read 32
  /// bits from a scalar operand (SGPR or literal) and replicate the bits to
  /// both channels. This is reported for GFX12-generation targets with GFX1250
  /// instructions.
  bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const {
    if (getGeneration() != GFX12)
      return false;
    return HasGFX1250Insts;
  }


  bool hasAddPC64Inst() const { return HasGFX1250Insts; }


  /// \returns true if the target supports expert scheduling mode 2 which relies

  /// on the compiler to insert waits to avoid hazards between VMEM and VALU

  /// instructions in some instances.

  bool hasExpertSchedulingMode() const { return getGeneration() >= GFX12; }


  /// \returns The maximum number of instructions that can be enclosed in an

  /// S_CLAUSE on the given subtarget, or 0 for targets that do not support that

  /// instruction.

  unsigned maxHardClauseLength() const { return MaxHardClauseLength; }


  /// Return the maximum number of waves per SIMD for kernels using \p SGPRs

  /// SGPRs

  unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;


  /// Return the maximum number of waves per SIMD for kernels using \p VGPRs

  /// VGPRs

  unsigned getOccupancyWithNumVGPRs(unsigned VGPRs,

                                    unsigned DynamicVGPRBlockSize) const;


  /// Subtarget's minimum/maximum occupancy, in number of waves per EU, that can

  /// be achieved when the only function running on a CU is \p F, each workgroup

  /// uses \p LDSSize bytes of LDS, and each wave uses \p NumSGPRs SGPRs and \p

  /// NumVGPRs VGPRs. The flat workgroup sizes associated to the function are a

  /// range, so this returns a range as well.

  ///

  /// Note that occupancy can be affected by the scratch allocation as well, but

  /// we do not have enough information to compute it.

  std::pair<unsigned, unsigned> computeOccupancy(const Function &F,

                                                 unsigned LDSSize = 0,

                                                 unsigned NumSGPRs = 0,

                                                 unsigned NumVGPRs = 0) const;


  /// \returns true if the flat_scratch register should be initialized with the

  /// pointer to the wave's scratch memory rather than a size and offset.


  bool flatScratchIsPointer() const {

    return getGeneration() >= AMDGPUSubtarget::GFX9;

  }


  /// \returns true if the machine has merged shaders in which s0-s7 are

  /// reserved by the hardware and user SGPRs start at s8

  bool hasMergedShaders() const { return getGeneration() >= GFX9; }


  // \returns true if the target supports the pre-NGG legacy geometry path.

  bool hasLegacyGeometry() const { return getGeneration() < GFX11; }


  // \returns true if the target has split barriers feature

  bool hasSplitBarriers() const { return getGeneration() >= GFX12; }


  // \returns true if the target has WG_RR_MODE kernel descriptor mode bit

  bool hasRrWGMode() const { return getGeneration() >= GFX12; }


  /// \returns true if VADDR and SADDR fields in VSCRATCH can use negative

  /// values.

  bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }


  bool hasINVWBL2WaitCntRequirement() const { return HasGFX1250Insts; }


  bool hasVOPD3() const { return HasGFX1250Insts; }


  // \returns true if the target has V_PK_{MIN|MAX}3_{I|U}16 instructions.

  bool hasPkMinMax3Insts() const { return HasGFX1250Insts; }


  // \returns true if the target has the S_GET_SHADER_CYCLES_U64 instruction.

  bool hasSGetShaderCyclesInst() const { return HasGFX1250Insts; }


  /// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
  /// of sign-extending. Note that GFX1250 has not only fixed the bug but also
  /// extended VA to 57 bits, so it is excluded here.
  bool hasGetPCZeroExtension() const {
    if (HasGFX1250Insts)
      return false;
    return HasGFX12Insts;
  }


  /// \returns true if the target needs to create a prolog for backward
  /// compatibility when preloading kernel arguments. Targets with GFX1250
  /// instructions never need it.
  bool needsKernArgPreloadProlog() const {
    if (HasGFX1250Insts)
      return false;
    return hasKernargPreload();
  }


  bool hasCondSubInsts() const { return HasGFX12Insts; }


  bool hasSubClampInsts() const { return hasGFX10_3Insts(); }


  bool hasFmaLegacy32Insts() const { return hasGFX10_3Insts(); }


  /// \returns SGPR allocation granularity supported by the subtarget.


  unsigned getSGPRAllocGranule() const {

    return AMDGPU::IsaInfo::getSGPRAllocGranule(*this);

  }


  /// \returns SGPR encoding granularity supported by the subtarget.


  unsigned getSGPREncodingGranule() const {

    return AMDGPU::IsaInfo::getSGPREncodingGranule(*this);

  }


  /// \returns Total number of SGPRs supported by the subtarget.


  unsigned getTotalNumSGPRs() const {

    return AMDGPU::IsaInfo::getTotalNumSGPRs(*this);

  }


  /// \returns Addressable number of SGPRs supported by the subtarget.


  unsigned getAddressableNumSGPRs() const {

    return AMDGPU::IsaInfo::getAddressableNumSGPRs(*this);

  }


  /// \returns Minimum number of SGPRs that meets the given number of waves per

  /// execution unit requirement supported by the subtarget.


  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {

    return AMDGPU::IsaInfo::getMinNumSGPRs(*this, WavesPerEU);

  }


  /// \returns Maximum number of SGPRs that meets the given number of waves per

  /// execution unit requirement supported by the subtarget.


  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {

    return AMDGPU::IsaInfo::getMaxNumSGPRs(*this, WavesPerEU, Addressable);

  }


  /// \returns Reserved number of SGPRs. This is common

  /// utility function called by MachineFunction and

  /// Function variants of getReservedNumSGPRs.

  unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const;

  /// \returns Reserved number of SGPRs for given machine function \p MF.

  unsigned getReservedNumSGPRs(const MachineFunction &MF) const;


  /// \returns Reserved number of SGPRs for given function \p F.

  unsigned getReservedNumSGPRs(const Function &F) const;


  /// \returns Maximum number of preloaded SGPRs for the subtarget.

  unsigned getMaxNumPreloadedSGPRs() const;


  /// \returns max num SGPRs. This is the common utility

  /// function called by MachineFunction and Function

  /// variants of getMaxNumSGPRs.

  unsigned getBaseMaxNumSGPRs(const Function &F,

                              std::pair<unsigned, unsigned> WavesPerEU,

                              unsigned PreloadedSGPRs,

                              unsigned ReservedNumSGPRs) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p MF, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumSGPRs(const MachineFunction &MF) const;


  /// \returns Maximum number of SGPRs that meets number of waves per execution

  /// unit requirement for function \p F, or number of SGPRs explicitly

  /// requested using "amdgpu-num-sgpr" attribute attached to function \p F.

  ///

  /// \returns Value that meets number of waves per execution unit requirement

  /// if explicitly requested value cannot be converted to integer, violates

  /// subtarget's specifications, or does not meet number of waves per execution

  /// unit requirement.

  unsigned getMaxNumSGPRs(const Function &F) const;


  /// \returns VGPR allocation granularity supported by the subtarget.


  unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const {

    return AMDGPU::IsaInfo::getVGPRAllocGranule(*this, DynamicVGPRBlockSize);

  }


  /// \returns VGPR encoding granularity supported by the subtarget.


  unsigned getVGPREncodingGranule() const {

    return AMDGPU::IsaInfo::getVGPREncodingGranule(*this);

  }


  /// \returns Total number of VGPRs supported by the subtarget.


  unsigned getTotalNumVGPRs() const {

    return AMDGPU::IsaInfo::getTotalNumVGPRs(*this);

  }


  /// \returns Addressable number of architectural VGPRs supported by the

  /// subtarget.


  unsigned getAddressableNumArchVGPRs() const {

    return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(*this);

  }


  /// \returns Addressable number of VGPRs supported by the subtarget.
  ///
  /// Forwards this subtarget and \p DynamicVGPRBlockSize unchanged to
  /// AMDGPU::IsaInfo::getAddressableNumVGPRs().
  unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const {
    return AMDGPU::IsaInfo::getAddressableNumVGPRs(*this, DynamicVGPRBlockSize);
  }


  /// \returns the minimum number of VGPRs that will prevent achieving more than
  /// the specified number of waves \p WavesPerEU.
  ///
  /// Forwards this subtarget, \p WavesPerEU and \p DynamicVGPRBlockSize
  /// unchanged to AMDGPU::IsaInfo::getMinNumVGPRs().
  unsigned getMinNumVGPRs(unsigned WavesPerEU,
                          unsigned DynamicVGPRBlockSize) const {
    return AMDGPU::IsaInfo::getMinNumVGPRs(*this, WavesPerEU,
                                           DynamicVGPRBlockSize);
  }


  /// \returns the maximum number of VGPRs that can be used and still achieve
  /// at least the specified number of waves \p WavesPerEU.
  ///
  /// Forwards this subtarget, \p WavesPerEU and \p DynamicVGPRBlockSize
  /// unchanged to AMDGPU::IsaInfo::getMaxNumVGPRs().
  unsigned getMaxNumVGPRs(unsigned WavesPerEU,
                          unsigned DynamicVGPRBlockSize) const {
    return AMDGPU::IsaInfo::getMaxNumVGPRs(*this, WavesPerEU,
                                           DynamicVGPRBlockSize);
  }


  /// \returns max num VGPRs. This is the common utility function
  /// called by MachineFunction and Function variants of getMaxNumVGPRs.
  ///
  /// \p NumVGPRBounds is a pair of VGPR counts.
  /// NOTE(review): presumably the (minimum, maximum) bounds the result must
  /// respect - confirm against the out-of-line definition.
  unsigned
  getBaseMaxNumVGPRs(const Function &F,
                     std::pair<unsigned, unsigned> NumVGPRBounds) const;


  /// \returns Maximum number of VGPRs that meets number of waves per execution
  /// unit requirement for function \p F, or number of VGPRs explicitly
  /// requested using "amdgpu-num-vgpr" attribute attached to function \p F.
  ///
  /// If the explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement, the value that meets the number of waves per execution
  /// unit requirement is returned instead.
  unsigned getMaxNumVGPRs(const Function &F) const;


  /// \returns Maximum number of AGPRs for function \p F. This is the same
  /// value as getMaxNumVGPRs(F).
  unsigned getMaxNumAGPRs(const Function &F) const { return getMaxNumVGPRs(F); }


  /// Return a pair of maximum numbers of VGPRs and AGPRs that meet the number
  /// of waves per execution unit required for the function \p F.
  std::pair<unsigned, unsigned> getMaxNumVectorRegs(const Function &F) const;


  /// \returns Maximum number of VGPRs that meets number of waves per execution
  /// unit requirement for function \p MF, or number of VGPRs explicitly
  /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
  ///
  /// If the explicitly requested value cannot be converted to integer, violates
  /// subtarget's specifications, or does not meet number of waves per execution
  /// unit requirement, the value that meets the number of waves per execution
  /// unit requirement is returned instead.
  unsigned getMaxNumVGPRs(const MachineFunction &MF) const;


  /// \returns true if the subtarget generation is GFX10 or later.
  bool supportsWave32() const { return getGeneration() >= GFX10; }


  /// \returns true unless the subtarget has GFX1250 instructions without also
  /// having GFX13 instructions.
  bool supportsWave64() const {
    if (HasGFX13Insts)
      return true;
    return !hasGFX1250Insts();
  }


  /// \returns true if the wavefront size of this subtarget is 32.
  bool isWave32() const { return getWavefrontSize() == 32; }


  /// \returns true if the wavefront size of this subtarget is 64.
  bool isWave64() const { return getWavefrontSize() == 64; }


  /// Returns whether the wavesize of this subtarget is reliably known. This is
  /// false only for a default target-cpu that does not have an explicit
  /// +wavefrontsize target feature.
  bool isWaveSizeKnown() const {
    // Either explicit wavefront-size feature makes the size known.
    if (hasFeature(AMDGPU::FeatureWavefrontSize32))
      return true;
    return hasFeature(AMDGPU::FeatureWavefrontSize64);
  }


  /// \returns the register class reported by getBoolRC() on this subtarget's
  /// register info.
  const TargetRegisterClass *getBoolRC() const {
    return getRegisterInfo()->getBoolRC();
  }


  /// \returns Maximum number of work groups per compute unit supported by the
  /// subtarget and limited by given \p FlatWorkGroupSize.
  ///
  /// Forwards this subtarget and \p FlatWorkGroupSize unchanged to
  /// AMDGPU::IsaInfo::getMaxWorkGroupsPerCU().
  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(*this, FlatWorkGroupSize);
  }


  /// \returns Minimum flat work group size supported by the subtarget.
  ///
  /// Forwards this subtarget to AMDGPU::IsaInfo::getMinFlatWorkGroupSize().
  unsigned getMinFlatWorkGroupSize() const override {
    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(*this);
  }


  /// \returns Maximum flat work group size supported by the subtarget.
  ///
  /// Unlike the neighbouring queries, the IsaInfo helper called here takes no
  /// subtarget argument.
  unsigned getMaxFlatWorkGroupSize() const override {
    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize();
  }


  /// \returns Number of waves per execution unit required to support the given
  /// \p FlatWorkGroupSize.
  ///
  /// Forwards this subtarget and \p FlatWorkGroupSize unchanged to
  /// AMDGPU::IsaInfo::getWavesPerEUForWorkGroup().
  unsigned
  getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override {
    return AMDGPU::IsaInfo::getWavesPerEUForWorkGroup(*this, FlatWorkGroupSize);
  }


  /// \returns Minimum number of waves per execution unit supported by the
  /// subtarget.
  ///
  /// Forwards this subtarget to AMDGPU::IsaInfo::getMinWavesPerEU().
  unsigned getMinWavesPerEU() const override {
    return AMDGPU::IsaInfo::getMinWavesPerEU(*this);
  }


  /// Override of the scheduling-dependency adjustment hook for the dependency
  /// \p Dep between operand \p DefOpIdx of \p Def and operand \p UseOpIdx of
  /// \p Use. Declared here only; the definition is out of line.
  void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx,
                             SDep &Dep,
                             const TargetSchedModel *SchedModel) const override;


  /// \returns true if it's beneficial on this subtarget for the scheduler to
  /// cluster stores as well as loads. This holds for generation GFX11 and
  /// later.
  bool shouldClusterStores() const { return getGeneration() >= GFX11; }


  /// \returns the number of address arguments from which to enable MIMG NSA
  /// on supported architectures, for function \p MF.
  unsigned getNSAThreshold(const MachineFunction &MF) const;


  /// \returns true if the subtarget has a hazard requiring an "s_nop 0"
  /// instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)". This is the
  /// case whenever the subtarget does not have GFX1250 instructions.
  bool requiresNopBeforeDeallocVGPRs() const { return !HasGFX1250Insts; }


  /// \returns true if the subtarget needs S_WAIT_ALU 0 before S_GETREG_B32 on
  /// STATUS, STATE_PRIV, EXCP_FLAG_PRIV, or EXCP_FLAG_USER. This is the case
  /// whenever the subtarget has GFX1250 instructions.
  bool requiresWaitIdleBeforeGetReg() const { return HasGFX1250Insts; }


  /// \returns the value of the DynamicVGPR subtarget flag.
  bool isDynamicVGPREnabled() const { return DynamicVGPR; }


  /// \returns the dynamic VGPR block size: 32 when the DynamicVGPRBlockSize32
  /// flag is set, 16 otherwise.
  unsigned getDynamicVGPRBlockSize() const {
    if (DynamicVGPRBlockSize32)
      return 32;
    return 16;
  }


  /// \returns false unconditionally: early-clobber and undef operands need not
  /// be kept in disjoint registers on this target.
  bool requiresDisjointEarlyClobberAndUndef() const override {
    // AMDGPU doesn't care if early-clobber and undef operands are allocated
    // to the same register.
    return false;
  }


  /// DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 shall not be claused with anything
  /// and shall be surrounded by S_WAIT_ALU(0xFFE3).
  ///
  /// \returns true if the subtarget generation is GFX12.
  bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
    return getGeneration() == GFX12;
  }


  /// Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
  /// read.
  ///
  /// \returns true if the subtarget has GFX1250 instructions and its
  /// generation is GFX12.
  bool hasScratchBaseForwardingHazard() const {
    if (!HasGFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  /// src_flat_scratch_hi cannot be used as a source in SALU producing a 64-bit
  /// result.
  ///
  /// \returns true if the subtarget has GFX1250 instructions and its
  /// generation is GFX12.
  bool hasFlatScratchHiInB64InstHazard() const {
    if (!HasGFX1250Insts)
      return false;
    return getGeneration() == GFX12;
  }


  /// \returns true if the subtarget requires a wait for xcnt before VMEM
  /// accesses that must never be repeated in the event of a page fault/re-try.
  /// Atomic stores/rmw and all volatile accesses fall under this criteria.
  ///
  /// This is the case whenever the subtarget has GFX1250 instructions.
  bool requiresWaitXCntForSingleAccessInstructions() const {
    return HasGFX1250Insts;
  }


  /// \returns the number of significant bits in the immediate field of the
  /// S_NOP instruction: 7 from GFX12 onwards, 4 from VOLCANIC_ISLANDS onwards,
  /// and 3 for anything older.
  unsigned getSNopBits() const {
    const auto CurGen = getGeneration();
    if (CurGen >= AMDGPUSubtarget::GFX12)
      return 7;
    return CurGen >= AMDGPUSubtarget::VOLCANIC_ISLANDS ? 4 : 3;
  }


  /// \returns true if the subtarget generation is VOLCANIC_ISLANDS or later.
  bool supportsBPermute() const {
    return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
  }


  /// \returns true if the generation is GFX9 or earlier, the generation is
  /// GFX12, or the subtarget runs in wave32.
  bool supportsWaveWideBPermute() const {
    const auto CurGen = getGeneration();
    if (CurGen <= AMDGPUSubtarget::GFX9)
      return true;
    if (CurGen == AMDGPUSubtarget::GFX12)
      return true;
    // Remaining generations qualify only in wave32.
    return isWave32();
  }


  /// Return true if real (non-fake) variants of True16 instructions using
  /// 16-bit registers should be code-generated. Fake True16 instructions are
  /// identical to non-fake ones except that they take 32-bit registers as
  /// operands and always use their low halves.
  // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
  // supported and the support for fake True16 instructions is removed.
  bool useRealTrue16Insts() const {
    // Without True16 instructions the enable flag is irrelevant.
    if (!hasTrue16BitInsts())
      return false;
    return EnableRealTrue16Insts;
  }


  /// \returns true if the generation is GFX10 or later, or if tgsplit is
  /// enabled.
  bool requiresWaitOnWorkgroupReleaseFence() const {
    if (getGeneration() >= GFX10)
      return true;
    return isTgSplitEnabled();
  }


};


/// Bookkeeping for user SGPRs: one flag per user SGPR field plus counters for
/// the used and kernarg-preload SGPRs.
/// NOTE(review): the flags and counters are presumably populated by the
/// out-of-line constructor and allocKernargPreloadSGPRs() - confirm there.
class GCNUserSGPRUsageInfo {
public:
  // One query per user SGPR field flag.
  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasPrivateSegmentSize() const {
    return PrivateSegmentSize;
  }

  /// \returns the stored count of SGPRs used for kernarg preloading.
  unsigned getNumKernargPreloadSGPRs() const {
    return NumKernargPreloadSGPRs;
  }

  /// \returns the stored count of user SGPRs in use.
  unsigned getNumUsedUserSGPRs() const {
    return NumUsedUserSGPRs;
  }

  // Declared here only; defined out of line.
  unsigned getNumFreeUserSGPRs();

  // Declared here only; defined out of line.
  void allocKernargPreloadSGPRs(unsigned NumSGPRs);

  /// Identifiers of the user SGPR fields.
  enum UserSGPRID : unsigned {
    ImplicitBufferPtrID = 0,
    PrivateSegmentBufferID = 1,
    DispatchPtrID = 2,
    QueuePtrID = 3,
    KernargSegmentPtrID = 4,
    DispatchIdID = 5,
    FlatScratchInitID = 6,
    PrivateSegmentSizeID = 7
  };

  /// Returns the size in number of SGPRs for preload user SGPR field \p ID:
  /// 4 for the private segment buffer, 1 for the private segment size and 2
  /// for every other field.
  static unsigned getNumUserSGPRForField(UserSGPRID ID) {
    switch (ID) {
    case PrivateSegmentBufferID:
      return 4;
    case PrivateSegmentSizeID:
      return 1;
    case ImplicitBufferPtrID:
    case DispatchPtrID:
    case QueuePtrID:
    case KernargSegmentPtrID:
    case DispatchIdID:
    case FlatScratchInitID:
      return 2;
    }
    llvm_unreachable("Unknown UserSGPRID.");
  }

  GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST);

private:
  const GCNSubtarget &ST;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr = false;

  // The remaining field flags also start out cleared.
  bool PrivateSegmentBuffer = false;
  bool DispatchPtr = false;
  bool QueuePtr = false;
  bool KernargSegmentPtr = false;
  bool DispatchID = false;
  bool FlatScratchInit = false;
  bool PrivateSegmentSize = false;

  // Both counters start at zero.
  unsigned NumKernargPreloadSGPRs = 0;
  unsigned NumUsedUserSGPRs = 0;
};


} // end namespace llvm


#endif // LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

EnableLoadStoreOpt
static cl::opt< bool > EnableLoadStoreOpt("aarch64-enable-ldst-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden)

AMDGPUBaseInfo.h

AMDGPUCallLowering.h
This file describes how to lower LLVM calls to machine code calls.

AMDGPURegisterBankInfo.h
This file declares the targeting of the RegisterBankInfo class for AMDGPU.

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

AMDHSAKernelDescriptor.h
AMDHSA kernel descriptor definitions.

F
#define F(x, y, z)
Definition MD5.cpp:54

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

SIFrameLowering.h

SIISelLowering.h
SI DAG Lowering interface definition.

SIInstrInfo.h
Interface definition for SIInstrInfo.

CacheLineSize
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))

AMDGPUGenSubtargetInfo

llvm::AMDGPURegisterBankInfo
Definition AMDGPURegisterBankInfo.h:42

llvm::AMDGPUSubtarget::isMesa3DOS
bool isMesa3DOS() const
Definition AMDGPUSubtarget.h:173

llvm::AMDGPUSubtarget::Generation
Generation
Definition AMDGPUSubtarget.h:32

llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition AMDGPUSubtarget.h:42

llvm::AMDGPUSubtarget::GFX9
@ GFX9
Definition AMDGPUSubtarget.h:41

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition AMDGPUSubtarget.h:44

llvm::AMDGPUSubtarget::INVALID
@ INVALID
Definition AMDGPUSubtarget.h:33

llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition AMDGPUSubtarget.h:39

llvm::AMDGPUSubtarget::SOUTHERN_ISLANDS
@ SOUTHERN_ISLANDS
Definition AMDGPUSubtarget.h:38

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition AMDGPUSubtarget.h:40

llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition AMDGPUSubtarget.h:43

llvm::AMDGPUSubtarget::getWavefrontSizeLog2
unsigned getWavefrontSizeLog2() const
Definition AMDGPUSubtarget.h:223

llvm::AMDGPUSubtarget::AMDGPUSubtarget
AMDGPUSubtarget(const Triple &TT)
Definition AMDGPUSubtarget.h:65

llvm::AMDGPUSubtarget::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition AMDGPUSubtarget.h:291

llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition AMDGPUSubtarget.h:165

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition AMDGPUSubtarget.h:219

llvm::AMDGPU::IsaInfo::AMDGPUTargetID
Definition AMDGPUBaseInfo.h:156

llvm::CallLowering
Definition CallLowering.h:46

llvm::Function
Definition Function.h:65

llvm::GCNSubtarget
Definition GCNSubtarget.h:45

llvm::GCNSubtarget::hasPrefetch
bool hasPrefetch() const
Definition GCNSubtarget.h:443

llvm::GCNSubtarget::hasFlat
bool hasFlat() const
Definition GCNSubtarget.h:226

llvm::GCNSubtarget::hasD16Images
bool hasD16Images() const
Definition GCNSubtarget.h:404

llvm::GCNSubtarget::InstrItins
InstrItineraryData InstrItins
Definition GCNSubtarget.h:76

llvm::GCNSubtarget::useVGPRIndexMode
bool useVGPRIndexMode() const
Definition GCNSubtarget.cpp:419

llvm::GCNSubtarget::partialVCCWritesUpdateVCCZ
bool partialVCCWritesUpdateVCCZ() const
Writes to VCC_LO/VCC_HI update the VCCZ flag.
Definition GCNSubtarget.h:281

llvm::GCNSubtarget::hasSwap
bool hasSwap() const
Definition GCNSubtarget.h:244

llvm::GCNSubtarget::hasPkMinMax3Insts
bool hasPkMinMax3Insts() const
Definition GCNSubtarget.h:734

llvm::GCNSubtarget::hasD16LoadStore
bool hasD16LoadStore() const
Definition GCNSubtarget.h:398

llvm::GCNSubtarget::hasMergedShaders
bool hasMergedShaders() const
Definition GCNSubtarget.h:714

llvm::GCNSubtarget::hasRrWGMode
bool hasRrWGMode() const
Definition GCNSubtarget.h:723

llvm::GCNSubtarget::hasScalarCompareEq64
bool hasScalarCompareEq64() const
Definition GCNSubtarget.h:504

llvm::GCNSubtarget::getLDSBankCount
int getLDSBankCount() const
Definition GCNSubtarget.h:197

llvm::GCNSubtarget::hasOnlyRevVALUShifts
bool hasOnlyRevVALUShifts() const
Definition GCNSubtarget.h:232

llvm::GCNSubtarget::hasNonNSAEncoding
bool hasNonNSAEncoding() const
Definition GCNSubtarget.h:533

llvm::GCNSubtarget::hasUsableDivScaleConditionOutput
bool hasUsableDivScaleConditionOutput() const
Condition output from div_scale is usable.
Definition GCNSubtarget.h:272

llvm::GCNSubtarget::hasExpertSchedulingMode
bool hasExpertSchedulingMode() const
Definition GCNSubtarget.h:677

llvm::GCNSubtarget::mirFileLoaded
void mirFileLoaded(MachineFunction &MF) const override
Definition GCNSubtarget.cpp:404

llvm::GCNSubtarget::hasUsableDSOffset
bool hasUsableDSOffset() const
True if the offset field of DS instructions works as expected.
Definition GCNSubtarget.h:265

llvm::GCNSubtarget::loadStoreOptEnabled
bool loadStoreOptEnabled() const
Definition GCNSubtarget.h:547

llvm::GCNSubtarget::enableSubRegLiveness
bool enableSubRegLiveness() const override
Definition GCNSubtarget.h:479

llvm::GCNSubtarget::getSGPRAllocGranule
unsigned getSGPRAllocGranule() const
Definition GCNSubtarget.h:759

llvm::GCNSubtarget::hasLdsAtomicAddF64
bool hasLdsAtomicAddF64() const
Definition GCNSubtarget.h:388

llvm::GCNSubtarget::hasFlatLgkmVMemCountInOrder
bool hasFlatLgkmVMemCountInOrder() const
Definition GCNSubtarget.h:396

llvm::GCNSubtarget::flatScratchIsPointer
bool flatScratchIsPointer() const
Definition GCNSubtarget.h:708

llvm::GCNSubtarget::requiresWaitOnWorkgroupReleaseFence
bool requiresWaitOnWorkgroupReleaseFence() const
Definition GCNSubtarget.h:1040

llvm::GCNSubtarget::hasShift64HighRegBug
bool hasShift64HighRegBug() const
Definition GCNSubtarget.h:583

llvm::GCNSubtarget::MaxPrivateElementSize
unsigned MaxPrivateElementSize
Definition GCNSubtarget.h:78

llvm::GCNSubtarget::unsafeDSOffsetFoldingEnabled
bool unsafeDSOffsetFoldingEnabled() const
Definition GCNSubtarget.h:267

llvm::GCNSubtarget::hasFPAtomicToDenormModeHazard
bool hasFPAtomicToDenormModeHazard() const
Definition GCNSubtarget.h:601

llvm::GCNSubtarget::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs() const
Definition GCNSubtarget.h:848

llvm::GCNSubtarget::vmemWriteNeedsExpWaitcnt
bool vmemWriteNeedsExpWaitcnt() const
Definition GCNSubtarget.h:435

llvm::GCNSubtarget::shouldClusterStores
bool shouldClusterStores() const
Definition GCNSubtarget.h:960

llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition GCNSubtarget.h:780

llvm::GCNSubtarget::hasUserSGPRInit16BugInWave32
bool hasUserSGPRInit16BugInWave32() const
Definition GCNSubtarget.h:549

llvm::GCNSubtarget::getSGPREncodingGranule
unsigned getSGPREncodingGranule() const
Definition GCNSubtarget.h:764

llvm::GCNSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)

llvm::GCNSubtarget::hasCompressedExport
bool hasCompressedExport() const
Return true if the target's EXP instruction has the COMPR flag, which affects the meaning of the EN (...
Definition GCNSubtarget.h:645

llvm::GCNSubtarget::hasFlatScratchHiInB64InstHazard
bool hasFlatScratchHiInB64InstHazard() const
Definition GCNSubtarget.h:999

llvm::GCNSubtarget::hasDstSelForwardingHazard
bool hasDstSelForwardingHazard() const
Definition GCNSubtarget.h:591

llvm::GCNSubtarget::setScalarizeGlobalBehavior
void setScalarizeGlobalBehavior(bool b)
Definition GCNSubtarget.h:481

llvm::GCNSubtarget::hasFlatScratchEnabled
bool hasFlatScratchEnabled() const
Definition GCNSubtarget.h:370

llvm::GCNSubtarget::hasRelaxedBufferOOBMode
bool hasRelaxedBufferOOBMode() const
Definition GCNSubtarget.h:352

llvm::GCNSubtarget::getSNopBits
unsigned getSNopBits() const
Definition GCNSubtarget.h:1012

llvm::GCNSubtarget::hasLDSLoadB96_B128
bool hasLDSLoadB96_B128() const
Returns true if the target supports global_load_lds_dwordx3/global_load_lds_dwordx4 or buffer_load_dw...
Definition GCNSubtarget.h:659

llvm::GCNSubtarget::hasMultiDwordFlatScratchAddressing
bool hasMultiDwordFlatScratchAddressing() const
Definition GCNSubtarget.h:392

llvm::GCNSubtarget::hasFmaakFmamkF64Insts
bool hasFmaakFmamkF64Insts() const
Definition GCNSubtarget.h:531

llvm::GCNSubtarget::hasDsSwizzleRotateMode
bool hasDsSwizzleRotateMode() const
Definition GCNSubtarget.h:518

llvm::GCNSubtarget::hasHWFP64
bool hasHWFP64() const
Definition GCNSubtarget.h:220

llvm::GCNSubtarget::hasScaleOffset
bool hasScaleOffset() const
Definition GCNSubtarget.h:542

llvm::GCNSubtarget::hasDenormModeInst
bool hasDenormModeInst() const
Definition GCNSubtarget.h:312

llvm::GCNSubtarget::hasCvtScaleForwardingHazard
bool hasCvtScaleForwardingHazard() const
Definition GCNSubtarget.h:615

llvm::GCNSubtarget::getTotalNumVGPRs
unsigned getTotalNumVGPRs() const
Definition GCNSubtarget.h:842

llvm::GCNSubtarget::getMinWavesPerEU
unsigned getMinWavesPerEU() const override
Definition GCNSubtarget.h:950

llvm::GCNSubtarget::hasUnalignedDSAccessEnabled
bool hasUnalignedDSAccessEnabled() const
Definition GCNSubtarget.h:340

llvm::GCNSubtarget::GCNSubtarget
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM, bool BufferOOBRelaxed=false, bool TBufferOOBRelaxed=false)
Definition GCNSubtarget.cpp:183

llvm::GCNSubtarget::getInstrInfo
const SIInstrInfo * getInstrInfo() const override
Definition GCNSubtarget.h:126

llvm::GCNSubtarget::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override
Definition GCNSubtarget.h:927

llvm::GCNSubtarget::getConstantBusLimit
unsigned getConstantBusLimit(unsigned Opcode) const
Definition GCNSubtarget.cpp:217

llvm::GCNSubtarget::hasVALUMaskWriteHazard
bool hasVALUMaskWriteHazard() const
Definition GCNSubtarget.h:627

llvm::GCNSubtarget::hasCondSubInsts
bool hasCondSubInsts() const
Definition GCNSubtarget.h:752

llvm::GCNSubtarget::TrapHandlerAbi
TrapHandlerAbi
Definition GCNSubtarget.h:51

llvm::GCNSubtarget::TrapHandlerAbi::NONE
@ NONE
Definition GCNSubtarget.h:52

llvm::GCNSubtarget::TrapHandlerAbi::AMDHSA
@ AMDHSA
Definition GCNSubtarget.h:53

llvm::GCNSubtarget::getInlineAsmLowering
const InlineAsmLowering * getInlineAsmLowering() const override
Definition GCNSubtarget.h:144

llvm::GCNSubtarget::getTotalNumSGPRs
unsigned getTotalNumSGPRs() const
Definition GCNSubtarget.h:769

llvm::GCNSubtarget::getInstrItineraryData
const InstrItineraryData * getInstrItineraryData() const override
Definition GCNSubtarget.h:164

llvm::GCNSubtarget::adjustSchedDependency
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
Definition GCNSubtarget.cpp:766

llvm::GCNSubtarget::overridePostRASchedPolicy
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
Definition GCNSubtarget.cpp:367

llvm::GCNSubtarget::getMaxLocalMemSizeWithWaveCount
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount, const Function &) const
Return the amount of LDS that can be used that will not restrict the occupancy lower than WaveCount.

llvm::GCNSubtarget::hasPkMovB32
bool hasPkMovB32() const
Definition GCNSubtarget.h:525

llvm::GCNSubtarget::needsAlignedVGPRs
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
Definition GCNSubtarget.h:638

llvm::GCNSubtarget::getStackAlignment
Align getStackAlignment() const
Definition GCNSubtarget.h:473

llvm::GCNSubtarget::privateMemoryResourceIsRangeChecked
bool privateMemoryResourceIsRangeChecked() const
Definition GCNSubtarget.h:328

llvm::GCNSubtarget::hasScalarSubwordLoads
bool hasScalarSubwordLoads() const
Definition GCNSubtarget.h:250

llvm::GCNSubtarget::BufferOOBRelaxed
const bool BufferOOBRelaxed
Definition GCNSubtarget.h:87

llvm::GCNSubtarget::hasMadF16
bool hasMadF16() const
Definition GCNSubtarget.cpp:415

llvm::GCNSubtarget::hasDsAtomicAsyncBarrierArriveB64PipeBug
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const
Definition GCNSubtarget.h:987

llvm::GCNSubtarget::DynamicVGPR
bool DynamicVGPR
Definition GCNSubtarget.h:84

llvm::GCNSubtarget::getInstCacheLineSize
unsigned getInstCacheLineSize() const
Instruction cache line size in bytes (64 for pre-GFX11, 128 for GFX11+).
Definition GCNSubtarget.h:200

llvm::GCNSubtarget::hasLoopHeadInstSplitSensitivity
bool hasLoopHeadInstSplitSensitivity() const
Definition GCNSubtarget.h:621

llvm::GCNSubtarget::hasDwordx3LoadStores
bool hasDwordx3LoadStores() const
Definition GCNSubtarget.h:558

llvm::GCNSubtarget::hasSignedScratchOffsets
bool hasSignedScratchOffsets() const
Definition GCNSubtarget.h:727

llvm::GCNSubtarget::hasGlobalAddTidInsts
bool hasGlobalAddTidInsts() const
Definition GCNSubtarget.h:375

llvm::GCNSubtarget::hasFlatScrRegister
bool hasFlatScrRegister() const
Definition GCNSubtarget.h:359

llvm::GCNSubtarget::hasGetPCZeroExtension
bool hasGetPCZeroExtension() const
Definition GCNSubtarget.h:742

llvm::GCNSubtarget::hasPermLane64
bool hasPermLane64() const
Definition GCNSubtarget.h:514

llvm::GCNSubtarget::requiresNopBeforeDeallocVGPRs
bool requiresNopBeforeDeallocVGPRs() const
Definition GCNSubtarget.h:968

llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:859

llvm::GCNSubtarget::supportsGetDoorbellID
bool supportsGetDoorbellID() const
Definition GCNSubtarget.h:258

llvm::GCNSubtarget::supportsWave32
bool supportsWave32() const
Definition GCNSubtarget.h:905

llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition GCNSubtarget.h:350

llvm::GCNSubtarget::getMaxNumAGPRs
unsigned getMaxNumAGPRs(const Function &F) const
Definition GCNSubtarget.h:889

llvm::GCNSubtarget::hasReadM0MovRelInterpHazard
bool hasReadM0MovRelInterpHazard() const
Definition GCNSubtarget.h:560

llvm::GCNSubtarget::isDynamicVGPREnabled
bool isDynamicVGPREnabled() const
Definition GCNSubtarget.h:974

llvm::GCNSubtarget::hasInstPrefSize
bool hasInstPrefSize() const
Definition GCNSubtarget.h:445

llvm::GCNSubtarget::getRegisterInfo
const SIRegisterInfo * getRegisterInfo() const override
Definition GCNSubtarget.h:134

llvm::GCNSubtarget::ScalarizeGlobal
bool ScalarizeGlobal
Definition GCNSubtarget.h:86

llvm::GCNSubtarget::hasDOTOpSelHazard
bool hasDOTOpSelHazard() const
Definition GCNSubtarget.h:594

llvm::GCNSubtarget::hasLdsWaitVMSRC
bool hasLdsWaitVMSRC() const
Definition GCNSubtarget.h:609

llvm::GCNSubtarget::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition GCNSubtarget.h:921

llvm::GCNSubtarget::getBaseMaxNumVGPRs
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
Definition GCNSubtarget.cpp:569

llvm::GCNSubtarget::hasFmaakFmamkF32Insts
bool hasFmaakFmamkF32Insts() const
Definition GCNSubtarget.h:527

llvm::GCNSubtarget::hasMad64_32
bool hasMad64_32() const
Definition GCNSubtarget.h:429

llvm::GCNSubtarget::getInstructionSelector
InstructionSelector * getInstructionSelector() const override
Definition GCNSubtarget.h:148

llvm::GCNSubtarget::getVGPREncodingGranule
unsigned getVGPREncodingGranule() const
Definition GCNSubtarget.h:837

llvm::GCNSubtarget::hasHardClauses
bool hasHardClauses() const
Definition GCNSubtarget.h:599

llvm::GCNSubtarget::useDS128
bool useDS128() const
Definition GCNSubtarget.h:318

llvm::GCNSubtarget::hasExtendedWaitCounts
bool hasExtendedWaitCounts() const
Definition GCNSubtarget.h:663

llvm::GCNSubtarget::d16PreservesUnusedBits
bool d16PreservesUnusedBits() const
Definition GCNSubtarget.h:400

llvm::GCNSubtarget::hasInstPrefetch
bool hasInstPrefetch() const
Definition GCNSubtarget.h:439

llvm::GCNSubtarget::hasAddPC64Inst
bool hasAddPC64Inst() const
Definition GCNSubtarget.h:672

llvm::GCNSubtarget::maxHardClauseLength
unsigned maxHardClauseLength() const
Definition GCNSubtarget.h:682

llvm::GCNSubtarget::isMesaGfxShader
bool isMesaGfxShader(const Function &F) const
Definition GCNSubtarget.h:425

llvm::GCNSubtarget::DynamicVGPRBlockSize32
bool DynamicVGPRBlockSize32
Definition GCNSubtarget.h:85

llvm::GCNSubtarget::hasExportInsts
bool hasExportInsts() const
Definition GCNSubtarget.h:379

llvm::GCNSubtarget::hasVINTERPEncoding
bool hasVINTERPEncoding() const
Definition GCNSubtarget.h:383

llvm::GCNSubtarget::getRegBankInfo
const AMDGPURegisterBankInfo * getRegBankInfo() const override
Definition GCNSubtarget.h:156

llvm::GCNSubtarget::hasLegacyGeometry
bool hasLegacyGeometry() const
Definition GCNSubtarget.h:717

llvm::GCNSubtarget::LDSBankCount
int LDSBankCount
Definition GCNSubtarget.h:77

llvm::GCNSubtarget::getTrapHandlerAbi
TrapHandlerAbi getTrapHandlerAbi() const
Definition GCNSubtarget.h:254

llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition GCNSubtarget.h:355

llvm::GCNSubtarget::getFrameLowering
const SIFrameLowering * getFrameLowering() const override
Definition GCNSubtarget.h:128

llvm::GCNSubtarget::hasDPPRowShare
bool hasDPPRowShare() const
Definition GCNSubtarget.h:520

llvm::GCNSubtarget::zeroesHigh16BitsOfDest
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
Definition GCNSubtarget.cpp:245

llvm::GCNSubtarget::getBaseMaxNumSGPRs
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
Definition GCNSubtarget.cpp:485

llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition GCNSubtarget.h:160

llvm::GCNSubtarget::getMaxNumPreloadedSGPRs
unsigned getMaxNumPreloadedSGPRs() const
Definition GCNSubtarget.cpp:539

llvm::GCNSubtarget::initializeSubtargetDependencies
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
Definition GCNSubtarget.cpp:57

llvm::GCNSubtarget::Gen
unsigned Gen
Definition GCNSubtarget.h:75

llvm::GCNSubtarget::has12DWordStoreHazard
bool has12DWordStoreHazard() const
Definition GCNSubtarget.h:553

llvm::GCNSubtarget::hasVALUPartialForwardingHazard
bool hasVALUPartialForwardingHazard() const
Definition GCNSubtarget.h:611

llvm::GCNSubtarget::overrideSchedPolicy
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
Definition GCNSubtarget.cpp:343

llvm::GCNSubtarget::useVGPRBlockOpsForCSR
bool useVGPRBlockOpsForCSR() const
Definition GCNSubtarget.h:625

llvm::GCNSubtarget::computeOccupancy
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition GCNSubtarget.cpp:468

llvm::GCNSubtarget::needsKernArgPreloadProlog
bool needsKernArgPreloadProlog() const
Definition GCNSubtarget.h:748

llvm::GCNSubtarget::hasMin3Max3_16
bool hasMin3Max3_16() const
Definition GCNSubtarget.h:240

llvm::GCNSubtarget::getMaxNumVGPRs
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:867

llvm::GCNSubtarget::getVGPRAllocGranule
unsigned getVGPRAllocGranule(unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:832

llvm::GCNSubtarget::getSetRegWaitStates
unsigned getSetRegWaitStates() const
Number of hazard wait states for s_setreg_b32/s_setreg_imm32_b32.
Definition GCNSubtarget.h:298

llvm::GCNSubtarget::getTargetLowering
const SITargetLowering * getTargetLowering() const override
Definition GCNSubtarget.h:132

llvm::GCNSubtarget::hasTransForwardingHazard
bool hasTransForwardingHazard() const
Definition GCNSubtarget.h:587

llvm::GCNSubtarget::TrapID
TrapID
Definition GCNSubtarget.h:56

llvm::GCNSubtarget::TrapID::LLVMAMDHSADebugTrap
@ LLVMAMDHSADebugTrap
Definition GCNSubtarget.h:58

llvm::GCNSubtarget::TrapID::LLVMAMDHSATrap
@ LLVMAMDHSATrap
Definition GCNSubtarget.h:57

llvm::GCNSubtarget::enableMachineScheduler
bool enableMachineScheduler() const override
Definition GCNSubtarget.h:475

llvm::GCNSubtarget::hasLDSFPAtomicAddF64
bool hasLDSFPAtomicAddF64() const
Definition GCNSubtarget.h:509

llvm::GCNSubtarget::getNSAThreshold
unsigned getNSAThreshold(const MachineFunction &MF) const
Definition GCNSubtarget.cpp:835

llvm::GCNSubtarget::getScalarizeGlobalBehavior
bool getScalarizeGlobalBehavior() const
Definition GCNSubtarget.h:482

llvm::GCNSubtarget::hasPKF32InstsReplicatingLower32BitsOfScalarInput
bool hasPKF32InstsReplicatingLower32BitsOfScalarInput() const
Definition GCNSubtarget.h:668

llvm::GCNSubtarget::hasReadM0LdsDmaHazard
bool hasReadM0LdsDmaHazard() const
Definition GCNSubtarget.h:569

llvm::GCNSubtarget::hasScalarSMulU64
bool hasScalarSMulU64() const
Definition GCNSubtarget.h:422

llvm::GCNSubtarget::getKnownHighZeroBitsForFrameIndex
unsigned getKnownHighZeroBitsForFrameIndex() const
Return the number of high bits known to be zero for a frame index.
Definition GCNSubtarget.h:193

llvm::GCNSubtarget::hasScratchBaseForwardingHazard
bool hasScratchBaseForwardingHazard() const
Definition GCNSubtarget.h:993

llvm::GCNSubtarget::hasRelaxedTBufferOOBMode
bool hasRelaxedTBufferOOBMode() const
Definition GCNSubtarget.h:353

llvm::GCNSubtarget::hasScalarPackInsts
bool hasScalarPackInsts() const
Definition GCNSubtarget.h:246

llvm::GCNSubtarget::requiresDisjointEarlyClobberAndUndef
bool requiresDisjointEarlyClobberAndUndef() const override
Definition GCNSubtarget.h:979

llvm::GCNSubtarget::hasVALUReadSGPRHazard
bool hasVALUReadSGPRHazard() const
Definition GCNSubtarget.h:629

llvm::GCNSubtarget::usePRTStrictNull
bool usePRTStrictNull() const
Definition GCNSubtarget.h:334

llvm::GCNSubtarget::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:853

llvm::GCNSubtarget::supportsWaveWideBPermute
bool supportsWaveWideBPermute() const
Definition GCNSubtarget.h:1024

llvm::GCNSubtarget::hasMed3_16
bool hasMed3_16() const
Definition GCNSubtarget.h:238

llvm::GCNSubtarget::getReservedNumSGPRs
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
Definition GCNSubtarget.cpp:454

llvm::GCNSubtarget::hasUnalignedScratchAccessEnabled
bool hasUnalignedScratchAccessEnabled() const
Definition GCNSubtarget.h:344

llvm::GCNSubtarget::hasNullExportTarget
bool hasNullExportTarget() const
Return true if the target's EXP instruction supports the NULL export target.
Definition GCNSubtarget.h:649

llvm::GCNSubtarget::ldsRequiresM0Init
bool ldsRequiresM0Init() const
Return true if most LDS instructions have an m0 use that requires m0 to be initialized.
Definition GCNSubtarget.h:408

llvm::GCNSubtarget::useRealTrue16Insts
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
Definition GCNSubtarget.h:1036

llvm::GCNSubtarget::TBufferOOBRelaxed
const bool TBufferOOBRelaxed
Definition GCNSubtarget.h:88

llvm::GCNSubtarget::useAA
bool useAA() const override
Definition GCNSubtarget.cpp:423

llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition GCNSubtarget.h:909

llvm::GCNSubtarget::isGFX11Plus
bool isGFX11Plus() const
Definition GCNSubtarget.h:172

llvm::GCNSubtarget::getOccupancyWithNumVGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
Definition GCNSubtarget.cpp:431

llvm::GCNSubtarget::hasUnalignedBufferAccessEnabled
bool hasUnalignedBufferAccessEnabled() const
Definition GCNSubtarget.h:336

llvm::GCNSubtarget::isWaveSizeKnown
bool isWaveSizeKnown() const
Returns true if the wavesize of this subtarget is reliably known.
Definition GCNSubtarget.h:916

llvm::GCNSubtarget::getMaxPrivateElementSize
unsigned getMaxPrivateElementSize(bool ForBufferRSrc=false) const
Definition GCNSubtarget.h:202

llvm::GCNSubtarget::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize() const override
Definition GCNSubtarget.h:932

llvm::GCNSubtarget::hasAsyncMark
bool hasAsyncMark() const
Definition GCNSubtarget.h:252

llvm::GCNSubtarget::hasSPackHL
bool hasSPackHL() const
Return true if the target has the S_PACK_HL_B32_B16 instruction.
Definition GCNSubtarget.h:641

llvm::GCNSubtarget::supportsMinMaxDenormModes
bool supportsMinMaxDenormModes() const
Definition GCNSubtarget.h:307

llvm::GCNSubtarget::supportsWave64
bool supportsWave64() const
Definition GCNSubtarget.h:907

llvm::GCNSubtarget::supportsBPermute
bool supportsBPermute() const
Definition GCNSubtarget.h:1020

llvm::GCNSubtarget::hasFlatScratchSVSMode
bool hasFlatScratchSVSMode() const
Definition GCNSubtarget.h:368

llvm::GCNSubtarget::InstCacheLineSize
unsigned InstCacheLineSize
Definition GCNSubtarget.h:81

llvm::GCNSubtarget::supportsWGP
bool supportsWGP() const
Definition GCNSubtarget.h:214

llvm::GCNSubtarget::hasAtomicFaddInsts
bool hasAtomicFaddInsts() const
Definition GCNSubtarget.h:431

llvm::GCNSubtarget::hasSubClampInsts
bool hasSubClampInsts() const
Definition GCNSubtarget.h:754

llvm::GCNSubtarget::requiresWaitXCntForSingleAccessInstructions
bool requiresWaitXCntForSingleAccessInstructions() const
Definition GCNSubtarget.h:1006

llvm::GCNSubtarget::getNSAMaxSize
unsigned getNSAMaxSize(bool HasSampler=false) const
Definition GCNSubtarget.h:535

llvm::GCNSubtarget::getOccupancyWithNumSGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
Definition GCNSubtarget.cpp:425

llvm::GCNSubtarget::hasVOP3DPP
bool hasVOP3DPP() const
Definition GCNSubtarget.h:605

llvm::GCNSubtarget::getInstPrefSizeArgs
void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width, uint32_t &CacheLineSize) const
Definition GCNSubtarget.h:447

llvm::GCNSubtarget::getMaxFlatWorkGroupSize
unsigned getMaxFlatWorkGroupSize() const override
Definition GCNSubtarget.h:937

llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition GCNSubtarget.h:498

llvm::GCNSubtarget::MaxHardClauseLength
unsigned MaxHardClauseLength
The maximum number of instructions that may be placed within an S_CLAUSE, which is one greater than t...
Definition GCNSubtarget.h:93

llvm::GCNSubtarget::hasFlatScratchSVSSwizzleBug
bool hasFlatScratchSVSSwizzleBug() const
Definition GCNSubtarget.h:651

llvm::GCNSubtarget::hasVDecCoExecHazard
bool hasVDecCoExecHazard() const
Definition GCNSubtarget.h:597

llvm::GCNSubtarget::hasSignedGVSOffset
bool hasSignedGVSOffset() const
Definition GCNSubtarget.h:545

llvm::GCNSubtarget::hasLDSFPAtomicAddF32
bool hasLDSFPAtomicAddF32() const
Definition GCNSubtarget.h:508

llvm::GCNSubtarget::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(unsigned FlatWorkGroupSize) const override
Definition GCNSubtarget.h:944

llvm::GCNSubtarget::haveRoundOpsF64
bool haveRoundOpsF64() const
Have v_trunc_f64, v_ceil_f64, v_rndne_f64.
Definition GCNSubtarget.h:324

llvm::GCNSubtarget::hasDelayAlu
bool hasDelayAlu() const
Return true if the target has the S_DELAY_ALU instruction.
Definition GCNSubtarget.h:654

llvm::GCNSubtarget::hasReadM0SendMsgHazard
bool hasReadM0SendMsgHazard() const
Definition GCNSubtarget.h:564

llvm::GCNSubtarget::hasScalarMulHiInsts
bool hasScalarMulHiInsts() const
Definition GCNSubtarget.h:248

llvm::GCNSubtarget::hasSCmpK
bool hasSCmpK() const
Definition GCNSubtarget.h:463

llvm::GCNSubtarget::getLegalizerInfo
const LegalizerInfo * getLegalizerInfo() const override
Definition GCNSubtarget.h:152

llvm::GCNSubtarget::requiresWaitIdleBeforeGetReg
bool requiresWaitIdleBeforeGetReg() const
Definition GCNSubtarget.h:972

llvm::GCNSubtarget::hasDS96AndDS128
bool hasDS96AndDS128() const
Definition GCNSubtarget.h:321

llvm::GCNSubtarget::hasFmaLegacy32Insts
bool hasFmaLegacy32Insts() const
Definition GCNSubtarget.h:756

llvm::GCNSubtarget::hasReadM0LdsDirectHazard
bool hasReadM0LdsDirectHazard() const
Definition GCNSubtarget.h:573

llvm::GCNSubtarget::hasHalfRate64Ops
static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI)

llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition GCNSubtarget.h:170

llvm::GCNSubtarget::getMaxNumSGPRs
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
Definition GCNSubtarget.h:786

llvm::GCNSubtarget::getMaxNumVectorRegs
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
Definition GCNSubtarget.cpp:602

llvm::GCNSubtarget::isXNACKEnabled
bool isXNACKEnabled() const
Definition GCNSubtarget.h:348

llvm::GCNSubtarget::hasScalarAddSub64
bool hasScalarAddSub64() const
Definition GCNSubtarget.h:420

llvm::GCNSubtarget::hasSplitBarriers
bool hasSplitBarriers() const
Definition GCNSubtarget.h:720

llvm::GCNSubtarget::enableEarlyIfConversion
bool enableEarlyIfConversion() const override
Definition GCNSubtarget.h:488

llvm::GCNSubtarget::hasSMRDReadVALUDefHazard
bool hasSMRDReadVALUDefHazard() const
A read of an SGPR by SMRD instruction requires 4 wait states when the SGPR was written by a VALU inst...
Definition GCNSubtarget.h:285

llvm::GCNSubtarget::hasSGetShaderCyclesInst
bool hasSGetShaderCyclesInst() const
Definition GCNSubtarget.h:737

llvm::GCNSubtarget::hasINVWBL2WaitCntRequirement
bool hasINVWBL2WaitCntRequirement() const
Definition GCNSubtarget.h:729

llvm::GCNSubtarget::hasRFEHazards
bool hasRFEHazards() const
Definition GCNSubtarget.h:295

llvm::GCNSubtarget::hasVMEMReadSGPRVALUDefHazard
bool hasVMEMReadSGPRVALUDefHazard() const
A read of an SGPR by a VMEM instruction requires 5 wait states when the SGPR was written by a VALU In...
Definition GCNSubtarget.h:291

llvm::GCNSubtarget::hasFlatScratchSTMode
bool hasFlatScratchSTMode() const
Definition GCNSubtarget.h:364

llvm::GCNSubtarget::getBaseReservedNumSGPRs
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
Definition GCNSubtarget.cpp:438

llvm::GCNSubtarget::hasGWSSemaReleaseAll
bool hasGWSSemaReleaseAll() const
Definition GCNSubtarget.h:418

llvm::GCNSubtarget::hasAddr64
bool hasAddr64() const
Definition GCNSubtarget.h:222

llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition GCNSubtarget.h:774

llvm::GCNSubtarget::hasReadVCCZBug
bool hasReadVCCZBug() const
Extra wait hazard is needed in some cases before s_cbranch_vccnz/s_cbranch_vccz.
Definition GCNSubtarget.h:278

llvm::GCNSubtarget::isWave64
bool isWave64() const
Definition GCNSubtarget.h:911

llvm::GCNSubtarget::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize() const
Definition GCNSubtarget.h:975

llvm::GCNSubtarget::setRegModeNeedsVNOPs
bool setRegModeNeedsVNOPs() const
Definition GCNSubtarget.h:633

llvm::GCNSubtarget::hasFractBug
bool hasFractBug() const
Definition GCNSubtarget.h:236

llvm::GCNSubtarget::isPreciseMemoryEnabled
bool isPreciseMemoryEnabled() const
Definition GCNSubtarget.h:357

llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition GCNSubtarget.h:178

llvm::GCNSubtarget::hasLDSMisalignedBugInWGPMode
bool hasLDSMisalignedBugInWGPMode() const
Definition GCNSubtarget.h:577

llvm::GCNSubtarget::checkSubtargetFeatures
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
Definition GCNSubtarget.cpp:170

llvm::GCNSubtarget::~GCNSubtarget
~GCNSubtarget() override

llvm::GCNSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition GCNSubtarget.cpp:213

llvm::GCNSubtarget::hasVOPD3
bool hasVOPD3() const
Definition GCNSubtarget.h:731

llvm::GCNSubtarget::hasAtomicCSub
bool hasAtomicCSub() const
Definition GCNSubtarget.h:377

llvm::GCNSubtarget::TargetID
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
Definition GCNSubtarget.h:74

llvm::GCNSubtarget::requiresCodeObjectV6
bool requiresCodeObjectV6() const
Definition GCNSubtarget.h:623

llvm::GCNSubtarget::getCallLowering
const CallLowering * getCallLowering() const override
Definition GCNSubtarget.h:140

llvm::GCNSubtarget::hasLdsDirect
bool hasLdsDirect() const
Definition GCNSubtarget.h:607

llvm::GCNSubtarget::hasGWSAutoReplay
bool hasGWSAutoReplay() const
Definition GCNSubtarget.h:415

llvm::GCNUserSGPRUsageInfo::getNumUserSGPRForField
static unsigned getNumUserSGPRForField(UserSGPRID ID)
Definition GCNSubtarget.h:1083

llvm::GCNUserSGPRUsageInfo::hasQueuePtr
bool hasQueuePtr() const
Definition GCNSubtarget.h:1053

llvm::GCNUserSGPRUsageInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition GCNSubtarget.h:1055

llvm::GCNUserSGPRUsageInfo::allocKernargPreloadSGPRs
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
Definition GCNSubtarget.cpp:914

llvm::GCNUserSGPRUsageInfo::hasDispatchID
bool hasDispatchID() const
Definition GCNSubtarget.h:1057

llvm::GCNUserSGPRUsageInfo::UserSGPRID
UserSGPRID
Definition GCNSubtarget.h:1071

llvm::GCNUserSGPRUsageInfo::ImplicitBufferPtrID
@ ImplicitBufferPtrID
Definition GCNSubtarget.h:1072

llvm::GCNUserSGPRUsageInfo::DispatchIdID
@ DispatchIdID
Definition GCNSubtarget.h:1077

llvm::GCNUserSGPRUsageInfo::QueuePtrID
@ QueuePtrID
Definition GCNSubtarget.h:1075

llvm::GCNUserSGPRUsageInfo::DispatchPtrID
@ DispatchPtrID
Definition GCNSubtarget.h:1074

llvm::GCNUserSGPRUsageInfo::FlatScratchInitID
@ FlatScratchInitID
Definition GCNSubtarget.h:1078

llvm::GCNUserSGPRUsageInfo::PrivateSegmentBufferID
@ PrivateSegmentBufferID
Definition GCNSubtarget.h:1073

llvm::GCNUserSGPRUsageInfo::PrivateSegmentSizeID
@ PrivateSegmentSizeID
Definition GCNSubtarget.h:1079

llvm::GCNUserSGPRUsageInfo::KernargSegmentPtrID
@ KernargSegmentPtrID
Definition GCNSubtarget.h:1076

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition GCNSubtarget.h:1049

llvm::GCNUserSGPRUsageInfo::getNumFreeUserSGPRs
unsigned getNumFreeUserSGPRs()
Definition GCNSubtarget.cpp:920

llvm::GCNUserSGPRUsageInfo::hasImplicitBufferPtr
bool hasImplicitBufferPtr() const
Definition GCNSubtarget.h:1047

llvm::GCNUserSGPRUsageInfo::getNumKernargPreloadSGPRs
unsigned getNumKernargPreloadSGPRs() const
Definition GCNSubtarget.h:1063

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentSize
bool hasPrivateSegmentSize() const
Definition GCNSubtarget.h:1061

llvm::GCNUserSGPRUsageInfo::getNumUsedUserSGPRs
unsigned getNumUsedUserSGPRs() const
Definition GCNSubtarget.h:1065

llvm::GCNUserSGPRUsageInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition GCNSubtarget.h:1051

llvm::GCNUserSGPRUsageInfo::GCNUserSGPRUsageInfo
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
Definition GCNSubtarget.cpp:850

llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition GCNSubtarget.h:1059

llvm::InlineAsmLowering
Definition InlineAsmLowering.h:28

llvm::InstrItineraryData
Itinerary data supplied by a subtarget to be used by a target.
Definition MCInstrItineraries.h:110

llvm::InstructionSelector
Definition InstructionSelector.h:22

llvm::LegalizerInfo
Definition LegalizerInfo.h:1371

llvm::Legalizer
Definition Legalizer.h:39

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::Region
Definition RegionInfo.h:887

llvm::SDep
Scheduling dependency.
Definition ScheduleDAG.h:51

llvm::SIFrameLowering
Definition SIFrameLowering.h:19

llvm::SIInstrInfo
Definition SIInstrInfo.h:101

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SIRegisterInfo::getBoolRC
const TargetRegisterClass * getBoolRC() const
Definition SIRegisterInfo.h:383

llvm::SITargetLowering
Definition SIISelLowering.h:32

llvm::SUnit
Scheduling unit. This is a node in the scheduling DAG.
Definition ScheduleDAG.h:249

llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition SelectionDAGTargetInfo.h:33

llvm::StringLiteral
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882

llvm::StringRef
Represent a constant reference to a string, i.e.
Definition StringRef.h:56

llvm::TargetRegisterClass
Definition TargetRegisterInfo.h:45

llvm::TargetSchedModel
Provide an instruction scheduling machine model to CodeGen passes.
Definition TargetSchedule.h:31

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition TargetSubtargetInfo.h:66

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition Use.h:35

uint32_t

ErrorHandling.h

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::AMDGPUOOBMode
Module flag names controlling out-of-bounds buffer access semantics.
Definition GCNSubtarget.h:39

llvm::AMDGPUOOBMode::BufferFlag
constexpr StringLiteral BufferFlag("amdgpu.buffer.oob.mode")

llvm::AMDGPUOOBMode::TBufferFlag
constexpr StringLiteral TBufferFlag("amdgpu.tbuffer.oob.mode")

llvm::AMDGPU::IsaInfo::getSGPRAllocGranule
unsigned getSGPRAllocGranule(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1307

llvm::AMDGPU::IsaInfo::getAddressableNumArchVGPRs
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1463

llvm::AMDGPU::IsaInfo::getTotalNumSGPRs
unsigned getTotalNumSGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1318

llvm::AMDGPU::IsaInfo::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1325

llvm::AMDGPU::IsaInfo::getMinFlatWorkGroupSize
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1300

llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1435

llvm::AMDGPU::IsaInfo::getMaxWorkGroupsPerCU
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1264

llvm::AMDGPU::IsaInfo::getMinNumSGPRs
unsigned getMinNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU)
Definition AMDGPUBaseInfo.cpp:1337

llvm::AMDGPU::IsaInfo::getMaxNumSGPRs
unsigned getMaxNumSGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, bool Addressable)
Definition AMDGPUBaseInfo.cpp:1354

llvm::AMDGPU::IsaInfo::getWavesPerEUForWorkGroup
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo &STI, unsigned FlatWorkGroupSize)
Definition AMDGPUBaseInfo.cpp:1294

llvm::AMDGPU::IsaInfo::getMaxFlatWorkGroupSize
constexpr unsigned getMaxFlatWorkGroupSize()
Definition AMDGPUBaseInfo.h:275

llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1316

llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1452

llvm::AMDGPU::IsaInfo::getMinNumVGPRs
unsigned getMinNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1528

llvm::AMDGPU::IsaInfo::getAddressableNumVGPRs
unsigned getAddressableNumVGPRs(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1470

llvm::AMDGPU::IsaInfo::getMaxNumVGPRs
unsigned getMaxNumVGPRs(const MCSubtargetInfo &STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
Definition AMDGPUBaseInfo.cpp:1563

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1413

llvm::AMDGPU::IsaInfo::getMinWavesPerEU
unsigned getMinWavesPerEU(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1283

llvm::AMDGPU::isShader
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1456

llvm::AMDGPU::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2579

llvm::AMDGPU::getNSAMaxSize
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
Definition AMDGPUBaseInfo.cpp:2568

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::countl_zero
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition bit.h:263

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::MachineSchedPolicy
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
Definition MachineScheduler.h:202

llvm::SchedRegion
A region of an MBB for scheduling.
Definition MachineScheduler.h:228