33#define DEBUG_TYPE "gcn-subtarget"
35#define GET_SUBTARGETINFO_TARGET_DESC
36#define GET_SUBTARGETINFO_CTOR
37#define AMDGPUSubtarget GCNSubtarget
38#include "AMDGPUGenSubtargetInfo.inc"
42 "amdgpu-vgpr-index-mode",
43 cl::desc(
"Use GPR indexing mode instead of movrel for vector indexing"),
47 cl::desc(
"Enable the use of AA during codegen."),
52 cl::desc(
"Number of addresses from which to enable MIMG NSA."),
74 FullFS +=
"+flat-for-global,+unaligned-access-mode,+trap-handler,";
76 FullFS +=
"+enable-prt-strict-null,";
79 if (FS.contains_insensitive(
"+wavefrontsize")) {
80 if (!FS.contains_insensitive(
"wavefrontsize16"))
81 FullFS +=
"-wavefrontsize16,";
82 if (!FS.contains_insensitive(
"wavefrontsize32"))
83 FullFS +=
"-wavefrontsize32,";
84 if (!FS.contains_insensitive(
"wavefrontsize64"))
85 FullFS +=
"-wavefrontsize64,";
102 }
else if (!hasFeature(AMDGPU::FeatureWavefrontSize32) &&
103 !hasFeature(AMDGPU::FeatureWavefrontSize64)) {
107 ToggleFeature(AMDGPU::FeatureWavefrontSize32);
122 if (!
hasAddr64() && !FS.contains(
"flat-for-global") && !UseFlatForGlobal) {
123 ToggleFeature(AMDGPU::FeatureUseFlatForGlobal);
124 UseFlatForGlobal =
true;
128 if (!
hasFlat() && !FS.contains(
"flat-for-global") && UseFlatForGlobal) {
129 ToggleFeature(AMDGPU::FeatureUseFlatForGlobal);
130 UseFlatForGlobal =
false;
158 "InstCacheLineSize must be a power of 2");
160 TargetID.setTargetIDFromFeaturesString(FS);
163 <<
TargetID.getXnackSetting() <<
'\n');
165 <<
TargetID.getSramEccSetting() <<
'\n');
172 if (hasFeature(AMDGPU::FeatureWavefrontSize32) &&
173 hasFeature(AMDGPU::FeatureWavefrontSize64)) {
175 F,
"must specify exactly one of wavefrontsize32 and wavefrontsize64"));
177 if (hasFeature(AMDGPU::FeatureXNACKAnyOnly) &&
TargetID.isXnackOnOrOff()) {
179 F,
"target only supports xnack 'Any'; '+/-xnack' is not allowed"));
202 TSInfo = std::make_unique<AMDGPUSelectionDAGInfo>();
205 InlineAsmLoweringInfo =
207 Legalizer = std::make_unique<AMDGPULegalizerInfo>(*
this, TM);
208 RegBankInfo = std::make_unique<AMDGPURegisterBankInfo>(*
this);
210 std::make_unique<AMDGPUInstructionSelector>(*
this, *RegBankInfo, TM);
222 case AMDGPU::V_LSHLREV_B64_e64:
223 case AMDGPU::V_LSHLREV_B64_gfx10:
224 case AMDGPU::V_LSHLREV_B64_e64_gfx11:
225 case AMDGPU::V_LSHLREV_B64_e32_gfx12:
226 case AMDGPU::V_LSHLREV_B64_e64_gfx12:
227 case AMDGPU::V_LSHL_B64_e64:
228 case AMDGPU::V_LSHRREV_B64_e64:
229 case AMDGPU::V_LSHRREV_B64_gfx10:
230 case AMDGPU::V_LSHRREV_B64_e64_gfx11:
231 case AMDGPU::V_LSHRREV_B64_e64_gfx12:
232 case AMDGPU::V_LSHR_B64_e64:
233 case AMDGPU::V_ASHRREV_I64_e64:
234 case AMDGPU::V_ASHRREV_I64_gfx10:
235 case AMDGPU::V_ASHRREV_I64_e64_gfx11:
236 case AMDGPU::V_ASHRREV_I64_e64_gfx12:
237 case AMDGPU::V_ASHR_I64_e64:
247 case AMDGPU::V_CVT_F16_F32_e32:
248 case AMDGPU::V_CVT_F16_F32_e64:
249 case AMDGPU::V_CVT_F16_U16_e32:
250 case AMDGPU::V_CVT_F16_U16_e64:
251 case AMDGPU::V_CVT_F16_I16_e32:
252 case AMDGPU::V_CVT_F16_I16_e64:
253 case AMDGPU::V_RCP_F16_e64:
254 case AMDGPU::V_RCP_F16_e32:
255 case AMDGPU::V_RSQ_F16_e64:
256 case AMDGPU::V_RSQ_F16_e32:
257 case AMDGPU::V_SQRT_F16_e64:
258 case AMDGPU::V_SQRT_F16_e32:
259 case AMDGPU::V_LOG_F16_e64:
260 case AMDGPU::V_LOG_F16_e32:
261 case AMDGPU::V_EXP_F16_e64:
262 case AMDGPU::V_EXP_F16_e32:
263 case AMDGPU::V_SIN_F16_e64:
264 case AMDGPU::V_SIN_F16_e32:
265 case AMDGPU::V_COS_F16_e64:
266 case AMDGPU::V_COS_F16_e32:
267 case AMDGPU::V_FLOOR_F16_e64:
268 case AMDGPU::V_FLOOR_F16_e32:
269 case AMDGPU::V_CEIL_F16_e64:
270 case AMDGPU::V_CEIL_F16_e32:
271 case AMDGPU::V_TRUNC_F16_e64:
272 case AMDGPU::V_TRUNC_F16_e32:
273 case AMDGPU::V_RNDNE_F16_e64:
274 case AMDGPU::V_RNDNE_F16_e32:
275 case AMDGPU::V_FRACT_F16_e64:
276 case AMDGPU::V_FRACT_F16_e32:
277 case AMDGPU::V_FREXP_MANT_F16_e64:
278 case AMDGPU::V_FREXP_MANT_F16_e32:
279 case AMDGPU::V_FREXP_EXP_I16_F16_e64:
280 case AMDGPU::V_FREXP_EXP_I16_F16_e32:
281 case AMDGPU::V_LDEXP_F16_e64:
282 case AMDGPU::V_LDEXP_F16_e32:
283 case AMDGPU::V_LSHLREV_B16_e64:
284 case AMDGPU::V_LSHLREV_B16_e32:
285 case AMDGPU::V_LSHRREV_B16_e64:
286 case AMDGPU::V_LSHRREV_B16_e32:
287 case AMDGPU::V_ASHRREV_I16_e64:
288 case AMDGPU::V_ASHRREV_I16_e32:
289 case AMDGPU::V_ADD_U16_e64:
290 case AMDGPU::V_ADD_U16_e32:
291 case AMDGPU::V_SUB_U16_e64:
292 case AMDGPU::V_SUB_U16_e32:
293 case AMDGPU::V_SUBREV_U16_e64:
294 case AMDGPU::V_SUBREV_U16_e32:
295 case AMDGPU::V_MUL_LO_U16_e64:
296 case AMDGPU::V_MUL_LO_U16_e32:
297 case AMDGPU::V_ADD_F16_e64:
298 case AMDGPU::V_ADD_F16_e32:
299 case AMDGPU::V_SUB_F16_e64:
300 case AMDGPU::V_SUB_F16_e32:
301 case AMDGPU::V_SUBREV_F16_e64:
302 case AMDGPU::V_SUBREV_F16_e32:
303 case AMDGPU::V_MUL_F16_e64:
304 case AMDGPU::V_MUL_F16_e32:
305 case AMDGPU::V_MAX_F16_e64:
306 case AMDGPU::V_MAX_F16_e32:
307 case AMDGPU::V_MIN_F16_e64:
308 case AMDGPU::V_MIN_F16_e32:
309 case AMDGPU::V_MAX_U16_e64:
310 case AMDGPU::V_MAX_U16_e32:
311 case AMDGPU::V_MIN_U16_e64:
312 case AMDGPU::V_MIN_U16_e32:
313 case AMDGPU::V_MAX_I16_e64:
314 case AMDGPU::V_MAX_I16_e32:
315 case AMDGPU::V_MIN_I16_e64:
316 case AMDGPU::V_MIN_I16_e32:
317 case AMDGPU::V_MAD_F16_e64:
318 case AMDGPU::V_MAD_U16_e64:
319 case AMDGPU::V_MAD_I16_e64:
320 case AMDGPU::V_FMA_F16_e64:
321 case AMDGPU::V_DIV_FIXUP_F16_e64:
324 case AMDGPU::V_MADAK_F16:
325 case AMDGPU::V_MADMK_F16:
326 case AMDGPU::V_MAC_F16_e64:
327 case AMDGPU::V_MAC_F16_e32:
328 case AMDGPU::V_FMAMK_F16:
329 case AMDGPU::V_FMAAK_F16:
330 case AMDGPU::V_FMAC_F16_e64:
331 case AMDGPU::V_FMAC_F16_e32:
336 case AMDGPU::V_MAD_MIXLO_F16:
337 case AMDGPU::V_MAD_MIXHI_F16:
363 if (!enableSIScheduler())
370 Attribute PostRADirectionAttr =
F.getFnAttribute(
"amdgpu-post-ra-direction");
371 if (!PostRADirectionAttr.
isValid())
375 if (PostRADirectionStr ==
"topdown") {
378 }
else if (PostRADirectionStr ==
"bottomup") {
381 }
else if (PostRADirectionStr ==
"bidirectional") {
386 F,
F.getSubprogram(),
"invalid value for postRA direction attribute");
387 F.getContext().diagnose(Diag);
391 const char *DirStr =
"default";
397 DirStr =
"bidirectional";
399 dbgs() <<
"Post-MI-sched direction (" <<
F.getName() <<
"): " << DirStr
408 for (
auto &
MBB : MF) {
410 InstrInfo.fixImplicitOperands(
MI);
416 return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16_e64) != -1;
432 unsigned DynamicVGPRBlockSize)
const {
434 DynamicVGPRBlockSize);
442 if (HasFlatScratch || HasArchitectedFlatScratch) {
463 const bool KernelUsesFlatScratch = hasFlatAddressSpace();
467std::pair<unsigned, unsigned>
469 unsigned NumSGPRs,
unsigned NumVGPRs)
const {
481 MaxOcc = std::min(MaxOcc, std::min(SGPROcc, VGPROcc));
482 return {std::min(MinOcc, MaxOcc), MaxOcc};
486 const Function &
F, std::pair<unsigned, unsigned> WavesPerEU,
487 unsigned PreloadedSGPRs,
unsigned ReservedNumSGPRs)
const {
491 unsigned MaxAddressableNumSGPRs =
getMaxNumSGPRs(WavesPerEU.first,
true);
496 F.getFnAttributeAsParsedInteger(
"amdgpu-num-sgpr", MaxNumSGPRs);
498 if (Requested != MaxNumSGPRs) {
500 if (Requested && (Requested <= ReservedNumSGPRs))
510 unsigned InputNumSGPRs = PreloadedSGPRs;
511 if (Requested && Requested < InputNumSGPRs)
512 Requested = InputNumSGPRs;
516 if (Requested && Requested >
getMaxNumSGPRs(WavesPerEU.first,
false))
518 if (WavesPerEU.second && Requested &&
523 MaxNumSGPRs = Requested;
526 if (hasSGPRInitBug())
529 return std::min(MaxNumSGPRs - ReservedNumSGPRs, MaxAddressableNumSGPRs);
542 const unsigned MaxUserSGPRs =
543 USI::getNumUserSGPRForField(USI::PrivateSegmentBufferID) +
544 USI::getNumUserSGPRForField(USI::DispatchPtrID) +
545 USI::getNumUserSGPRForField(USI::QueuePtrID) +
546 USI::getNumUserSGPRForField(USI::KernargSegmentPtrID) +
547 USI::getNumUserSGPRForField(USI::DispatchIdID) +
548 USI::getNumUserSGPRForField(USI::FlatScratchInitID) +
549 USI::getNumUserSGPRForField(USI::ImplicitBufferPtrID);
552 const unsigned MaxSystemSGPRs = 1 +
559 const unsigned SyntheticSGPRs = 1;
561 return MaxUserSGPRs + MaxSystemSGPRs + SyntheticSGPRs;
570 const Function &
F, std::pair<unsigned, unsigned> NumVGPRBounds)
const {
571 const auto [Min, Max] = NumVGPRBounds;
576 unsigned Requested =
F.getFnAttributeAsParsedInteger(
"amdgpu-num-vgpr", Max);
577 if (Requested != Max && hasGFX90AInsts())
581 return std::clamp(Requested, Min, Max);
601std::pair<unsigned, unsigned>
605 unsigned MaxNumVGPRs = MaxVectorRegs;
606 unsigned MaxNumAGPRs = 0;
617 if (hasGFX90AInsts()) {
618 unsigned MinNumAGPRs = 0;
619 const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
621 const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};
625 std::tie(MinNumAGPRs, MaxNumAGPRs) =
629 if (MinNumAGPRs == DefaultNumAGPR.first) {
631 MinNumAGPRs = MaxNumAGPRs = MaxVectorRegs / 2;
634 MinNumAGPRs =
alignTo(MinNumAGPRs, 4);
636 MinNumAGPRs = std::min(MinNumAGPRs, TotalNumAGPRs);
641 MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
642 MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);
644 MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, NumArchVGPRs);
645 MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);
647 assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
648 MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= NumArchVGPRs &&
649 "invalid register counts");
650 }
else if (hasMAIInsts()) {
652 MaxNumAGPRs = MaxNumVGPRs = MaxVectorRegs;
655 return std::pair(MaxNumVGPRs, MaxNumAGPRs);
665 AMDGPU::OpName UseName =
666 AMDGPU::getOperandIdxName(UseI.
getOpcode(), UseOpIdx);
668 case AMDGPU::OpName::src0:
669 return InstrInfo.getNamedOperand(UseI, AMDGPU::OpName::src0_modifiers);
670 case AMDGPU::OpName::src1:
671 return InstrInfo.getNamedOperand(UseI, AMDGPU::OpName::src1_modifiers);
672 case AMDGPU::OpName::src2:
673 return InstrInfo.getNamedOperand(UseI, AMDGPU::OpName::src2_modifiers);
686 if (!InstrInfo.isVOP3P(
I) || InstrInfo.isWMMA(
I) || InstrInfo.isSWMMAC(
I))
687 return AMDGPU::NoSubRegister;
692 return AMDGPU::NoSubRegister;
711 if ((!InstrInfo.isVOP3PMix(
I) && (!OpSel || !OpSelHi) &&
712 (OpSel || OpSelHi)) ||
713 (InstrInfo.isVOP3PMix(
I) && !OpSelHi))
714 return AMDGPU::NoSubRegister;
719 if (
unsigned SubRegIdx = OpSel ? AMDGPU::sub1 : AMDGPU::sub0;
720 TRI.getSubClassWithSubReg(RC, SubRegIdx) == RC)
722 if (
unsigned SubRegIdx = OpSel ? AMDGPU::hi16 : AMDGPU::lo16;
723 TRI.getSubClassWithSubReg(RC, SubRegIdx) == RC)
726 return AMDGPU::NoSubRegister;
732 int UseOpIdx)
const {
734 const MachineOperand &DefOp = DefI.
getOperand(DefOpIdx);
735 const MachineOperand &UseOp = UseI.
getOperand(UseOpIdx);
744 unsigned DefSubRegIdx = DefOp.
getSubReg();
745 if (DefReg.
isVirtual() && DefSubRegIdx == AMDGPU::NoSubRegister)
751 if (!
TRI->checkSubRegInterference(DefReg, DefSubRegIdx,
UseReg, UseSubRegIdx))
759 MCRegister DefMCReg =
760 DefSubRegIdx ?
TRI->getSubReg(DefReg, DefSubRegIdx) : DefReg.
asMCReg();
761 MCRegister UseMCReg =
763 return TRI->isSubRegisterEq(DefMCReg, UseMCReg) ? UseMCReg : DefMCReg;
777 if (Dep.
getReg() == AMDGPU::TENSORcnt || Dep.
getReg() == AMDGPU::ASYNCcnt) {
781 InstrInfo.isLDSDMA(*DefI) &&
782 (UseOp == AMDGPU::S_WAIT_TENSORCNT || UseOp == AMDGPU::S_WAIT_ASYNCCNT);
783 if (!IsBarrierCase) {
789 if (
Register Reg = getRealSchedDependency(*DefI, DefOpIdx, *UseI, UseOpIdx)) {
802 for (++
I;
I != E &&
I->isBundledWithPred(); ++
I) {
803 if (
I->isMetaInstruction())
805 if (
I->modifiesRegister(Reg,
TRI))
817 for (++
I;
I != E &&
I->isBundledWithPred() && Lat; ++
I) {
818 if (
I->isMetaInstruction())
820 if (
I->readsRegister(Reg,
TRI))
830 Dep.
setLatency(InstrInfo.getSchedModel().computeOperandLatency(
831 DefI, DefOpIdx, UseI, UseOpIdx));
843 "amdgpu-nsa-threshold", -1);
845 return std::max(
Value, 2);
854 const bool IsKernel =
857 if (IsKernel && (!
F.arg_empty() || ST.getImplicitArgNumBytes(
F) != 0))
858 KernargSegmentPtr =
true;
860 bool IsAmdHsaOrMesa = ST.isAmdHsaOrMesa(
F);
861 if (IsAmdHsaOrMesa && !ST.hasFlatScratchEnabled())
862 PrivateSegmentBuffer =
true;
863 else if (ST.isMesaGfxShader(
F))
864 ImplicitBufferPtr =
true;
867 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-ptr"))
871 if (!
F.hasFnAttribute(
"amdgpu-no-queue-ptr"))
874 if (!
F.hasFnAttribute(
"amdgpu-no-dispatch-id"))
879 (IsAmdHsaOrMesa || ST.hasFlatScratchEnabled()) &&
882 (ST.hasFlatScratchEnabled() ||
884 !
F.hasFnAttribute(
"amdgpu-no-flat-scratch-init"))) &&
885 !ST.hasArchitectedFlatScratch()) {
886 FlatScratchInit = true;
916 NumKernargPreloadSGPRs += NumSGPRs;
917 NumUsedUserSGPRs += NumSGPRs;
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the InstructionSelector class for AMDGPU.
This file declares the targeting of the Machinelegalizer class for AMDGPU.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
The AMDGPU TargetMachine interface definition for hw codegen targets.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static cl::opt< unsigned > NSAThreshold("amdgpu-nsa-threshold", cl::desc("Number of addresses from which to enable MIMG NSA."), cl::init(2), cl::Hidden)
static cl::opt< bool > EnableVGPRIndexMode("amdgpu-vgpr-index-mode", cl::desc("Use GPR indexing mode instead of movrel for vector indexing"), cl::init(false))
static cl::opt< bool > UseAA("amdgpu-use-aa-in-codegen", cl::desc("Enable the use of AA during codegen."), cl::init(true))
static const MachineOperand * getVOP3PSourceModifierFromOpIdx(const MachineInstr &UseI, int UseOpIdx, const SIInstrInfo &InstrInfo)
static unsigned getEffectiveSubRegIdx(const SIRegisterInfo &TRI, const SIInstrInfo &InstrInfo, const MachineInstr &I, const MachineOperand &Op)
AMD GCN specific subclass of TargetSubtarget.
static Register UseReg(const MachineOperand &MO)
This file describes how to lower LLVM inline asm to machine code INLINEASM.
Register const TargetRegisterInfo * TRI
Promote Memory to Register
This file defines the SmallString class.
unsigned FlatOffsetBitWidth
std::pair< unsigned, unsigned > getWavesPerEU(const Function &F) const
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getWavefrontSizeLog2() const
AMDGPUSubtarget(const Triple &TT)
unsigned AddressableLocalMemorySize
Functions, function parameters, and return types can have attributes to indicate how they should be t...
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
bool isValid() const
Return true if the attribute is any kind of attribute.
Diagnostic information for optimization failures.
Diagnostic information for unsupported feature in backend.
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
InstrItineraryData InstrItins
bool useVGPRIndexMode() const
void mirFileLoaded(MachineFunction &MF) const override
unsigned MaxPrivateElementSize
unsigned getAddressableNumArchVGPRs() const
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, const GCNTargetMachine &TM, bool BufferOOBRelaxed=false, bool TBufferOOBRelaxed=false)
unsigned getConstantBusLimit(unsigned Opcode) const
const InstrItineraryData * getInstrItineraryData() const override
void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, SDep &Dep, const TargetSchedModel *SchedModel) const override
void overridePostRASchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
Align getStackAlignment() const
const bool BufferOOBRelaxed
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isDynamicVGPREnabled() const
const SIRegisterInfo * getRegisterInfo() const override
unsigned getBaseMaxNumVGPRs(const Function &F, std::pair< unsigned, unsigned > NumVGPRBounds) const
bool zeroesHigh16BitsOfDest(unsigned Opcode) const
Returns if the result of this instruction with a 16-bit result returned in a 32-bit register implicit...
unsigned getBaseMaxNumSGPRs(const Function &F, std::pair< unsigned, unsigned > WavesPerEU, unsigned PreloadedSGPRs, unsigned ReservedNumSGPRs) const
unsigned getMaxNumPreloadedSGPRs() const
GCNSubtarget & initializeSubtargetDependencies(const Triple &TT, StringRef GPU, StringRef FS)
void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getMaxNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
const SITargetLowering * getTargetLowering() const override
unsigned getNSAThreshold(const MachineFunction &MF) const
unsigned getReservedNumSGPRs(const MachineFunction &MF) const
const bool TBufferOOBRelaxed
bool useAA() const override
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs, unsigned DynamicVGPRBlockSize) const
Return the maximum number of waves per SIMD for kernels using VGPRs VGPRs.
unsigned InstCacheLineSize
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const
Return the maximum number of waves per SIMD for kernels using SGPRs SGPRs.
unsigned getMaxWavesPerEU() const
Generation getGeneration() const
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
std::pair< unsigned, unsigned > getMaxNumVectorRegs(const Function &F) const
Return a pair of maximum numbers of VGPRs and AGPRs that meet the number of waves per execution unit ...
bool isXNACKEnabled() const
unsigned getBaseReservedNumSGPRs(const bool HasFlatScratch) const
unsigned getDynamicVGPRBlockSize() const
void checkSubtargetFeatures(const Function &F) const
Diagnose inconsistent subtarget features before attempting to codegen function F.
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
AMDGPU::IsaInfo::AMDGPUTargetID TargetID
static unsigned getNumUserSGPRForField(UserSGPRID ID)
bool hasKernargSegmentPtr() const
void allocKernargPreloadSGPRs(unsigned NumSGPRs)
bool hasDispatchID() const
bool hasPrivateSegmentBuffer() const
unsigned getNumFreeUserSGPRs()
bool hasImplicitBufferPtr() const
bool hasPrivateSegmentSize() const
bool hasDispatchPtr() const
GCNUserSGPRUsageInfo(const Function &F, const GCNSubtarget &ST)
bool hasFlatScratchInit() const
This is an important class for using LLVM in a threaded context.
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Kind getKind() const
Returns an enum value representing the kind of the dependence.
@ Data
Regular data dependence (aka true-dependence).
void setLatency(unsigned Lat)
Sets the latency for this edge.
@ Artificial
Arbitrary strong DAG edge (no real dependence).
unsigned getLatency() const
Returns the latency value for this edge, which roughly means the minimum number of cycles that must e...
Register getReg() const
Returns the register associated with this edge.
void setReg(Register Reg)
Assigns the associated register for this edge.
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
unsigned getNumPreloadedSGPRs() const
std::pair< unsigned, unsigned > getWavesPerEU() const
GCNUserSGPRUsageInfo & getUserSGPRInfo()
Scheduling unit. This is a node in the scheduling DAG.
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Represent a constant reference to a string, i.e.
Information about stack frame layout on the target.
Provide an instruction scheduling machine model to CodeGen passes.
Triple - Helper class for working with autoconf configuration names.
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
self_iterator getIterator()
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo &STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
unsigned getEUsPerCU(const MCSubtargetInfo &STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
unsigned getLocalMemorySize(const MCSubtargetInfo &STI)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
StringRef getSchedStrategy(const Function &F)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getDynamicVGPRBlockSize(const Function &F)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
This struct is a compact representation of a valid (non-zero power of two) alignment.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
bool ShouldTrackLaneMasks
Track LaneMasks to allow reordering of independent subregister writes of the same vreg.
A region of an MBB for scheduling.