LLVM 23.0.0git
AMDGPUBaseInfo.h
Go to the documentation of this file.
1//===- AMDGPUBaseInfo.h - Top level definitions for AMDGPU ------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
10#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
11
12#include "AMDGPUSubtarget.h"
13#include "SIDefines.h"
14#include "llvm/IR/CallingConv.h"
15#include "llvm/IR/InstrTypes.h"
16#include "llvm/IR/Module.h"
18#include <array>
19#include <functional>
20#include <utility>
21
22// Pull in OpName enum definition and getNamedOperandIdx() declaration.
23#define GET_INSTRINFO_OPERAND_ENUM
24#include "AMDGPUGenInstrInfo.inc"
25
27
28namespace llvm {
29
30struct Align;
31class Argument;
32class Function;
33class GlobalValue;
34class MCInstrInfo;
35class MCRegisterClass;
36class MCRegisterInfo;
37class MCSubtargetInfo;
38class MDNode;
39class StringRef;
40class Triple;
41class raw_ostream;
42
43namespace AMDGPU {
44
45struct AMDGPUMCKernelCodeT;
46struct IsaVersion;
47
/// Generic target versions emitted by this version of LLVM.
///
/// Each constant holds the current version number of one generic family.
/// A number is incremented every time a codegen breaking change occurs
/// within that generic family.
namespace GenericVersion {
static constexpr unsigned GFX9 = 1;
static constexpr unsigned GFX9_4 = 1;
static constexpr unsigned GFX10_1 = 1;
static constexpr unsigned GFX10_3 = 1;
static constexpr unsigned GFX11 = 1;
static constexpr unsigned GFX12 = 1;
static constexpr unsigned GFX12_5 = 1;
} // namespace GenericVersion
61
/// Named constants for the integer values 4, 5 and 6.
/// NOTE(review): the AMDHSA_COV prefix suggests these are AMDHSA code object
/// versions -- confirm against the users of these constants.
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
63
/// Floating-point type tag with the enumerators None, FP4 and FP8. It is the
/// return type of getFPDstSelType().
enum class FPType { None, FP4, FP8 };
65
66/// \returns True if \p STI is AMDHSA.
67bool isHsaAbi(const MCSubtargetInfo &STI);
68
69/// \returns Code object version from the IR module flag.
70unsigned getAMDHSACodeObjectVersion(const Module &M);
71
72/// \returns Code object version from ELF's e_ident[EI_ABIVERSION].
73unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion);
74
75/// \returns The default HSA code object version. This should only be used when
76/// we lack a more accurate CodeObjectVersion value (e.g. from the IR module
77/// flag or a .amdhsa_code_object_version directive)
79
80/// \returns ABIVersion suitable for use in ELF's e_ident[EI_ABIVERSION]. \param
81/// CodeObjectVersion is a value returned by getAMDHSACodeObjectVersion().
82uint8_t getELFABIVersion(const Triple &OS, unsigned CodeObjectVersion);
83
84/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
85unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
86
87/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
88unsigned getHostcallImplicitArgPosition(unsigned COV);
89
90unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
91unsigned getCompletionActionImplicitArgPosition(unsigned COV);
92
94 unsigned Format;
95 unsigned BitsPerComp;
96 unsigned NumComponents;
97 unsigned NumFormat;
98 unsigned DataFormat;
99};
100
106
113
117
119 unsigned T16Op;
120 unsigned HiOp;
121 unsigned LoOp;
122};
123
128
129#define GET_MIMGBaseOpcode_DECL
130#define GET_MIMGDim_DECL
131#define GET_MIMGEncoding_DECL
132#define GET_MIMGLZMapping_DECL
133#define GET_MIMGMIPMapping_DECL
134#define GET_MIMGBiASMapping_DECL
135#define GET_MAIInstInfoTable_DECL
136#define GET_isMFMA_F8F6F4Table_DECL
137#define GET_isCvtScaleF32_F32F16ToF8F4Table_DECL
138#define GET_True16D16Table_DECL
139#define GET_WMMAInstInfoTable_DECL
140#include "AMDGPUGenSearchableTables.inc"
141
142namespace IsaInfo {
143
144enum {
145 // The closed Vulkan driver sets 96, which limits the wave count to 8 but
146 // doesn't spill SGPRs as much as when 80 is set.
149};
150
152
154private:
155 const MCSubtargetInfo &STI;
156 TargetIDSetting XnackSetting;
157 TargetIDSetting SramEccSetting;
158
159public:
160 explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
161 ~AMDGPUTargetID() = default;
162
163 /// \return True if the current xnack setting is not "Unsupported".
164 bool isXnackSupported() const {
165 return XnackSetting != TargetIDSetting::Unsupported;
166 }
167
168 /// \returns True if the current xnack setting is "On" or "Any".
169 bool isXnackOnOrAny() const {
170 return XnackSetting == TargetIDSetting::On ||
171 XnackSetting == TargetIDSetting::Any;
172 }
173
174 /// \returns True if current xnack setting is "On" or "Off",
175 /// false otherwise.
180
181 /// \returns The current xnack TargetIDSetting, possible options are
182 /// "Unsupported", "Any", "Off", and "On".
183 TargetIDSetting getXnackSetting() const { return XnackSetting; }
184
185 /// Sets xnack setting to \p NewXnackSetting.
186 void setXnackSetting(TargetIDSetting NewXnackSetting) {
187 XnackSetting = NewXnackSetting;
188 }
189
190 /// \return True if the current sramecc setting is not "Unsupported".
191 bool isSramEccSupported() const {
192 return SramEccSetting != TargetIDSetting::Unsupported;
193 }
194
195 /// \returns True if the current sramecc setting is "On" or "Any".
196 bool isSramEccOnOrAny() const {
197 return SramEccSetting == TargetIDSetting::On ||
198 SramEccSetting == TargetIDSetting::Any;
199 }
200
201 /// \returns True if current sramecc setting is "On" or "Off",
202 /// false otherwise.
207
208 /// \returns The current sramecc TargetIDSetting, possible options are
209 /// "Unsupported", "Any", "Off", and "On".
210 TargetIDSetting getSramEccSetting() const { return SramEccSetting; }
211
212 /// Sets sramecc setting to \p NewSramEccSetting.
213 void setSramEccSetting(TargetIDSetting NewSramEccSetting) {
214 SramEccSetting = NewSramEccSetting;
215 }
216
219
220 /// Write string representation to \p OS
221 void print(raw_ostream &OS) const;
222
223 /// \returns String representation of an object.
224 std::string toString() const;
225};
226
228 const AMDGPUTargetID &TargetID) {
229 TargetID.print(OS);
230 return OS;
231}
232
233/// \returns Wavefront size for given subtarget \p STI.
234unsigned getWavefrontSize(const MCSubtargetInfo *STI);
235
236/// \returns Local memory size in bytes for given subtarget \p STI.
237unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
238
239/// \returns Maximum addressable local memory size in bytes for given subtarget
240/// \p STI.
242
243/// \returns Number of execution units per compute unit for given subtarget \p
244/// STI.
245unsigned getEUsPerCU(const MCSubtargetInfo *STI);
246
247/// \returns Maximum number of work groups per compute unit for given subtarget
248/// \p STI and limited by given \p FlatWorkGroupSize.
249unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
250 unsigned FlatWorkGroupSize);
251
252/// \returns Minimum number of waves per execution unit for given subtarget \p
253/// STI.
254unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
255
256/// \returns Maximum number of waves per execution unit for given subtarget \p
257/// STI without any kind of limitation.
258unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI);
259
260/// \returns Number of waves per execution unit required to support the given \p
261/// FlatWorkGroupSize.
263 unsigned FlatWorkGroupSize);
264
265/// \returns Minimum flat work group size for given subtarget \p STI.
266unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
267
/// \returns Maximum flat work group size. The value is a compile-time
/// constant and does not depend on any subtarget.
constexpr unsigned getMaxFlatWorkGroupSize() {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}
273
274/// \returns Number of waves per work group for given subtarget \p STI and
275/// \p FlatWorkGroupSize.
276unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
277 unsigned FlatWorkGroupSize);
278
279/// \returns SGPR allocation granularity for given subtarget \p STI.
280unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
281
282/// \returns SGPR encoding granularity for given subtarget \p STI.
283unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
284
285/// \returns Total number of SGPRs for given subtarget \p STI.
286unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
287
288/// \returns Addressable number of SGPRs for given subtarget \p STI.
289unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
290
291/// \returns Minimum number of SGPRs that meets the given number of waves per
292/// execution unit requirement for given subtarget \p STI.
293unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
294
295/// \returns Maximum number of SGPRs that meets the given number of waves per
296/// execution unit requirement for given subtarget \p STI.
297unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
298 bool Addressable);
299
300/// \returns Number of extra SGPRs implicitly required by given subtarget \p
301/// STI when the given special registers are used.
302unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
303 bool FlatScrUsed, bool XNACKUsed);
304
305/// \returns Number of extra SGPRs implicitly required by given subtarget \p
306/// STI when the given special registers are used. XNACK is inferred from
307/// \p STI.
308unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
309 bool FlatScrUsed);
310
311/// \returns Number of SGPR blocks needed for given subtarget \p STI when
312/// \p NumSGPRs are used. \p NumSGPRs should already include any special
313/// register counts.
314unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
315
316/// \returns VGPR allocation granularity for given subtarget \p STI.
317///
318/// For subtargets which support it, \p EnableWavefrontSize32 should match
319/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
320unsigned
321getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize,
322 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
323
324/// \returns VGPR encoding granularity for given subtarget \p STI.
325///
326/// For subtargets which support it, \p EnableWavefrontSize32 should match
327/// the ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
329 const MCSubtargetInfo *STI,
330 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
331
332/// For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage,
333/// returns the allocation granule for ArchVGPRs.
334unsigned getArchVGPRAllocGranule();
335
336/// \returns Total number of VGPRs for given subtarget \p STI.
337unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
338
339/// \returns Addressable number of architectural VGPRs for a given subtarget \p
340/// STI.
342
343/// \returns Addressable number of VGPRs for given subtarget \p STI.
344unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI,
345 unsigned DynamicVGPRBlockSize);
346
347/// \returns Minimum number of VGPRs that meets given number of waves per
348/// execution unit requirement for given subtarget \p STI.
349unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
350 unsigned DynamicVGPRBlockSize);
351
352/// \returns Maximum number of VGPRs that meets given number of waves per
353/// execution unit requirement for given subtarget \p STI.
354unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
355 unsigned DynamicVGPRBlockSize);
356
357/// \returns Number of waves reachable for a given \p NumVGPRs usage for given
358/// subtarget \p STI.
360 unsigned NumVGPRs,
361 unsigned DynamicVGPRBlockSize);
362
363/// \returns Number of waves reachable for a given \p NumVGPRs usage, \p Granule
364/// size, \p MaxWaves possible, and \p TotalNumVGPRs available.
365unsigned getNumWavesPerEUWithNumVGPRs(unsigned NumVGPRs, unsigned Granule,
366 unsigned MaxWaves,
367 unsigned TotalNumVGPRs);
368
369/// \returns Occupancy for a given \p SGPRs usage, \p MaxWaves possible, and \p
370/// Gen.
371unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves,
373
374/// \returns Number of VGPR blocks needed for given subtarget \p STI when
375/// \p NumVGPRs are used. We actually return the number of blocks -1, since
376/// that's what we encode.
377///
378/// For subtargets which support it, \p EnableWavefrontSize32 should match the
379/// ENABLE_WAVEFRONT_SIZE32 kernel descriptor field.
381 const MCSubtargetInfo *STI, unsigned NumVGPRs,
382 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
383
384/// \returns Number of VGPR blocks that need to be allocated for the given
385/// subtarget \p STI when \p NumVGPRs are used.
387 const MCSubtargetInfo *STI, unsigned NumVGPRs,
388 unsigned DynamicVGPRBlockSize,
389 std::optional<bool> EnableWavefrontSize32 = std::nullopt);
390
391} // end namespace IsaInfo
392
393// Represents a field in an encoded value.
394template <unsigned HighBit, unsigned LowBit, unsigned D = 0>
396 static_assert(HighBit >= LowBit, "Invalid bit range!");
397 static constexpr unsigned Offset = LowBit;
398 static constexpr unsigned Width = HighBit - LowBit + 1;
399
401 static constexpr ValueType Default = D;
402
405
406 constexpr uint64_t encode() const { return Value; }
407 static ValueType decode(uint64_t Encoded) { return Encoded; }
408};
409
410// Represents a single bit in an encoded value.
411template <unsigned Bit, unsigned D = 0>
413
// A helper for encoding and decoding multiple fields.
//
// Every type in \p Fields must provide the members used below: Offset, Width,
// ValueType, an encode() member function and a static decode() function.
template <typename... Fields> struct EncodingFields {
  // Shift each field's encoded value left by that field's Offset and OR all
  // of the shifted values together.
  static constexpr uint64_t encode(Fields... Values) {
    return ((Values.encode() << Values.Offset) | ...);
  }

  // For each field, shift \p Encoded right by the field's Offset, mask the
  // result with maxUIntN(Width) and pass it to the field's decode(). The
  // decoded values are returned in the order of \p Fields.
  static std::tuple<typename Fields::ValueType...> decode(uint64_t Encoded) {
    return {Fields::decode((Encoded >> Fields::Offset) &
                           maxUIntN(Fields::Width))...};
  }
};
425
427inline bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx) {
428 return getNamedOperandIdx(Opcode, NamedIdx) != -1;
429}
430
433
454
457
459const MIMGBaseOpcodeInfo *getMIMGBaseOpcodeInfo(unsigned BaseOpcode);
460
470
472const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
473
476
479
481 MIMGBaseOpcode L;
482 MIMGBaseOpcode LZ;
483};
484
486 MIMGBaseOpcode MIP;
487 MIMGBaseOpcode NONMIP;
488};
489
491 MIMGBaseOpcode Bias;
492 MIMGBaseOpcode NoBias;
493};
494
496 MIMGBaseOpcode Offset;
497 MIMGBaseOpcode NoOffset;
498};
499
501 MIMGBaseOpcode G;
502 MIMGBaseOpcode G16;
503};
504
507
509 unsigned Opcode2Addr;
510 unsigned Opcode3Addr;
511};
512
515
518
521
524
526int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
527 unsigned VDataDwords, unsigned VAddrDwords);
528
530int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
531
533unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
534 const MIMGDimInfo *Dim, bool IsA16,
535 bool IsG16Supported);
536
545
547const MIMGInfo *getMIMGInfo(unsigned Opc);
548
550int getMTBUFBaseOpcode(unsigned Opc);
551
553int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
554
556int getMTBUFElements(unsigned Opc);
557
559bool getMTBUFHasVAddr(unsigned Opc);
560
562bool getMTBUFHasSrsrc(unsigned Opc);
563
565bool getMTBUFHasSoffset(unsigned Opc);
566
568int getMUBUFBaseOpcode(unsigned Opc);
569
571int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements);
572
574int getMUBUFElements(unsigned Opc);
575
577bool getMUBUFHasVAddr(unsigned Opc);
578
580bool getMUBUFHasSrsrc(unsigned Opc);
581
583bool getMUBUFHasSoffset(unsigned Opc);
584
586bool getMUBUFIsBufferInv(unsigned Opc);
587
589bool getMUBUFTfe(unsigned Opc);
590
592bool getSMEMIsBuffer(unsigned Opc);
593
595bool getVOP1IsSingle(unsigned Opc);
596
598bool getVOP2IsSingle(unsigned Opc);
599
601bool getVOP3IsSingle(unsigned Opc);
602
604bool isVOPC64DPP(unsigned Opc);
605
607bool isVOPCAsmOnly(unsigned Opc);
608
609/// Returns true if MAI operation is a double precision GEMM.
611bool getMAIIsDGEMM(unsigned Opc);
612
614bool getMAIIsGFX940XDL(unsigned Opc);
615
617bool getWMMAIsXDL(unsigned Opc);
618
619// Get an equivalent BitOp3 for a binary logical \p Opc.
620// \returns BitOp3 modifier for the logical operation or zero.
621// Used in VOPD3 conversion.
622unsigned getBitOp2(unsigned Opc);
623
/// Pair of flags, one per VOPD component (X and Y). It is the return type of
/// getCanBeVOPD().
struct CanBeVOPD {
  bool X;
  bool Y;
};
628
629/// \returns SIEncodingFamily used for VOPD encoding on a \p ST.
631unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST);
632
634CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3);
635
637uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal);
638
641 unsigned BLGP,
642 unsigned F8F8Opcode);
643
646
649 unsigned FmtB,
650 unsigned F8F8Opcode);
651
654 uint8_t NumComponents,
655 uint8_t NumFormat,
656 const MCSubtargetInfo &STI);
659 const MCSubtargetInfo &STI);
660
662int32_t getMCOpcode(uint32_t Opcode, unsigned Gen);
663
665unsigned getVOPDOpcode(unsigned Opc, bool VOPD3);
666
668int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily,
669 bool VOPD3);
670
672bool isVOPD(unsigned Opc);
673
675bool isMAC(unsigned Opc);
676
678bool isPermlane16(unsigned Opc);
679
681bool isGenericAtomic(unsigned Opc);
682
684bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc);
685
686namespace VOPD {
687
698
// LSB mask for VGPR banks per VOPD component operand.
// 4 banks result in a mask 3, setting 2 lower bits.
constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
constexpr unsigned VOPD3_VGPR_BANK_MASKS[] = {1, 3, 3, 3};

// Index of a component within a VOPD instruction.
enum ComponentIndex : unsigned { X = 0, Y = 1 };
// Number of components in a VOPD instruction.
constexpr unsigned COMPONENTS_NUM = 2;
707
708// Properties of VOPD components.
710private:
711 unsigned SrcOperandsNum = 0;
712 unsigned MandatoryLiteralIdx = ~0u;
713 bool HasSrc2Acc = false;
714 unsigned NumVOPD3Mods = 0;
715 unsigned Opcode = 0;
716 bool IsVOP3 = false;
717
718public:
719 ComponentProps() = default;
720 ComponentProps(const MCInstrDesc &OpDesc, bool VOP3Layout = false);
721
722 // Return the total number of src operands this component has.
723 unsigned getCompSrcOperandsNum() const { return SrcOperandsNum; }
724
725 // Return the number of src operands of this component visible to the parser.
727 return SrcOperandsNum - HasSrc2Acc;
728 }
729
730 // Return true iif this component has a mandatory literal.
731 bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
732
733 // If this component has a mandatory literal, return component operand
734 // index of this literal (i.e. either Component::SRC1 or Component::SRC2).
737 return MandatoryLiteralIdx;
738 }
739
740 // Return true iif this component has operand
741 // with component index CompSrcIdx and this operand may be a register.
742 bool hasRegSrcOperand(unsigned CompSrcIdx) const {
743 assert(CompSrcIdx < Component::MAX_SRC_NUM);
744 return SrcOperandsNum > CompSrcIdx && !hasMandatoryLiteralAt(CompSrcIdx);
745 }
746
747 // Return true iif this component has tied src2.
748 bool hasSrc2Acc() const { return HasSrc2Acc; }
749
750 // Return a number of source modifiers if instruction is used in VOPD3.
751 unsigned getCompVOPD3ModsNum() const { return NumVOPD3Mods; }
752
753 // Return opcode of the component.
754 unsigned getOpcode() const { return Opcode; }
755
756 // Returns if component opcode is in VOP3 encoding.
757 unsigned isVOP3() const { return IsVOP3; }
758
759 // Return index of BitOp3 operand or -1.
760 int getBitOp3OperandIdx() const;
761
762private:
763 bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
764 assert(CompSrcIdx < Component::MAX_SRC_NUM);
765 return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
766 }
767};
768
769enum ComponentKind : unsigned {
770 SINGLE = 0, // A single VOP1 or VOP2 instruction which may be used in VOPD.
771 COMPONENT_X, // A VOPD instruction, X component.
772 COMPONENT_Y, // A VOPD instruction, Y component.
774};
775
776// Interface functions of this class map VOPD component operand indices
777// to indices of operands in MachineInstr/MCInst or parsed operands array.
778//
779// Note that this class operates with 3 kinds of indices:
780// - VOPD component operand indices (Component::DST, Component::SRC0, etc.);
781// - MC operand indices (they refer operands in a MachineInstr/MCInst);
782// - parsed operand indices (they refer operands in parsed operands array).
783//
784// For SINGLE components mapping between these indices is trivial.
785// But things get more complicated for COMPONENT_X and
786// COMPONENT_Y because these components share the same
787// MachineInstr/MCInst and the same parsed operands array.
788// Below is an example of component operand to parsed operand
789// mapping for the following instruction:
790//
791// v_dual_add_f32 v255, v4, v5 :: v_dual_mov_b32 v6, v1
792//
793// PARSED COMPONENT PARSED
794// COMPONENT OPERANDS OPERAND INDEX OPERAND INDEX
795// -------------------------------------------------------------------
796// "v_dual_add_f32" 0
797// v_dual_add_f32 v255 0 (DST) --> 1
798// v4 1 (SRC0) --> 2
799// v5 2 (SRC1) --> 3
800// "::" 4
801// "v_dual_mov_b32" 5
802// v_dual_mov_b32 v6 0 (DST) --> 6
803// v1 1 (SRC0) --> 7
804// -------------------------------------------------------------------
805//
807private:
// Regular MachineInstr/MCInst operands are ordered as follows:
//   dst, src0 [, other src operands]
// VOPD MachineInstr/MCInst operands are ordered as follows:
//   dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
// Each ComponentKind has operand indices defined below.
// MC_DST_IDX is indexed by ComponentKind and gives the MC index of dst.
static constexpr unsigned MC_DST_IDX[] = {0, 0, 1};
814
// VOPD3 instructions may have 2 or 3 source modifiers, src2 modifier is not
// used if there is tied accumulator. Indexing of this array:
// SINGLE_MC_SRC_IDX[VOPD3ModsNum][SrcNo]. This returns an index for a SINGLE
// instruction layout, add 1 for COMPONENT_X or COMPONENT_Y. For the second
// component add OpX.MCSrcNum + OpX.VOPD3ModsNum.
// For VOPD1/VOPD2 use the row with zero modifiers.
static constexpr unsigned SINGLE_MC_SRC_IDX[4][3] = {
    {1, 2, 3}, {2, 3, 4}, {2, 4, 5}, {2, 4, 6}};
823
// Parsed operands of regular instructions are ordered as follows:
//   Mnemo dst src0 [vsrc1 ...]
// Parsed VOPD operands are ordered as follows:
//   OpXMnemo dstX src0X [vsrc1X|imm vsrc1X|vsrc1X imm] '::'
//   OpYMnemo dstY src0Y [vsrc1Y|imm vsrc1Y|vsrc1Y imm]
// Each ComponentKind has operand indices defined below.
// Both tables are indexed by ComponentKind; the COMPONENT_Y entries are base
// values to which the number of parsed OpX src operands must be added.
static constexpr unsigned PARSED_DST_IDX[] = {1, 1,
                                              4 /* + OpX.ParsedSrcNum */};
static constexpr unsigned FIRST_PARSED_SRC_IDX[] = {
    2, 2, 5 /* + OpX.ParsedSrcNum */};
834
835private:
836 const ComponentKind Kind;
837 const ComponentProps PrevComp;
838 const unsigned VOPD3ModsNum;
839 const int BitOp3Idx; // Index of bitop3 operand or -1
840
841public:
842 // Create layout for COMPONENT_X or SINGLE component.
843 ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
844 : Kind(Kind), VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {
846 }
847
848 // Create layout for COMPONENT_Y which depends on COMPONENT_X layout.
849 ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum,
850 int BitOp3Idx)
851 : Kind(ComponentKind::COMPONENT_Y), PrevComp(OpXProps),
852 VOPD3ModsNum(VOPD3ModsNum), BitOp3Idx(BitOp3Idx) {}
853
854public:
855 // Return the index of dst operand in MCInst operands.
856 unsigned getIndexOfDstInMCOperands() const { return MC_DST_IDX[Kind]; }
857
858 // Return the index of the specified src operand in MCInst operands.
859 unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const {
860 assert(CompSrcIdx < Component::MAX_SRC_NUM);
861
862 if (Kind == SINGLE && CompSrcIdx == 2 && BitOp3Idx != -1)
863 return BitOp3Idx;
864
865 if (VOPD3) {
866 return SINGLE_MC_SRC_IDX[VOPD3ModsNum][CompSrcIdx] + getPrevCompSrcNum() +
867 getPrevCompVOPD3ModsNum() + (Kind != SINGLE ? 1 : 0);
868 }
869
870 return SINGLE_MC_SRC_IDX[0][CompSrcIdx] + getPrevCompSrcNum() +
871 (Kind != SINGLE ? 1 : 0);
872 }
873
874 // Return the index of dst operand in the parsed operands array.
876 return PARSED_DST_IDX[Kind] + getPrevCompParsedSrcNum();
877 }
878
879 // Return the index of the specified src operand in the parsed operands array.
880 unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const {
881 assert(CompSrcIdx < Component::MAX_SRC_NUM);
882 return FIRST_PARSED_SRC_IDX[Kind] + getPrevCompParsedSrcNum() + CompSrcIdx;
883 }
884
885private:
886 unsigned getPrevCompSrcNum() const {
887 return PrevComp.getCompSrcOperandsNum();
888 }
889 unsigned getPrevCompParsedSrcNum() const {
890 return PrevComp.getCompParsedSrcOperandsNum();
891 }
892 unsigned getPrevCompVOPD3ModsNum() const {
893 return PrevComp.getCompVOPD3ModsNum();
894 }
895};
896
897// Layout and properties of VOPD components.
899public:
900 // Create ComponentInfo for COMPONENT_X or SINGLE component.
903 bool VOP3Layout = false)
904 : ComponentProps(OpDesc, VOP3Layout),
906
907 // Create ComponentInfo for COMPONENT_Y which depends on COMPONENT_X layout.
908 ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps,
909 bool VOP3Layout = false)
910 : ComponentProps(OpDesc, VOP3Layout),
913
914 // Map component operand index to parsed operand index.
915 // Return 0 if the specified operand does not exist.
916 unsigned getIndexInParsedOperands(unsigned CompOprIdx) const;
917};
918
919// Properties of VOPD instructions.
920class InstInfo {
921private:
922 const ComponentInfo CompInfo[COMPONENTS_NUM];
923
924public:
925 using RegIndices = std::array<MCRegister, Component::MAX_OPR_NUM>;
926
927 InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
928 : CompInfo{OpX, OpY} {}
929
930 InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
931 : CompInfo{OprInfoX, OprInfoY} {}
932
933 const ComponentInfo &operator[](size_t ComponentIdx) const {
934 assert(ComponentIdx < COMPONENTS_NUM);
935 return CompInfo[ComponentIdx];
936 }
937
938 // Check VOPD operands constraints.
939 // GetRegIdx(Component, MCOperandIdx) must return a VGPR register index
940 // for the specified component and MC operand. The callback must return 0
941 // if the operand is not a register or not a VGPR.
942 // If \p SkipSrc is set to true then constraints for source operands are not
943 // checked.
944 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
945 // even though it violates requirement to be from different banks.
946 // If \p VOPD3 is set to true both dst registers allowed to be either odd
947 // or even and instruction may have real src2 as opposed to tied accumulator.
948 bool
949 hasInvalidOperand(std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
950 const MCRegisterInfo &MRI, bool SkipSrc = false,
951 bool AllowSameVGPR = false, bool VOPD3 = false) const {
952 return getInvalidCompOperandIndex(GetRegIdx, MRI, SkipSrc, AllowSameVGPR,
953 VOPD3)
954 .has_value();
955 }
956
957 // Check VOPD operands constraints.
958 // Return the index of an invalid component operand, if any.
959 // If \p SkipSrc is set to true then constraints for source operands are not
960 // checked except for being from the same halves of VGPR file on gfx1250.
961 // If \p AllowSameVGPR is set then same VGPRs are allowed for X and Y sources
962 // even though it violates requirement to be from different banks.
963 // If \p VOPD3 is set to true both dst registers allowed to be either odd
964 // or even and instruction may have real src2 as opposed to tied accumulator.
965 std::optional<unsigned> getInvalidCompOperandIndex(
966 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
967 const MCRegisterInfo &MRI, bool SkipSrc = false,
968 bool AllowSameVGPR = false, bool VOPD3 = false) const;
969
970private:
972 getRegIndices(unsigned ComponentIdx,
973 std::function<MCRegister(unsigned, unsigned)> GetRegIdx,
974 bool VOPD3) const;
975};
976
977} // namespace VOPD
978
980std::pair<unsigned, unsigned> getVOPDComponents(unsigned VOPDOpcode);
981
983// Get properties of 2 single VOP1/VOP2 instructions
984// used as components to create a VOPD instruction.
985VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY);
986
988// Get properties of VOPD X and Y components.
989VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode,
990 const MCInstrInfo *InstrInfo);
991
993bool isAsyncStore(unsigned Opc);
995bool isTensorStore(unsigned Opc);
997unsigned getTemporalHintType(const MCInstrDesc TID);
998
1000bool isTrue16Inst(unsigned Opc);
1001
1003FPType getFPDstSelType(unsigned Opc);
1004
1007
1010
1011bool isDPMACCInstruction(unsigned Opc);
1012
1014unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc);
1015
1017unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc);
1018
1020 const MCSubtargetInfo *STI);
1021
1022bool isGroupSegment(const GlobalValue *GV);
1023bool isGlobalSegment(const GlobalValue *GV);
1024bool isReadOnlySegment(const GlobalValue *GV);
1025
1026/// \returns True if constants should be emitted to .text section for given
1027/// target triple \p TT, false otherwise.
1029
1030/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1031/// register name. Followed by the start register number, and the register
1032/// width. Does not validate the number of registers exists in the class. Unlike
1033/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in
1034/// "{}".
1035std::tuple<char, unsigned, unsigned> parseAsmPhysRegName(StringRef TupleString);
1036
1037/// Returns a valid charcode or 0 in the first entry if this is a valid physical
1038/// register constraint. Followed by the start register number, and the register
1039/// width. Does not validate the number of registers exists in the class.
1040std::tuple<char, unsigned, unsigned>
1042
1043/// \returns Integer value requested using \p F's \p Name attribute.
1044///
1045/// \returns \p Default if attribute is not present.
1046///
1047/// \returns \p Default and emits error if requested value cannot be converted
1048/// to integer.
1050
1051/// \returns A pair of integer values requested using \p F's \p Name attribute
1052/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1053/// is false).
1054///
1055/// \returns \p Default if attribute is not present.
1056///
1057/// \returns \p Default and emits error if one of the requested values cannot be
1058/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
1059/// not present.
1060std::pair<unsigned, unsigned>
1062 std::pair<unsigned, unsigned> Default,
1063 bool OnlyFirstRequired = false);
1064
1065/// \returns A pair of integer values requested using \p F's \p Name attribute
1066/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
1067/// is false).
1068///
1069/// \returns \p std::nullopt if attribute is not present.
1070///
1071/// \returns \p std::nullopt and emits error if one of the requested values
1072/// cannot be converted to integer, or \p OnlyFirstRequired is false and
1073/// "second" value is not present.
1074std::optional<std::pair<unsigned, std::optional<unsigned>>>
1076 bool OnlyFirstRequired = false);
1077
1078/// \returns Generate a vector of integer values requested using \p F's \p Name
1079/// attribute.
1080/// \returns A vector of size \p Size, with all elements set to \p DefaultVal,
1081/// if any error occurs. The corresponding error will also be emitted.
1083 unsigned Size,
1084 unsigned DefaultVal);
1085/// Similar to the function above, but returns std::nullopt if any error occurs.
1086std::optional<SmallVector<unsigned>>
1087getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
1088
1089/// Checks if \p Val is inside \p MD, a !range-like metadata.
1090bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
1091
1093 LOAD_CNT = 0, // VMcnt prior to gfx12.
1094 DS_CNT, // LGKMcnt prior to gfx12.
1096 STORE_CNT, // VScnt in gfx10/gfx11.
1099 BVH_CNT, // gfx12+ only.
1100 KM_CNT, // gfx12+ only.
1101 X_CNT, // gfx1250.
1103 VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
1104 VM_VSRC, // gfx12+ expert mode only.
1107};
1108
1109// Return an iterator over all counters between LOAD_CNT (the first counter)
1110// and \c MaxCounter (exclusive, default value yields an enumeration over
1111// all counters).
1114
1115} // namespace AMDGPU
1116
1117template <> struct enum_iteration_traits<AMDGPU::InstCounterType> {
1118 static constexpr bool is_iterable = true;
1119};
1120
1121namespace AMDGPU {
1122
1123/// Represents the counter values to wait for in an s_waitcnt instruction.
1124///
1125/// Large values (including the maximum possible integer) can be used to
1126/// represent "don't care" waits.
1127class Waitcnt {
1128 unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
1129 unsigned ExpCnt = ~0u;
1130 unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
1131 unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
1132 unsigned SampleCnt = ~0u; // gfx12+ only.
1133 unsigned BvhCnt = ~0u; // gfx12+ only.
1134 unsigned KmCnt = ~0u; // gfx12+ only.
1135 unsigned XCnt = ~0u; // gfx1250.
1136 unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
1137 unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
1138
1139public:
1140 unsigned get(InstCounterType T) const {
1141 switch (T) {
1142 case LOAD_CNT:
1143 return LoadCnt;
1144 case EXP_CNT:
1145 return ExpCnt;
1146 case DS_CNT:
1147 return DsCnt;
1148 case STORE_CNT:
1149 return StoreCnt;
1150 case SAMPLE_CNT:
1151 return SampleCnt;
1152 case BVH_CNT:
1153 return BvhCnt;
1154 case KM_CNT:
1155 return KmCnt;
1156 case X_CNT:
1157 return XCnt;
1158 case VA_VDST:
1159 return VaVdst;
1160 case VM_VSRC:
1161 return VmVsrc;
1162 default:
1163 llvm_unreachable("bad InstCounterType");
1164 }
1165 }
1166 void set(InstCounterType T, unsigned Val) {
1167 switch (T) {
1168 case LOAD_CNT:
1169 LoadCnt = Val;
1170 break;
1171 case EXP_CNT:
1172 ExpCnt = Val;
1173 break;
1174 case DS_CNT:
1175 DsCnt = Val;
1176 break;
1177 case STORE_CNT:
1178 StoreCnt = Val;
1179 break;
1180 case SAMPLE_CNT:
1181 SampleCnt = Val;
1182 break;
1183 case BVH_CNT:
1184 BvhCnt = Val;
1185 break;
1186 case KM_CNT:
1187 KmCnt = Val;
1188 break;
1189 case X_CNT:
1190 XCnt = Val;
1191 break;
1192 case VA_VDST:
1193 VaVdst = Val;
1194 break;
1195 case VM_VSRC:
1196 VmVsrc = Val;
1197 break;
1198 default:
1199 llvm_unreachable("bad InstCounterType");
1200 }
1201 }
1202
1203 Waitcnt() = default;
1204 // Pre-gfx12 constructor.
1205 Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
1206 : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
1207
1208 // gfx12+ constructor.
1209 Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
1210 unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
1211 unsigned VaVdst, unsigned VmVsrc)
1212 : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
1213 SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
1214 VaVdst(VaVdst), VmVsrc(VmVsrc) {}
1215
1216 bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
1217
1219 return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
1220 SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
1221 VaVdst != ~0u || VmVsrc != ~0u;
1222 }
1223
1224 bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
1225
1226 bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
1227
1229 // Does the right thing provided self and Other are either both pre-gfx12
1230 // or both gfx12+.
1231 return Waitcnt(
1232 std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
1233 std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
1234 std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
1235 std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt),
1236 std::min(VaVdst, Other.VaVdst), std::min(VmVsrc, Other.VmVsrc));
1237 }
1238
1240};
1241
1242/// Represents the hardware counter limits for different wait count types.
1244 unsigned LoadcntMax; // Corresponds to Vmcnt prior to gfx12.
1245 unsigned ExpcntMax;
1246 unsigned DscntMax; // Corresponds to LGKMcnt prior to gfx12.
1247 unsigned StorecntMax; // Corresponds to VScnt in gfx10/gfx11.
1248 unsigned SamplecntMax; // gfx12+ only.
1249 unsigned BvhcntMax; // gfx12+ only.
1250 unsigned KmcntMax; // gfx12+ only.
1251 unsigned XcntMax; // gfx1250.
1252 unsigned VaVdstMax; // gfx12+ expert mode only.
1253 unsigned VmVsrcMax; // gfx12+ expert mode only.
1254
1255 HardwareLimits() = default;
1256
1257 /// Initializes hardware limits from ISA version.
1259};
1260
1261// The following methods are only meaningful on targets that support
1262// S_WAITCNT.
1263
1264/// \returns Vmcnt bit mask for given isa \p Version.
1265unsigned getVmcntBitMask(const IsaVersion &Version);
1266
1267/// \returns Expcnt bit mask for given isa \p Version.
1268unsigned getExpcntBitMask(const IsaVersion &Version);
1269
1270/// \returns Lgkmcnt bit mask for given isa \p Version.
1271unsigned getLgkmcntBitMask(const IsaVersion &Version);
1272
1273/// \returns Waitcnt bit mask for given isa \p Version.
1274unsigned getWaitcntBitMask(const IsaVersion &Version);
1275
1276/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
1277unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
1278
1279/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
1280unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
1281
1282/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
1283unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
1284
1285/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
1286/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
1287/// \p Lgkmcnt respectively. Should not be used on gfx12+, where the
1288/// instruction which needs it is deprecated.
1289///
1290/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
1291/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9)
1292/// \p Vmcnt = \p Waitcnt[15:14,3:0] (gfx9,10)
1293/// \p Vmcnt = \p Waitcnt[15:10] (gfx11)
1294/// \p Expcnt = \p Waitcnt[6:4] (pre-gfx11)
1295/// \p Expcnt = \p Waitcnt[2:0] (gfx11)
1296/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10)
1297/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10)
1298/// \p Lgkmcnt = \p Waitcnt[9:4] (gfx11)
1299///
1300void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt,
1301 unsigned &Expcnt, unsigned &Lgkmcnt);
1302
1303Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded);
1304
1305/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
1306unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1307 unsigned Vmcnt);
1308
1309/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
1310unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1311 unsigned Expcnt);
1312
1313/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
1314unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1315 unsigned Lgkmcnt);
1316
1317/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
1318/// \p Version. Should not be used on gfx12+, where the instruction which
1319/// needs it is deprecated.
1320///
1321/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
1322/// Waitcnt[2:0] = \p Expcnt (gfx11+)
1323/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9)
1324/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9,10)
1325/// Waitcnt[6:4] = \p Expcnt (pre-gfx11)
1326/// Waitcnt[9:4] = \p Lgkmcnt (gfx11)
1327/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10)
1328/// Waitcnt[13:8] = \p Lgkmcnt (gfx10)
1329/// Waitcnt[15:10] = \p Vmcnt (gfx11)
1330/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9,10)
1331///
1332/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
1333/// isa \p Version.
1334///
1335unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt,
1336 unsigned Expcnt, unsigned Lgkmcnt);
1337
1338unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded);
1339
1340// The following methods are only meaningful on targets that support
1341// S_WAIT_*CNT, introduced with gfx12.
1342
1343/// \returns Loadcnt bit mask for given isa \p Version.
1344/// Returns 0 for versions that do not support LOADcnt
1345unsigned getLoadcntBitMask(const IsaVersion &Version);
1346
1347/// \returns Samplecnt bit mask for given isa \p Version.
1348/// Returns 0 for versions that do not support SAMPLEcnt
1349unsigned getSamplecntBitMask(const IsaVersion &Version);
1350
1351/// \returns Bvhcnt bit mask for given isa \p Version.
1352/// Returns 0 for versions that do not support BVHcnt
1353unsigned getBvhcntBitMask(const IsaVersion &Version);
1354
1355/// \returns Dscnt bit mask for given isa \p Version.
1356/// Returns 0 for versions that do not support DScnt
1357unsigned getDscntBitMask(const IsaVersion &Version);
1358
1359/// \returns Kmcnt bit mask for given isa \p Version.
1360/// Returns 0 for versions that do not support KMcnt
1361unsigned getKmcntBitMask(const IsaVersion &Version);
1362
1363/// \returns Xcnt bit mask for given isa \p Version.
1364/// Returns 0 for versions that do not support Xcnt.
1365unsigned getXcntBitMask(const IsaVersion &Version);
1366
1367/// \returns STOREcnt or VScnt bit mask for given isa \p Version.
1368/// Returns 0 for versions that do not support STOREcnt or VScnt.
1369/// STOREcnt and VScnt are the same counter, the name used
1370/// depends on the ISA version.
1371unsigned getStorecntBitMask(const IsaVersion &Version);
1372
1373// The following are only meaningful on targets that support
1374// S_WAIT_LOADCNT_DSCNT and S_WAIT_STORECNT_DSCNT.
1375
1376/// \returns Decoded Waitcnt structure from given \p LoadcntDscnt for given
1377/// isa \p Version.
1378Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt);
1379
1380/// \returns Decoded Waitcnt structure from given \p StorecntDscnt for given
1381/// isa \p Version.
1382Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt);
1383
1384/// \returns \p Loadcnt and \p Dscnt components of \p Decoded encoded as an
1385/// immediate that can be used with S_WAIT_LOADCNT_DSCNT for given isa
1386/// \p Version.
1387unsigned encodeLoadcntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1388
1389/// \returns \p Storecnt and \p Dscnt components of \p Decoded encoded as an
1390/// immediate that can be used with S_WAIT_STORECNT_DSCNT for given isa
1391/// \p Version.
1392unsigned encodeStorecntDscnt(const IsaVersion &Version, const Waitcnt &Decoded);
1393
1394namespace Hwreg {
1395
1398
// Encoding field whose stored bits are biased by one relative to the logical
// value: encode() yields Value - 1 and decode() yields Encoded + 1.
// NOTE(review): the meaning of the EncodingField<15, 11, 32> arguments is
// defined by EncodingField, which is not visible here — confirm there.
1399struct HwregSize : EncodingField<15, 11, 32> {
 // Stored representation is the logical value minus one.
1401 constexpr uint64_t encode() const { return Value - 1; }
 // Inverse of encode(): add the one back to recover the logical value.
1402 static ValueType decode(uint64_t Encoded) { return Encoded + 1; }
1403};
1404
1406
1407} // namespace Hwreg
1408
1409namespace DepCtr {
1410
1412int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1413 const MCSubtargetInfo &STI);
1414bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1415 const MCSubtargetInfo &STI);
1416bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1417 bool &IsDefault, const MCSubtargetInfo &STI);
1418
1419/// \returns Maximum VaVdst value that can be encoded.
1420unsigned getVaVdstBitMask();
1421
1422/// \returns Maximum VaSdst value that can be encoded.
1423unsigned getVaSdstBitMask();
1424
1425/// \returns Maximum VaSsrc value that can be encoded.
1426unsigned getVaSsrcBitMask();
1427
1428/// \returns Maximum HoldCnt value that can be encoded.
1429unsigned getHoldCntBitMask(const IsaVersion &Version);
1430
1431/// \returns Maximum VmVsrc value that can be encoded.
1432unsigned getVmVsrcBitMask();
1433
1434/// \returns Maximum VaVcc value that can be encoded.
1435unsigned getVaVccBitMask();
1436
1437/// \returns Maximum SaSdst value that can be encoded.
1438unsigned getSaSdstBitMask();
1439
1440/// \returns Decoded VaVdst from given immediate \p Encoded.
1441unsigned decodeFieldVaVdst(unsigned Encoded);
1442
1443/// \returns Decoded VmVsrc from given immediate \p Encoded.
1444unsigned decodeFieldVmVsrc(unsigned Encoded);
1445
1446/// \returns Decoded SaSdst from given immediate \p Encoded.
1447unsigned decodeFieldSaSdst(unsigned Encoded);
1448
1449/// \returns Decoded VaSdst from given immediate \p Encoded.
1450unsigned decodeFieldVaSdst(unsigned Encoded);
1451
1452/// \returns Decoded VaVcc from given immediate \p Encoded.
1453unsigned decodeFieldVaVcc(unsigned Encoded);
1454
1455/// \returns Decoded VaSsrc from given immediate \p Encoded.
1456unsigned decodeFieldVaSsrc(unsigned Encoded);
1457
1458/// \returns Decoded HoldCnt from given immediate \p Encoded.
1459unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version);
1460
1461/// \returns \p VmVsrc as an encoded Depctr immediate.
1462unsigned encodeFieldVmVsrc(unsigned VmVsrc, const MCSubtargetInfo &STI);
1463
1464/// \returns \p Encoded combined with encoded \p VmVsrc.
1465unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
1466
1467/// \returns \p VaVdst as an encoded Depctr immediate.
1468unsigned encodeFieldVaVdst(unsigned VaVdst, const MCSubtargetInfo &STI);
1469
1470/// \returns \p Encoded combined with encoded \p VaVdst.
1471unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
1472
1473/// \returns \p SaSdst as an encoded Depctr immediate.
1474unsigned encodeFieldSaSdst(unsigned SaSdst, const MCSubtargetInfo &STI);
1475
1476/// \returns \p Encoded combined with encoded \p SaSdst.
1477unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
1478
1479/// \returns \p VaSdst as an encoded Depctr immediate.
1480unsigned encodeFieldVaSdst(unsigned VaSdst, const MCSubtargetInfo &STI);
1481
1482/// \returns \p Encoded combined with encoded \p VaSdst.
1483unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst);
1484
1485/// \returns \p VaVcc as an encoded Depctr immediate.
1486unsigned encodeFieldVaVcc(unsigned VaVcc, const MCSubtargetInfo &STI);
1487
1488/// \returns \p Encoded combined with encoded \p VaVcc.
1489unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc);
1490
1491/// \returns \p HoldCnt as an encoded Depctr immediate.
1492unsigned encodeFieldHoldCnt(unsigned HoldCnt, const MCSubtargetInfo &STI);
1493
1494/// \returns \p Encoded combined with encoded \p HoldCnt.
1495unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt,
1496 const IsaVersion &Version);
1497
1498/// \returns \p VaSsrc as an encoded Depctr immediate.
1499unsigned encodeFieldVaSsrc(unsigned VaSsrc, const MCSubtargetInfo &STI);
1500
1501/// \returns \p Encoded combined with encoded \p VaSsrc.
1502unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc);
1503
1504} // namespace DepCtr
1505
1506namespace Exp {
1507
1508bool getTgtName(unsigned Id, StringRef &Name, int &Index);
1509
1511unsigned getTgtId(const StringRef Name);
1512
1514bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI);
1515
1516} // namespace Exp
1517
1518namespace MTBUFFormat {
1519
1521int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt);
1522
1523void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt);
1524
1525int64_t getDfmt(const StringRef Name);
1526
1527StringRef getDfmtName(unsigned Id);
1528
1529int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI);
1530
1531StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI);
1532
1533bool isValidDfmtNfmt(unsigned Val, const MCSubtargetInfo &STI);
1534
1535bool isValidNfmt(unsigned Val, const MCSubtargetInfo &STI);
1536
1537int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI);
1538
1539StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI);
1540
1541bool isValidUnifiedFormat(unsigned Val, const MCSubtargetInfo &STI);
1542
1543int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1544 const MCSubtargetInfo &STI);
1545
1546bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI);
1547
1548unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI);
1549
1550} // namespace MTBUFFormat
1551
1552namespace SendMsg {
1553
1555bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI);
1556
1558bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1559 bool Strict = true);
1560
1562bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1563 const MCSubtargetInfo &STI, bool Strict = true);
1564
1566bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI);
1567
1569bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI);
1570
1571void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1572 uint16_t &StreamId, const MCSubtargetInfo &STI);
1573
1575uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId);
1576
1577} // namespace SendMsg
1578
1579unsigned getInitialPSInputAddr(const Function &F);
1580
1581bool getHasColorExport(const Function &F);
1582
1583bool getHasDepthExport(const Function &F);
1584
1586
1587// Returns the value of the "amdgpu-dynamic-vgpr-block-size" attribute, or 0 if
1588// the attribute is missing or its value is invalid.
1589unsigned getDynamicVGPRBlockSize(const Function &F);
1590
1592constexpr bool isShader(CallingConv::ID CC) {
1593 switch (CC) {
1603 return true;
1604 default:
1605 return false;
1606 }
1607}
1608
1610constexpr bool isGraphics(CallingConv::ID CC) {
1611 return isShader(CC) || CC == CallingConv::AMDGPU_Gfx ||
1613}
1614
1616constexpr bool isCompute(CallingConv::ID CC) {
1617 return !isGraphics(CC) || CC == CallingConv::AMDGPU_CS;
1618}
1619
1622 switch (CC) {
1632 return true;
1633 default:
1634 return false;
1635 }
1636}
1637
1639constexpr bool isChainCC(CallingConv::ID CC) {
1640 switch (CC) {
1643 return true;
1644 default:
1645 return false;
1646 }
1647}
1648
1649// These functions are considered entrypoints into the current module, i.e. they
1650// are allowed to be called from outside the current module. This is different
1651// from isEntryFunctionCC, which is only true for functions that are entered by
1652// the hardware. Module entry points include all entry functions but also
1653// include functions that can be called from other functions inside or outside
1654// the current module. Module entry functions are allowed to allocate LDS.
1655//
1656// AMDGPU_CS_Chain is intended for externally callable chain functions, so it is
1657// treated as a module entrypoint. AMDGPU_CS_ChainPreserve is used for internal
1658// helper functions (e.g. retry helpers), so it is not a module entrypoint.
1661 switch (CC) {
1664 return true;
1665 default:
1666 return isEntryFunctionCC(CC);
1667 }
1668}
1669
1671constexpr inline bool isKernel(CallingConv::ID CC) {
1672 switch (CC) {
1675 return true;
1676 default:
1677 return false;
1678 }
1679}
1680
1681inline bool isKernel(const Function &F) { return isKernel(F.getCallingConv()); }
1682
1685 return CC == CallingConv::Fast;
1686}
1687
1688/// Return true if we might ever do TCO for calls with this calling convention.
1691 switch (CC) {
1692 case CallingConv::C:
1695 return true;
1696 default:
1697 return canGuaranteeTCO(CC);
1698 }
1699}
1700
1701bool hasXNACK(const MCSubtargetInfo &STI);
1702bool hasSRAMECC(const MCSubtargetInfo &STI);
1703bool hasMIMG_R128(const MCSubtargetInfo &STI);
1704bool hasA16(const MCSubtargetInfo &STI);
1705bool hasG16(const MCSubtargetInfo &STI);
1706bool hasPackedD16(const MCSubtargetInfo &STI);
1707bool hasGDS(const MCSubtargetInfo &STI);
1708unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler = false);
1709unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI);
1710
1711bool isSI(const MCSubtargetInfo &STI);
1712bool isCI(const MCSubtargetInfo &STI);
1713bool isVI(const MCSubtargetInfo &STI);
1714bool isGFX9(const MCSubtargetInfo &STI);
1715bool isGFX9_GFX10(const MCSubtargetInfo &STI);
1716bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI);
1717bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI);
1718bool isGFX8Plus(const MCSubtargetInfo &STI);
1719bool isGFX9Plus(const MCSubtargetInfo &STI);
1720bool isNotGFX9Plus(const MCSubtargetInfo &STI);
1721bool isGFX10(const MCSubtargetInfo &STI);
1722bool isGFX10_GFX11(const MCSubtargetInfo &STI);
1723bool isGFX10Plus(const MCSubtargetInfo &STI);
1724bool isNotGFX10Plus(const MCSubtargetInfo &STI);
1725bool isGFX10Before1030(const MCSubtargetInfo &STI);
1726bool isGFX11(const MCSubtargetInfo &STI);
1727bool isGFX1170(const MCSubtargetInfo &STI);
1728bool isGFX11Plus(const MCSubtargetInfo &STI);
1729bool isGFX12(const MCSubtargetInfo &STI);
1730bool isGFX12Plus(const MCSubtargetInfo &STI);
1731bool isGFX1250(const MCSubtargetInfo &STI);
1732bool isGFX1250Plus(const MCSubtargetInfo &STI);
1733bool isGFX13(const MCSubtargetInfo &STI);
1734bool isGFX13Plus(const MCSubtargetInfo &STI);
1735bool supportsWGP(const MCSubtargetInfo &STI);
1736bool isNotGFX12Plus(const MCSubtargetInfo &STI);
1737bool isNotGFX11Plus(const MCSubtargetInfo &STI);
1738bool isGCN3Encoding(const MCSubtargetInfo &STI);
1739bool isGFX10_AEncoding(const MCSubtargetInfo &STI);
1740bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
1741bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
1742bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
1743bool isGFX90A(const MCSubtargetInfo &STI);
1744bool isGFX940(const MCSubtargetInfo &STI);
1746bool hasMAIInsts(const MCSubtargetInfo &STI);
1747bool hasVOPD(const MCSubtargetInfo &STI);
1748bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
1749
1750inline bool supportsWave32(const MCSubtargetInfo &STI) {
1751 return AMDGPU::isGFX10Plus(STI) && !AMDGPU::isGFX1250(STI);
1752}
1753
1754int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
1755unsigned hasKernargPreload(const MCSubtargetInfo &STI);
1757
1758/// Is \p Reg a scalar register?
1759bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI);
1760
1761/// \returns true if \p Reg occupies the high 16-bits of a 32-bit register.
1762bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI);
1763
1764/// If \p Reg is a pseudo reg, return the correct hardware register given
1765/// \p STI otherwise return \p Reg.
1767
1768/// Convert hardware register \p Reg to a pseudo register
1771
1774
1775/// Is this an AMDGPU specific source operand? These include registers,
1776/// inline constants, literals and mandatory literals (KImm).
1777constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo) {
1778 return OpInfo.OperandType >= AMDGPU::OPERAND_SRC_FIRST &&
1779 OpInfo.OperandType <= AMDGPU::OPERAND_SRC_LAST;
1780}
1781
1782inline bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1783 return isSISrcOperand(Desc.operands()[OpNo]);
1784}
1785
1786/// Is this a KImm operand?
1787bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo);
1788
1789/// Is this a floating-point operand?
1790bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
1791
1792/// Does this operand support only inlinable literals?
1793bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
1794
1795/// Get the size in bits of a register from the register class with ID \p RCID.
1796unsigned getRegBitWidth(unsigned RCID);
1797
1798/// Get the size in bits of a register from the register class \p RC.
1799unsigned getRegBitWidth(const MCRegisterClass &RC);
1800
1802inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
1803 switch (OpInfo.OperandType) {
1813 case AMDGPU::OPERAND_KIMM16: // mandatory literal is always size 4
1815 return 4;
1816
1823 return 8;
1824
1839 return 2;
1840
1841 default:
1842 llvm_unreachable("unhandled operand type");
1843 }
1844}
1845
1847inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
1848 return getOperandSize(Desc.operands()[OpNo]);
1849}
1850
/// Is this literal inlinable, and not one of the values intended for floating
/// point values.
///
/// \returns true if \p Literal lies in the inclusive range [-16, 64].
inline bool isInlinableIntLiteral(int64_t Literal) {
  return Literal >= -16 && Literal <= 64;
}
1857
1858/// Is this literal inlinable
1860bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
1861
1863bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
1864
1866bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi);
1867
1869bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi);
1870
1872bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi);
1873
1875std::optional<unsigned> getInlineEncodingV2I16(uint32_t Literal);
1876
1878std::optional<unsigned> getInlineEncodingV2BF16(uint32_t Literal);
1879
1881std::optional<unsigned> getInlineEncodingV2F16(uint32_t Literal);
1882
1884std::optional<unsigned> getPKFMACF16InlineEncoding(uint32_t Literal,
1885 bool IsGFX11Plus);
1886
1889
1892
1895
1898
1900bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus);
1901
1903bool isValid32BitLiteral(uint64_t Val, bool IsFP64);
1904
1906int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit);
1907
1908bool isArgPassedInSGPR(const Argument *Arg);
1909
1910bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
1911
1912LLVM_READONLY bool isPackedFP32Inst(unsigned Opc);
1913
1916 int64_t EncodedOffset);
1917
1920 int64_t EncodedOffset, bool IsBuffer);
1921
1922/// Convert \p ByteOffset to dwords if the subtarget uses dword SMRD immediate
1923/// offsets.
1925
1926/// \returns The encoding that will be used for \p ByteOffset in the
1927/// SMRD offset field, or std::nullopt if it won't fit. On GFX9 and GFX10
1928/// S_LOAD instructions have a signed offset, on other subtargets it is
1929/// unsigned. S_BUFFER has an unsigned offset for all subtargets.
1930std::optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
1931 int64_t ByteOffset, bool IsBuffer,
1932 bool HasSOffset = false);
1933
1934/// \return The encoding that can be used for a 32-bit literal offset in an SMRD
1935/// instruction. This is only useful on CI.
1936std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
1937 int64_t ByteOffset);
1938
1939/// For pre-GFX12 FLAT instructions the offset must be positive;
1940/// MSB is ignored and forced to zero.
1941///
1942/// \return The number of bits available for the signed offset field in flat
1943/// instructions. Note that some forms of the instruction disallow negative
1944/// offsets.
1945unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
1946
1947/// \returns true if this offset is small enough to fit in the SMRD
1948/// offset field. \p ByteOffset should be the offset in bytes and
1949/// not the encoded offset.
1950bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
1951
1953inline bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC) {
1954 if (isGFX12(ST))
1955 return DC >= DPP::ROW_SHARE_FIRST && DC <= DPP::ROW_SHARE_LAST;
1956 if (isGFX90A(ST))
1957 return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
1958 return false;
1959}
1960
1961/// \returns true if an instruction may have a 64-bit VGPR operand.
1963 const MCSubtargetInfo &ST);
1964
1965/// \returns true if an instruction is a DP ALU DPP without any 64-bit operands.
1966bool isDPALU_DPP32BitOpc(unsigned Opc);
1967
1968/// \returns true if an instruction is a DP ALU DPP.
1969bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII,
1970 const MCSubtargetInfo &ST);
1971
1972/// \returns true if the intrinsic is divergent
1973bool isIntrinsicSourceOfDivergence(unsigned IntrID);
1974
1975/// \returns true if the intrinsic is uniform
1976bool isIntrinsicAlwaysUniform(unsigned IntrID);
1977
1978/// \returns a register class for the physical register \p Reg if it is a VGPR
1979/// or nullptr otherwise.
1981 const MCRegisterInfo &MRI);
1982
1983/// \returns the MODE bits which have to be set by the S_SET_VGPR_MSB for the
1984/// physical register \p Reg.
1985unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI);
1986
1987/// If \p Reg is a low VGPR return a corresponding high VGPR with \p MSBs set.
1989 const MCRegisterInfo &MRI);
1990
1991// Returns a table for the opcode with a given \p Desc to map the VGPR MSB
1992// set by the S_SET_VGPR_MSB to one of 4 sources. In case of VOPD returns 2
1993// maps, one for X and one for Y component.
1994std::pair<const AMDGPU::OpName *, const AMDGPU::OpName *>
1996
1997/// \returns true if a memory instruction supports scale_offset modifier.
1998bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode);
1999
2000/// \returns lds block size in terms of dwords.
2001/// This is used to calculate the lds size encoded for PAL metadata 3.0+ which
2002/// must be defined in terms of bytes.
2003unsigned getLdsDwGranularity(const MCSubtargetInfo &ST);
2004
2006public:
2008
2009 ClusterDimsAttr() = default;
2010
2011 Kind getKind() const { return AttrKind; }
2012
2013 bool isUnknown() const { return getKind() == Kind::Unknown; }
2014
2015 bool isNoCluster() const { return getKind() == Kind::NoCluster; }
2016
2017 bool isFixedDims() const { return getKind() == Kind::FixedDims; }
2018
2019 bool isVariableDims() const { return getKind() == Kind::VariableDims; }
2020
2022
2024
2026
2027 /// \returns the dims stored. Note that this function can only be called if
2028 /// the kind is \c FixedDims.
2029 const std::array<unsigned, 3> &getDims() const;
2030
2031 bool operator==(const ClusterDimsAttr &RHS) const {
2032 return AttrKind == RHS.AttrKind && Dims == RHS.Dims;
2033 }
2034
2035 std::string to_string() const;
2036
2037 static ClusterDimsAttr get(const Function &F);
2038
2039private:
2040 enum Encoding { EncoNoCluster = 0, EncoVariableDims = 1024 };
2041
2042 ClusterDimsAttr(Kind AttrKind) : AttrKind(AttrKind) {}
2043
2044 std::array<unsigned, 3> Dims = {0, 0, 0};
2045
2046 Kind AttrKind = Kind::Unknown;
2047};
2048
2049} // namespace AMDGPU
2050
2053
2054} // end namespace llvm
2055
2056#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Base class for AMDGPU specific classes of TargetSubtarget.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_READONLY
Definition Compiler.h:322
Module.h This file contains the declarations for the Module class.
#define F(x, y, z)
Definition MD5.cpp:54
#define G(x, y, z)
Definition MD5.cpp:55
Register Reg
Register const TargetRegisterInfo * TRI
#define T
Value * RHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
static ClusterDimsAttr get(const Function &F)
bool operator==(const ClusterDimsAttr &RHS) const
const std::array< unsigned, 3 > & getDims() const
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
TargetIDSetting getXnackSetting() const
void print(raw_ostream &OS) const
Write string representation to OS.
AMDGPUTargetID(const MCSubtargetInfo &STI)
void setTargetIDFromTargetIDStream(StringRef TargetID)
void setSramEccSetting(TargetIDSetting NewSramEccSetting)
Sets sramecc setting to NewSramEccSetting.
TargetIDSetting getSramEccSetting() const
unsigned getIndexInParsedOperands(unsigned CompOprIdx) const
ComponentInfo(const MCInstrDesc &OpDesc, ComponentKind Kind=ComponentKind::SINGLE, bool VOP3Layout=false)
ComponentInfo(const MCInstrDesc &OpDesc, const ComponentProps &OpXProps, bool VOP3Layout=false)
unsigned getIndexOfSrcInMCOperands(unsigned CompSrcIdx, bool VOPD3) const
ComponentLayout(const ComponentProps &OpXProps, unsigned VOPD3ModsNum, int BitOp3Idx)
unsigned getIndexOfSrcInParsedOperands(unsigned CompSrcIdx) const
ComponentLayout(ComponentKind Kind, unsigned VOPD3ModsNum, int BitOp3Idx)
bool hasRegSrcOperand(unsigned CompSrcIdx) const
unsigned getMandatoryLiteralCompOperandIndex() const
std::optional< unsigned > getInvalidCompOperandIndex(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
InstInfo(const ComponentInfo &OprInfoX, const ComponentInfo &OprInfoY)
bool hasInvalidOperand(std::function< MCRegister(unsigned, unsigned)> GetRegIdx, const MCRegisterInfo &MRI, bool SkipSrc=false, bool AllowSameVGPR=false, bool VOPD3=false) const
const ComponentInfo & operator[](size_t ComponentIdx) const
InstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
std::array< MCRegister, Component::MAX_OPR_NUM > RegIndices
Represents the counter values to wait for in an s_waitcnt instruction.
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt, unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt, unsigned VaVdst, unsigned VmVsrc)
bool hasWaitExceptStoreCnt() const
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
Waitcnt combined(const Waitcnt &Other) const
unsigned get(InstCounterType T) const
friend raw_ostream & operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait)
void set(InstCounterType T, unsigned Val)
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
MCRegisterClass - Base class of TargetRegisterClass.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned decodeFieldVaVcc(unsigned Encoded)
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
unsigned decodeFieldHoldCnt(unsigned Encoded, const IsaVersion &Version)
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val, bool &IsDefault, const MCSubtargetInfo &STI)
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt, const IsaVersion &Version)
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
unsigned decodeFieldSaSdst(unsigned Encoded)
unsigned getHoldCntBitMask(const IsaVersion &Version)
unsigned decodeFieldVaSdst(unsigned Encoded)
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
unsigned decodeFieldVaSsrc(unsigned Encoded)
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal, const MCSubtargetInfo &STI)
unsigned decodeFieldVaVdst(unsigned Encoded)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
unsigned decodeFieldVmVsrc(unsigned Encoded)
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
bool getTgtName(unsigned Id, StringRef &Name, int &Index)
unsigned getTgtId(const StringRef Name)
Generic target versions emitted by this version of LLVM.
static constexpr unsigned GFX12_5
static constexpr unsigned GFX9_4
static constexpr unsigned GFX10_1
static constexpr unsigned GFX10_3
static constexpr unsigned GFX11
static constexpr unsigned GFX9
static constexpr unsigned GFX12
EncodingField< 10, 6 > HwregOffset
EncodingField< 5, 0 > HwregId
EncodingFields< HwregId, HwregOffset, HwregSize > HwregEncoding
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getArchVGPRAllocGranule()
For subtargets with a unified VGPR file and mixed ArchVGPR/AGPR usage, returns the allocation granule...
unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getWavefrontSize(const MCSubtargetInfo *STI)
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize)
unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI, unsigned FlatWorkGroupSize)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getEUsPerCU(const MCSubtargetInfo *STI)
constexpr unsigned getMaxFlatWorkGroupSize()
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, bool Addressable)
unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs)
unsigned getMinWavesPerEU(const MCSubtargetInfo *STI)
unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI)
unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU, unsigned DynamicVGPRBlockSize)
raw_ostream & operator<<(raw_ostream &OS, const AMDGPUTargetID &TargetID)
unsigned getAllocatedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
unsigned getEncodedNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs, std::optional< bool > EnableWavefrontSize32)
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs, unsigned MaxWaves, AMDGPUSubtarget::Generation Gen)
unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI)
unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI)
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize)
bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI)
bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI)
bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
StringRef getDfmtName(unsigned Id)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt)
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId, uint16_t &StreamId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
constexpr unsigned VOPD_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS_NUM
constexpr unsigned VOPD3_VGPR_BANK_MASKS[]
constexpr unsigned COMPONENTS[]
bool isPackedFP32Inst(unsigned Opc)
bool isGCN3Encoding(const MCSubtargetInfo &STI)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGG16MappingInfo * getMIMGG16MappingInfo(unsigned G)
bool isInlineValue(MCRegister Reg)
bool isGFX10_GFX11(const MCSubtargetInfo &STI)
bool isInlinableLiteralV216(uint32_t Literal, uint8_t OpType)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt)
Decodes Vmcnt, Expcnt and Lgkmcnt from given Waitcnt for given isa Version, and writes decoded values...
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg a scalar register?
uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST, uint64_t ByteOffset)
Convert ByteOffset to dwords if the subtarget uses dword SMRD immediate offsets.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI; otherwise, return Reg.
LLVM_READONLY const MIMGOffsetMappingInfo * getMIMGOffsetMappingInfo(unsigned Offset)
bool isVOPCAsmOnly(unsigned Opc)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool getMTBUFHasSrsrc(unsigned Opc)
std::optional< int64_t > getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST, int64_t ByteOffset)
bool getWMMAIsXDL(unsigned Opc)
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
bool isGFX10Before1030(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo)
Does this operand support only inlinable literals?
unsigned mapWMMA2AddrTo3AddrOpcode(unsigned Opc)
bool shouldEmitConstantsToTextSection(const Triple &TT)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isDPMACCInstruction(unsigned Opc)
int getMTBUFElements(unsigned Opc)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
iota_range< InstCounterType > inst_counter_types(InstCounterType MaxCounter)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
FPType getFPDstSelType(unsigned Opc)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
const MCRegisterClass * getVGPRPhysRegClass(MCRegister Reg, const MCRegisterInfo &MRI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
int getIntegerAttribute(const Function &F, StringRef Name, int Default)
bool hasPackedD16(const MCSubtargetInfo &STI)
unsigned getStorecntBitMask(const IsaVersion &Version)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI)
bool isGFX13(const MCSubtargetInfo &STI)
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val)
Checks if Val is inside MD, a !range-like metadata.
LLVM_READONLY bool isInvalidSingleUseProducerInst(unsigned Opc)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_READONLY bool isInvalidSingleUseConsumerInst(unsigned Opc)
unsigned getVOPDOpcode(unsigned Opc, bool VOPD3)
bool isGroupSegment(const GlobalValue *GV)
LLVM_READONLY const MIMGMIPMappingInfo * getMIMGMIPMappingInfo(unsigned MIP)
bool getMTBUFHasSoffset(unsigned Opc)
bool hasXNACK(const MCSubtargetInfo &STI)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
unsigned encodeWaitcnt(const IsaVersion &Version, unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt)
Encodes Vmcnt, Expcnt and Lgkmcnt into Waitcnt for the given ISA Version.
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isVOPC64DPP(unsigned Opc)
int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool getMAIIsGFX940XDL(unsigned Opc)
bool isSI(const MCSubtargetInfo &STI)
unsigned getDefaultAMDHSACodeObjectVersion()
bool isReadOnlySegment(const GlobalValue *GV)
bool isArgPassedInSGPR(const Argument *A)
LLVM_READNONE constexpr bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
bool isIntrinsicAlwaysUniform(unsigned IntrID)
int getMUBUFBaseOpcode(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool getVOP3IsSingle(unsigned Opc)
bool isGFX9(const MCSubtargetInfo &STI)
bool isDPALU_DPP32BitOpc(unsigned Opc)
bool getVOP1IsSingle(unsigned Opc)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool getHasColorExport(const Function &F)
int getMTBUFBaseOpcode(unsigned Opc)
bool isGFX90A(const MCSubtargetInfo &STI)
unsigned getSamplecntBitMask(const IsaVersion &Version)
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
std::tuple< char, unsigned, unsigned > parseAsmPhysRegName(StringRef RegName)
Returns a valid charcode or 0 in the first entry if this is a valid physical register name.
bool hasSRAMECC(const MCSubtargetInfo &STI)
bool getHasDepthExport(const Function &F)
bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
bool getMUBUFHasVAddr(unsigned Opc)
bool isTrue16Inst(unsigned Opc)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getVGPREncodingMSBs(MCRegister Reg, const MCRegisterInfo &MRI)
std::pair< unsigned, unsigned > getVOPDComponents(unsigned VOPDOpcode)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned getInitialPSInputAddr(const Function &F)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool isAsyncStore(unsigned Opc)
unsigned getDynamicVGPRBlockSize(const Function &F)
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset)
unsigned getKmcntBitMask(const IsaVersion &Version)
MCRegister getVGPRWithMSBs(MCRegister Reg, unsigned MSBs, const MCRegisterInfo &MRI)
If Reg is a low VGPR, return a corresponding high VGPR with MSBs set.
unsigned getVmcntBitMask(const IsaVersion &Version)
bool isNotGFX10Plus(const MCSubtargetInfo &STI)
bool hasMAIInsts(const MCSubtargetInfo &STI)
unsigned getBitOp2(unsigned Opc)
bool isIntrinsicSourceOfDivergence(unsigned IntrID)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
unsigned getXcntBitMask(const IsaVersion &Version)
bool isGenericAtomic(unsigned Opc)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
Waitcnt decodeStorecntDscnt(const IsaVersion &Version, unsigned StorecntDscnt)
bool isGFX8Plus(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
unsigned getLgkmcntBitMask(const IsaVersion &Version)
bool getMUBUFTfe(unsigned Opc)
LLVM_READONLY const MIMGBiasMappingInfo * getMIMGBiasMappingInfo(unsigned Bias)
unsigned getBvhcntBitMask(const IsaVersion &Version)
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasGFX10_3Insts(const MCSubtargetInfo &STI)
std::pair< const AMDGPU::OpName *, const AMDGPU::OpName * > getVGPRLoweringOperandTables(const MCInstrDesc &Desc)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements)
bool isGFX13Plus(const MCSubtargetInfo &STI)
unsigned getExpcntBitMask(const IsaVersion &Version)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
int32_t getMCOpcode(uint32_t Opcode, unsigned Gen)
bool getMUBUFHasSoffset(unsigned Opc)
bool isNotGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
std::optional< unsigned > getInlineEncodingV2F16(uint32_t Literal)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a floating-point operand?
std::tuple< char, unsigned, unsigned > parseAsmConstraintPhysReg(StringRef Constraint)
Returns a valid charcode or 0 in the first entry if this is a valid physical register constraint.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX10Plus(const MCSubtargetInfo &STI)
std::optional< int64_t > getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset, bool IsBuffer, bool HasSOffset)
bool isGlobalSegment(const GlobalValue *GV)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
std::optional< unsigned > getPKFMACF16InlineEncoding(uint32_t Literal, bool IsGFX11Plus)
void initDefaultAMDKernelCodeT(AMDGPUMCKernelCodeT &KernelCode, const MCSubtargetInfo *STI)
bool isNotGFX9Plus(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGLZMappingInfo * getMIMGLZMappingInfo(unsigned L)
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
LLVM_READONLY int32_t getSOPPWithRelaxation(uint32_t Opcode)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isCvt_F32_Fp8_Bf8_e64(unsigned Opc)
Waitcnt decodeLoadcntDscnt(const IsaVersion &Version, unsigned LoadcntDscnt)
std::optional< unsigned > getInlineEncodingV2I16(uint32_t Literal)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1170(const MCSubtargetInfo &STI)
static unsigned encodeStorecntDscnt(const IsaVersion &Version, unsigned Storecnt, unsigned Dscnt)
bool isGFX1250(const MCSubtargetInfo &STI)
bool supportsWave32(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool isTensorStore(unsigned Opc)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfo(unsigned DimEnum)
bool getMUBUFIsBufferInv(unsigned Opc)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
std::optional< unsigned > getInlineEncodingV2BF16(uint32_t Literal)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool hasDynamicVGPR(const Function &F)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
bool getVOP2IsSingle(unsigned Opc)
bool getMAIIsDGEMM(unsigned Opc)
Returns true if MAI operation is a double precision GEMM.
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
SmallVector< unsigned > getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size, unsigned DefaultVal)
LLVM_READNONE constexpr bool isChainCC(CallingConv::ID CC)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels)
bool isNotGFX12Plus(const MCSubtargetInfo &STI)
bool getMTBUFHasVAddr(unsigned Opc)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
uint8_t getELFABIVersion(const Triple &T, unsigned CodeObjectVersion)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
unsigned getLoadcntBitMask(const IsaVersion &Version)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool canGuaranteeTCO(CallingConv::ID CC)
LLVM_READNONE constexpr bool isGraphics(CallingConv::ID CC)
int getVOPDFull(unsigned OpX, unsigned OpY, unsigned EncodingFamily, bool VOPD3)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
bool isGFX9_GFX10_GFX11(const MCSubtargetInfo &STI)
bool isGFX9_GFX10(const MCSubtargetInfo &STI)
int getMUBUFElements(unsigned Opc)
static unsigned encodeLoadcntDscnt(const IsaVersion &Version, unsigned Loadcnt, unsigned Dscnt)
const GcnBufferFormatInfo * getGcnBufferFormatInfo(uint8_t BitsPerComp, uint8_t NumComponents, uint8_t NumFormat, const MCSubtargetInfo &STI)
unsigned mapWMMA3AddrTo2AddrOpcode(unsigned Opc)
bool isPermlane16(unsigned Opc)
bool getMUBUFHasSrsrc(unsigned Opc)
unsigned getDscntBitMask(const IsaVersion &Version)
bool hasAny64BitVGPROperands(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_Gfx
Used for AMD graphics targets.
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
constexpr uint64_t maxUIntN(uint64_t N)
Gets the maximum value for a N-bit unsigned integer.
Definition MathExtras.h:207
@ Wait
Definition Threading.h:60
Op::Description Desc
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
@ Other
Any other memory.
Definition ModRef.h:68
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
AMD Kernel Code Object (amd_kernel_code_t).
constexpr EncodingField(ValueType Value)
static ValueType decode(uint64_t Encoded)
constexpr uint64_t encode() const
static constexpr uint64_t encode(Fields... Values)
static std::tuple< typename Fields::ValueType... > decode(uint64_t Encoded)
constexpr EncodingField(ValueType Value)
constexpr uint64_t encode() const
static ValueType decode(uint64_t Encoded)
Instruction set architecture version.