LLVM 23.0.0git
AMDGPUDisassembler.cpp
Go to the documentation of this file.
1//===- AMDGPUDisassembler.cpp - Disassembler for AMDGPU ISA ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9//===----------------------------------------------------------------------===//
10//
11/// \file
12///
/// This file contains the definition of the AMDGPU ISA disassembler.
14//
15//===----------------------------------------------------------------------===//
16
17// ToDo: What to do with instruction suffixes (v_mov_b32 vs v_mov_b32_e32)?
18
22#include "SIDefines.h"
23#include "SIRegisterInfo.h"
29#include "llvm/MC/MCAsmInfo.h"
30#include "llvm/MC/MCContext.h"
31#include "llvm/MC/MCDecoder.h"
33#include "llvm/MC/MCExpr.h"
34#include "llvm/MC/MCInstrDesc.h"
40
41using namespace llvm;
42using namespace llvm::MCD;
43
44#define DEBUG_TYPE "amdgpu-disassembler"
45
// Largest SGPR encoding value for the current subtarget: the GFX10 limit when
// isGFX10Plus() holds, the SI limit otherwise. Expands to an unqualified
// isGFX10Plus() call, so that name must resolve wherever the macro is used.
#define SGPR_MAX                                                               \
  (isGFX10Plus() ? AMDGPU::EncValues::SGPR_MAX_GFX10                           \
                 : AMDGPU::EncValues::SGPR_MAX_SI)
49
51
52static int64_t getInlineImmValF16(unsigned Imm);
53static int64_t getInlineImmValBF16(unsigned Imm);
54static int64_t getInlineImmVal32(unsigned Imm);
55static int64_t getInlineImmVal64(unsigned Imm);
56
58 MCContext &Ctx, MCInstrInfo const *MCII)
59 : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
60 MAI(Ctx.getAsmInfo()),
61 HwModeRegClass(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)),
62 TargetMaxInstBytes(MAI.getMaxInstLength(&STI)),
63 CodeObjectVersion(AMDGPU::getDefaultAMDHSACodeObjectVersion()) {
64 // ToDo: AMDGPUDisassembler supports only VI ISA.
65 if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
66 reportFatalUsageError("disassembly not yet supported for subtarget");
67
68 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
69 createConstantSymbolExpr(Symbol, Code);
70
71 UCVersionW64Expr = createConstantSymbolExpr("UC_VERSION_W64_BIT", 0x2000);
72 UCVersionW32Expr = createConstantSymbolExpr("UC_VERSION_W32_BIT", 0x4000);
73 UCVersionMDPExpr = createConstantSymbolExpr("UC_VERSION_MDP_BIT", 0x8000);
74}
75
79
81addOperand(MCInst &Inst, const MCOperand& Opnd) {
82 Inst.addOperand(Opnd);
83 return Opnd.isValid() ?
86}
87
89 AMDGPU::OpName Name) {
90 int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), Name);
91 if (OpIdx != -1) {
92 auto *I = MI.begin();
93 std::advance(I, OpIdx);
94 MI.insert(I, Op);
95 }
96 return OpIdx;
97}
98
99static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
100 uint64_t Addr,
101 const MCDisassembler *Decoder) {
102 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
103
104 // Our branches take a simm16.
105 int64_t Offset = SignExtend64<16>(Imm) * 4 + 4 + Addr;
106
107 if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
109 return addOperand(Inst, MCOperand::createImm(Imm));
110}
111
112static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr,
113 const MCDisassembler *Decoder) {
114 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
115 int64_t Offset;
116 if (DAsm->isGFX12Plus()) { // GFX12 supports 24-bit signed offsets.
118 } else if (DAsm->isVI()) { // VI supports 20-bit unsigned offsets.
119 Offset = Imm & 0xFFFFF;
120 } else { // GFX9+ supports 21-bit signed offsets.
122 }
124}
125
126static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
127 const MCDisassembler *Decoder) {
128 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
129 return addOperand(Inst, DAsm->decodeBoolReg(Inst, Val));
130}
131
132static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val,
133 uint64_t Addr,
134 const MCDisassembler *Decoder) {
135 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
136 return addOperand(Inst, DAsm->decodeSplitBarrier(Inst, Val));
137}
138
139static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr,
140 const MCDisassembler *Decoder) {
141 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
142 return addOperand(Inst, DAsm->decodeDpp8FI(Val));
143}
144
// Defines a static decoder callback called StaticDecoderName. The callback
// passes the raw encoded field (Imm) to AMDGPUDisassembler::DecoderName and
// appends the operand it returns to the instruction. The address argument is
// ignored.
#define DECODE_OPERAND(StaticDecoderName, DecoderName)                         \
  static DecodeStatus StaticDecoderName(MCInst &Inst, unsigned Imm,            \
                                        uint64_t /*Addr*/,                     \
                                        const MCDisassembler *Decoder) {       \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    auto Opnd = Disasm->DecoderName(Imm);                                      \
    return addOperand(Inst, Opnd);                                             \
  }
152
// Register decoder that maps straight onto a register class: Imm (8 bits) is
// the register number within AMDGPU::<RegClass>RegClassID. Used by operands
// that can only be a VGPR or only be an AGPR. Defines the function
// Decode<RegClass>RegisterClass.
#define DECODE_OPERAND_REG_8(RegClass)                                         \
  static DecodeStatus Decode##RegClass##RegisterClass(                         \
      MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,                           \
      const MCDisassembler *Decoder) {                                         \
    assert(Imm < (1 << 8) && "8-bit encoding");                                \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    auto Opnd = Disasm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm);   \
    return addOperand(Inst, Opnd);                                             \
  }
164
// Defines a static decoder callback called Name. It asserts that the raw
// field Imm fits in EncSize bits, then decodes the expression EncImm (which
// may refer to the callback's Imm parameter) through
// AMDGPUDisassembler::decodeSrcOp with operand width OpWidth and appends the
// result to the instruction. The address argument is ignored.
#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm)                           \
  static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/,      \
                           const MCDisassembler *Decoder) {                    \
    assert(Imm < (1 << EncSize) && #EncSize "-bit encoding");                  \
    const auto *Disasm = static_cast<const AMDGPUDisassembler *>(Decoder);     \
    auto Opnd = Disasm->decodeSrcOp(Inst, OpWidth, EncImm);                    \
    return addOperand(Inst, Opnd);                                             \
  }
172
173static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize,
174 unsigned OpWidth, unsigned Imm, unsigned EncImm,
175 const MCDisassembler *Decoder) {
176 assert(Imm < (1U << EncSize) && "Operand doesn't fit encoding!");
177 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
178 return addOperand(Inst, DAsm->decodeSrcOp(Inst, OpWidth, EncImm));
179}
180
// Register decoders for SGPR-only operands. Imm is the register number and
// decodeSrcOp picks the register class for the given OpWidth. Both macros
// define Decode<RegClass>RegisterClass via DECODE_SrcOp; they differ only in
// the asserted width of Imm.

// 7-bit register number.
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm)

// 8-bit register number.
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)                               \
  DECODE_SrcOp(Decode##RegClass##RegisterClass, 8, OpWidth, Imm)
188
189// Decoder for registers. Imm(10-bit): Imm{7-0} is number of register,
190// Imm{9} is acc(agpr or vgpr) Imm{8} should be 0 (see VOP3Pe_SMFMAC).
191// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
192// Used by AV_ register classes (AGPR or VGPR only register operands).
193template <unsigned OpWidth>
194static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
195 const MCDisassembler *Decoder) {
196 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm | AMDGPU::EncValues::IS_VGPR,
197 Decoder);
198}
199
200// Decoder for Src(9-bit encoding) registers only.
201template <unsigned OpWidth>
202static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm,
203 uint64_t /* Addr */,
204 const MCDisassembler *Decoder) {
205 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
206}
207
208// Decoder for Src(9-bit encoding) AGPR, register number encoded in 9bits, set
209// Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp, registers
210// only.
211template <unsigned OpWidth>
212static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t /* Addr */,
213 const MCDisassembler *Decoder) {
214 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
215}
216
217// Decoder for 'enum10' from decodeSrcOp, Imm{0-8} is 9-bit Src encoding
218// Imm{9} is acc, registers only.
219template <unsigned OpWidth>
220static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm,
221 uint64_t /* Addr */,
222 const MCDisassembler *Decoder) {
223 return decodeSrcOp(Inst, 10, OpWidth, Imm, Imm, Decoder);
224}
225
226// Decoder for RegisterOperands using 9-bit Src encoding. Operand can be
227// register from RegClass or immediate. Registers that don't belong to RegClass
228// will be decoded and InstPrinter will report warning. Immediate will be
229// decoded into constant matching the OperandType (important for floating point
230// types).
231template <unsigned OpWidth>
232static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm,
233 uint64_t /* Addr */,
234 const MCDisassembler *Decoder) {
235 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm, Decoder);
236}
237
238// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
239// and decode using 'enum10' from decodeSrcOp.
240template <unsigned OpWidth>
241static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm,
242 uint64_t /* Addr */,
243 const MCDisassembler *Decoder) {
244 return decodeSrcOp(Inst, 9, OpWidth, Imm, Imm | 512, Decoder);
245}
246
247// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
248// when RegisterClass is used as an operand. Most often used for destination
249// operands.
250
252DECODE_OPERAND_REG_8(VGPR_32_Lo128)
255DECODE_OPERAND_REG_8(VReg_128)
256DECODE_OPERAND_REG_8(VReg_192)
257DECODE_OPERAND_REG_8(VReg_256)
258DECODE_OPERAND_REG_8(VReg_288)
259DECODE_OPERAND_REG_8(VReg_320)
260DECODE_OPERAND_REG_8(VReg_352)
261DECODE_OPERAND_REG_8(VReg_384)
262DECODE_OPERAND_REG_8(VReg_512)
263DECODE_OPERAND_REG_8(VReg_1024)
264
265DECODE_OPERAND_SREG_7(SReg_32, 32)
266DECODE_OPERAND_SREG_7(SReg_32_XM0, 32)
267DECODE_OPERAND_SREG_7(SReg_32_XEXEC, 32)
268DECODE_OPERAND_SREG_7(SReg_32_XM0_XEXEC, 32)
269DECODE_OPERAND_SREG_7(SReg_32_XEXEC_HI, 32)
270DECODE_OPERAND_SREG_7(SReg_64_XEXEC, 64)
271DECODE_OPERAND_SREG_7(SReg_64_XEXEC_XNULL, 64)
272DECODE_OPERAND_SREG_7(SReg_96, 96)
273DECODE_OPERAND_SREG_7(SReg_128, 128)
274DECODE_OPERAND_SREG_7(SReg_128_XNULL, 128)
275DECODE_OPERAND_SREG_7(SReg_256, 256)
276DECODE_OPERAND_SREG_7(SReg_256_XNULL, 256)
277DECODE_OPERAND_SREG_7(SReg_512, 512)
278
279DECODE_OPERAND_SREG_8(SReg_64, 64)
280
283DECODE_OPERAND_REG_8(AReg_128)
284DECODE_OPERAND_REG_8(AReg_256)
285DECODE_OPERAND_REG_8(AReg_512)
286DECODE_OPERAND_REG_8(AReg_1024)
287
289 uint64_t /*Addr*/,
290 const MCDisassembler *Decoder) {
291 assert(isUInt<10>(Imm) && "10-bit encoding expected");
292 assert((Imm & (1 << 8)) == 0 && "Imm{8} should not be used");
293
294 bool IsHi = Imm & (1 << 9);
295 unsigned RegIdx = Imm & 0xff;
296 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
297 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
298}
299
300static DecodeStatus
302 const MCDisassembler *Decoder) {
303 assert(isUInt<8>(Imm) && "8-bit encoding expected");
304
305 bool IsHi = Imm & (1 << 7);
306 unsigned RegIdx = Imm & 0x7f;
307 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
308 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
309}
310
311template <unsigned OpWidth>
313 uint64_t /*Addr*/,
314 const MCDisassembler *Decoder) {
315 assert(isUInt<9>(Imm) && "9-bit encoding expected");
316
317 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
318 if (Imm & AMDGPU::EncValues::IS_VGPR) {
319 bool IsHi = Imm & (1 << 7);
320 unsigned RegIdx = Imm & 0x7f;
321 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
322 }
323 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
324}
325
326template <unsigned OpWidth>
327static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
328 uint64_t /*Addr*/,
329 const MCDisassembler *Decoder) {
330 assert(isUInt<10>(Imm) && "10-bit encoding expected");
331
332 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
333 if (Imm & AMDGPU::EncValues::IS_VGPR) {
334 bool IsHi = Imm & (1 << 9);
335 unsigned RegIdx = Imm & 0xff;
336 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
337 }
338 return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(Inst, OpWidth, Imm & 0xFF));
339}
340
341static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
342 uint64_t /*Addr*/,
343 const MCDisassembler *Decoder) {
344 assert(isUInt<10>(Imm) && "10-bit encoding expected");
345 assert(Imm & AMDGPU::EncValues::IS_VGPR && "VGPR expected");
346
347 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
348
349 bool IsHi = Imm & (1 << 9);
350 unsigned RegIdx = Imm & 0xff;
351 return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
352}
353
354static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
355 uint64_t Addr,
356 const MCDisassembler *Decoder) {
357 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
358 return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
359}
360
362 uint64_t Addr,
363 const MCDisassembler *Decoder) {
364 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
365 return addOperand(Inst, DAsm->decodeMandatoryLiteral64Constant(Imm));
366}
367
368static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
369 uint64_t Addr, const void *Decoder) {
370 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
371 return addOperand(Inst, DAsm->decodeVOPDDstYOp(Inst, Val));
372}
373
374static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw,
375 const MCDisassembler *Decoder) {
376 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
377 return addOperand(Inst, DAsm->decodeSrcOp(Inst, Opw, Imm | 256));
378}
379
380template <unsigned Opw>
381static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm,
382 uint64_t /* Addr */,
383 const MCDisassembler *Decoder) {
384 return decodeAVLdSt(Inst, Imm, Opw, Decoder);
385}
386
387static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm,
388 uint64_t Addr,
389 const MCDisassembler *Decoder) {
390 assert(Imm < (1 << 9) && "9-bit encoding");
391 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
392 return addOperand(Inst, DAsm->decodeSrcOp(Inst, 64, Imm));
393}
394
// Shorthand for SDWA operands: defines the static callback decodeSDWA<DecName>
// that forwards to the AMDGPUDisassembler member of the same name.
#define DECODE_SDWA(DecName)                                                   \
  DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
397
398DECODE_SDWA(Src32)
399DECODE_SDWA(Src16)
400DECODE_SDWA(VopcDst)
401
402static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm,
403 uint64_t /* Addr */,
404 const MCDisassembler *Decoder) {
405 const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
406 return addOperand(Inst, DAsm->decodeVersionImm(Imm));
407}
408
409#include "AMDGPUGenDisassemblerTables.inc"
410
411namespace {
412// Define bitwidths for various types used to instantiate the decoder.
413template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
414template <> constexpr uint32_t InsnBitWidth<uint64_t> = 64;
415template <> constexpr uint32_t InsnBitWidth<std::bitset<96>> = 96;
416template <> constexpr uint32_t InsnBitWidth<std::bitset<128>> = 128;
417} // namespace
418
419//===----------------------------------------------------------------------===//
420//
421//===----------------------------------------------------------------------===//
422
423template <typename InsnType>
425 InsnType Inst, uint64_t Address,
426 raw_ostream &Comments) const {
427 assert(MI.getOpcode() == 0);
428 assert(MI.getNumOperands() == 0);
429 MCInst TmpInst;
430 HasLiteral = false;
431 const auto SavedBytes = Bytes;
432
433 SmallString<64> LocalComments;
434 raw_svector_ostream LocalCommentStream(LocalComments);
435 CommentStream = &LocalCommentStream;
436
437 DecodeStatus Res =
438 decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
439
440 CommentStream = nullptr;
441
442 if (Res != MCDisassembler::Fail) {
443 MI = TmpInst;
444 Comments << LocalComments;
446 }
447 Bytes = SavedBytes;
449}
450
451template <typename InsnType>
454 MCInst &MI, InsnType Inst, uint64_t Address,
455 raw_ostream &Comments) const {
456 for (const uint8_t *T : {Table1, Table2}) {
457 if (DecodeStatus Res = tryDecodeInst(T, MI, Inst, Address, Comments))
458 return Res;
459 }
461}
462
463template <typename T> static inline T eatBytes(ArrayRef<uint8_t>& Bytes) {
464 assert(Bytes.size() >= sizeof(T));
465 const auto Res =
467 Bytes = Bytes.slice(sizeof(T));
468 return Res;
469}
470
471static inline std::bitset<96> eat12Bytes(ArrayRef<uint8_t> &Bytes) {
472 using namespace llvm::support::endian;
473 assert(Bytes.size() >= 12);
474 std::bitset<96> Lo(read<uint64_t, endianness::little>(Bytes.data()));
475 Bytes = Bytes.slice(8);
476 std::bitset<96> Hi(read<uint32_t, endianness::little>(Bytes.data()));
477 Bytes = Bytes.slice(4);
478 return (Hi << 64) | Lo;
479}
480
481static inline std::bitset<128> eat16Bytes(ArrayRef<uint8_t> &Bytes) {
482 using namespace llvm::support::endian;
483 assert(Bytes.size() >= 16);
484 std::bitset<128> Lo(read<uint64_t, endianness::little>(Bytes.data()));
485 Bytes = Bytes.slice(8);
486 std::bitset<128> Hi(read<uint64_t, endianness::little>(Bytes.data()));
487 Bytes = Bytes.slice(8);
488 return (Hi << 64) | Lo;
489}
490
491void AMDGPUDisassembler::decodeImmOperands(MCInst &MI,
492 const MCInstrInfo &MCII) const {
493 const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
494 for (auto [OpNo, OpDesc] : enumerate(Desc.operands())) {
495 if (OpNo >= MI.getNumOperands())
496 continue;
497
498 // TODO: Fix V_DUAL_FMAMK_F32_X_FMAAK_F32_gfx12 vsrc operands,
499 // defined to take VGPR_32, but in reality allowing inline constants.
500 bool IsSrc = AMDGPU::OPERAND_SRC_FIRST <= OpDesc.OperandType &&
501 OpDesc.OperandType <= AMDGPU::OPERAND_SRC_LAST;
502 if (!IsSrc && OpDesc.OperandType != MCOI::OPERAND_REGISTER)
503 continue;
504
505 MCOperand &Op = MI.getOperand(OpNo);
506 if (!Op.isImm())
507 continue;
508 int64_t Imm = Op.getImm();
511 Op = decodeIntImmed(Imm);
512 continue;
513 }
514
516 Op = decodeLiteralConstant(Desc, OpDesc);
517 continue;
518 }
519
522 switch (OpDesc.OperandType) {
528 break;
531 Imm = getInlineImmValF16(Imm);
532 break;
535 Imm = getInlineImmValF16(Imm);
536 break;
538 // V_PK_FMAC_F16 on GFX11+ duplicates the f16 inline constant to both
539 // halves, so we need to produce the duplicated value for correct
540 // round-trip.
541 if (isGFX11Plus()) {
542 int64_t F16Val = getInlineImmValF16(Imm);
543 Imm = (F16Val << 16) | (F16Val & 0xFFFF);
544 } else {
545 Imm = getInlineImmValF16(Imm);
546 }
547 break;
548 }
554 Imm = getInlineImmVal64(Imm);
555 break;
556 default:
557 Imm = getInlineImmVal32(Imm);
558 }
559 Op.setImm(Imm);
560 }
561 }
562}
563
565 ArrayRef<uint8_t> Bytes_,
567 raw_ostream &CS) const {
568 unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
569 Bytes = Bytes_.slice(0, MaxInstBytesNum);
570
571 // In case the opcode is not recognized we'll assume a Size of 4 bytes (unless
572 // there are fewer bytes left). This will be overridden on success.
573 Size = std::min((size_t)4, Bytes_.size());
574
575 do {
576 // ToDo: better to switch encoding length using some bit predicate
577 // but it is unknown yet, so try all we can
578
579 // Try to decode DPP and SDWA first to solve conflict with VOP1 and VOP2
580 // encodings
581 if (isGFX1250Plus() && Bytes.size() >= 16) {
582 std::bitset<128> DecW = eat16Bytes(Bytes);
583 if (tryDecodeInst(DecoderTableGFX1250128, MI, DecW, Address, CS))
584 break;
585 Bytes = Bytes_.slice(0, MaxInstBytesNum);
586 }
587
588 if (isGFX11Plus() && Bytes.size() >= 12) {
589 std::bitset<96> DecW = eat12Bytes(Bytes);
590
591 if (isGFX1170() &&
592 tryDecodeInst(DecoderTableGFX117096, DecoderTableGFX1170_FAKE1696, MI,
593 DecW, Address, CS))
594 break;
595
596 if (isGFX11() &&
597 tryDecodeInst(DecoderTableGFX1196, DecoderTableGFX11_FAKE1696, MI,
598 DecW, Address, CS))
599 break;
600
601 if (isGFX1250() &&
602 tryDecodeInst(DecoderTableGFX125096, DecoderTableGFX1250_FAKE1696, MI,
603 DecW, Address, CS))
604 break;
605
606 if (isGFX12() &&
607 tryDecodeInst(DecoderTableGFX1296, DecoderTableGFX12_FAKE1696, MI,
608 DecW, Address, CS))
609 break;
610
611 if (isGFX12() &&
612 tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
613 break;
614
615 if (isGFX13() &&
616 tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
617 DecW, Address, CS))
618 break;
619
620 if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
621 // Return 8 bytes for a potential literal.
622 Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
623
624 if (isGFX1250() &&
625 tryDecodeInst(DecoderTableGFX125096, MI, DecW, Address, CS))
626 break;
627 }
628
629 // Reinitialize Bytes
630 Bytes = Bytes_.slice(0, MaxInstBytesNum);
631
632 } else if (Bytes.size() >= 16 &&
633 STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
634 std::bitset<128> DecW = eat16Bytes(Bytes);
635 if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
636 break;
637
638 // Reinitialize Bytes
639 Bytes = Bytes_.slice(0, MaxInstBytesNum);
640 }
641
642 if (Bytes.size() >= 8) {
643 const uint64_t QW = eatBytes<uint64_t>(Bytes);
644
645 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
646 tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS))
647 break;
648
649 if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) &&
650 tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS))
651 break;
652
653 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
654 tryDecodeInst(DecoderTableGFX95064, MI, QW, Address, CS))
655 break;
656
657 // Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
658 // v_mad_mixhi_f16 for FMA variants. Try to decode using this special
659 // table first so we print the correct name.
660 if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts) &&
661 tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
662 break;
663
664 if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
665 tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
666 break;
667
668 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
669 tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS))
670 break;
671
672 if ((isVI() || isGFX9()) &&
673 tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS))
674 break;
675
676 if (isGFX9() && tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS))
677 break;
678
679 if (isGFX10() && tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS))
680 break;
681
682 if (isGFX1250() &&
683 tryDecodeInst(DecoderTableGFX125064, DecoderTableGFX1250_FAKE1664, MI,
684 QW, Address, CS))
685 break;
686
687 if (isGFX12() &&
688 tryDecodeInst(DecoderTableGFX1264, DecoderTableGFX12_FAKE1664, MI, QW,
689 Address, CS))
690 break;
691
692 if (isGFX1170() &&
693 tryDecodeInst(DecoderTableGFX117064, DecoderTableGFX1170_FAKE1664, MI,
694 QW, Address, CS))
695 break;
696
697 if (isGFX11() &&
698 tryDecodeInst(DecoderTableGFX1164, DecoderTableGFX11_FAKE1664, MI, QW,
699 Address, CS))
700 break;
701
702 if (isGFX1170() &&
703 tryDecodeInst(DecoderTableGFX1170W6464, MI, QW, Address, CS))
704 break;
705
706 if (isGFX11() &&
707 tryDecodeInst(DecoderTableGFX11W6464, MI, QW, Address, CS))
708 break;
709
710 if (isGFX12() &&
711 tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
712 break;
713
714 if (isGFX13() &&
715 tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
716 Address, CS))
717 break;
718
719 // Reinitialize Bytes
720 Bytes = Bytes_.slice(0, MaxInstBytesNum);
721 }
722
723 // Try decode 32-bit instruction
724 if (Bytes.size() >= 4) {
725 const uint32_t DW = eatBytes<uint32_t>(Bytes);
726
727 if ((isVI() || isGFX9()) &&
728 tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS))
729 break;
730
731 if (tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS))
732 break;
733
734 if (isGFX9() && tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS))
735 break;
736
737 if (STI.hasFeature(AMDGPU::FeatureGFX950Insts) &&
738 tryDecodeInst(DecoderTableGFX95032, MI, DW, Address, CS))
739 break;
740
741 if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
742 tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS))
743 break;
744
745 if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding) &&
746 tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS))
747 break;
748
749 if (isGFX10() && tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS))
750 break;
751
752 if (isGFX1170() &&
753 tryDecodeInst(DecoderTableGFX117032, DecoderTableGFX1170_FAKE1632, MI,
754 DW, Address, CS))
755 break;
756
757 if (isGFX11() &&
758 tryDecodeInst(DecoderTableGFX1132, DecoderTableGFX11_FAKE1632, MI, DW,
759 Address, CS))
760 break;
761
762 if (isGFX1250() &&
763 tryDecodeInst(DecoderTableGFX125032, DecoderTableGFX1250_FAKE1632, MI,
764 DW, Address, CS))
765 break;
766
767 if (isGFX12() &&
768 tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
769 Address, CS))
770 break;
771
772 if (isGFX13() &&
773 tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
774 Address, CS))
775 break;
776 }
777
779 } while (false);
780
782
783 decodeImmOperands(MI, *MCII);
784
785 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DPP) {
786 if (isMacDPP(MI))
788
789 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
791 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
792 convertVOPCDPPInst(MI); // Special VOP3 case
793 else if (AMDGPU::isVOPC64DPP(MI.getOpcode()))
794 convertVOPC64DPPInst(MI); // Special VOP3 case
795 else if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8) !=
796 -1)
798 else if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3)
799 convertVOP3DPPInst(MI); // Regular VOP3 case
800 }
801
803
804 if (AMDGPU::isMAC(MI.getOpcode())) {
805 // Insert dummy unused src2_modifiers.
807 AMDGPU::OpName::src2_modifiers);
808 }
809
810 if (MI.getOpcode() == AMDGPU::V_CVT_SR_BF8_F32_e64_dpp ||
811 MI.getOpcode() == AMDGPU::V_CVT_SR_FP8_F32_e64_dpp) {
812 // Insert dummy unused src2_modifiers.
814 AMDGPU::OpName::src2_modifiers);
815 }
816
817 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::DS) &&
819 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::gds);
820 }
821
822 if (MCII->get(MI.getOpcode()).TSFlags &
824 int CPolPos = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
825 AMDGPU::OpName::cpol);
826 if (CPolPos != -1) {
827 unsigned CPol =
828 (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsAtomicRet) ?
830 if (MI.getNumOperands() <= (unsigned)CPolPos) {
832 AMDGPU::OpName::cpol);
833 } else if (CPol) {
834 MI.getOperand(CPolPos).setImm(MI.getOperand(CPolPos).getImm() | CPol);
835 }
836 }
837 }
838
839 if ((MCII->get(MI.getOpcode()).TSFlags &
841 (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
842 // GFX90A lost TFE, its place is occupied by ACC.
843 int TFEOpIdx =
844 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
845 if (TFEOpIdx != -1) {
846 auto *TFEIter = MI.begin();
847 std::advance(TFEIter, TFEOpIdx);
848 MI.insert(TFEIter, MCOperand::createImm(0));
849 }
850 }
851
852 // Validate buffer instruction offsets for GFX12+ - must not be a negative.
854 int OffsetIdx =
855 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
856 if (OffsetIdx != -1) {
857 uint32_t Imm = MI.getOperand(OffsetIdx).getImm();
858 int64_t SignedOffset = SignExtend64<24>(Imm);
859 if (SignedOffset < 0)
861 }
862 }
863
864 if (MCII->get(MI.getOpcode()).TSFlags &
866 int SWZOpIdx =
867 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
868 if (SWZOpIdx != -1) {
869 auto *SWZIter = MI.begin();
870 std::advance(SWZIter, SWZOpIdx);
871 MI.insert(SWZIter, MCOperand::createImm(0));
872 }
873 }
874
875 const MCInstrDesc &Desc = MCII->get(MI.getOpcode());
876 if (Desc.TSFlags & SIInstrFlags::MIMG) {
877 int VAddr0Idx =
878 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
879 int RsrcIdx =
880 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
881 unsigned NSAArgs = RsrcIdx - VAddr0Idx - 1;
882 if (VAddr0Idx >= 0 && NSAArgs > 0) {
883 unsigned NSAWords = (NSAArgs + 3) / 4;
884 if (Bytes.size() < 4 * NSAWords)
886 for (unsigned i = 0; i < NSAArgs; ++i) {
887 const unsigned VAddrIdx = VAddr0Idx + 1 + i;
888 auto VAddrRCID =
889 MCII->getOpRegClassID(Desc.operands()[VAddrIdx], HwModeRegClass);
890 MI.insert(MI.begin() + VAddrIdx, createRegOperand(VAddrRCID, Bytes[i]));
891 }
892 Bytes = Bytes.slice(4 * NSAWords);
893 }
894
896 }
897
898 if (MCII->get(MI.getOpcode()).TSFlags &
901
902 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP)
904
905 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VINTERP)
907
908 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SDWA)
910
911 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsMAI)
913
914 if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::IsWMMA)
916
917 int VDstIn_Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
918 AMDGPU::OpName::vdst_in);
919 if (VDstIn_Idx != -1) {
920 int Tied = MCII->get(MI.getOpcode()).getOperandConstraint(VDstIn_Idx,
922 if (Tied != -1 && (MI.getNumOperands() <= (unsigned)VDstIn_Idx ||
923 !MI.getOperand(VDstIn_Idx).isReg() ||
924 MI.getOperand(VDstIn_Idx).getReg() != MI.getOperand(Tied).getReg())) {
925 if (MI.getNumOperands() > (unsigned)VDstIn_Idx)
926 MI.erase(&MI.getOperand(VDstIn_Idx));
928 MCOperand::createReg(MI.getOperand(Tied).getReg()),
929 AMDGPU::OpName::vdst_in);
930 }
931 }
932
933 bool IsSOPK = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::SOPK;
934 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm) && !IsSOPK)
936
937 // Some VOPC instructions, e.g., v_cmpx_f_f64, use VOP3 encoding and
938 // have EXEC as implicit destination. Issue a warning if encoding for
939 // vdst is not EXEC.
940 if ((MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3) &&
941 MCII->get(MI.getOpcode()).getNumDefs() == 0 &&
942 MCII->get(MI.getOpcode()).hasImplicitDefOfPhysReg(AMDGPU::EXEC)) {
943 auto ExecEncoding = MRI.getEncodingValue(AMDGPU::EXEC_LO);
944 if (Bytes_[0] != ExecEncoding)
946 }
947
948 Size = MaxInstBytesNum - Bytes.size();
949 return Status;
950}
951
953 if (STI.hasFeature(AMDGPU::FeatureGFX11Insts)) {
954 // The MCInst still has these fields even though they are no longer encoded
955 // in the GFX11 instruction.
956 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
957 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::compr);
958 }
959}
960
// When MI's opcode is one of the V_INTERP_*_F16_F32 "inreg" opcodes listed
// below (t16 and fake16 forms, each for gfx11, gfx12 and gfx13), insert an
// op_sel immediate operand with value 0. Any other opcode is left untouched.
963 if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
964 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
965 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
966 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
967 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx13 ||
968 MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx13 ||
969 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
970 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
971 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
972 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
973 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx13 ||
974 MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx13 ||
975 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
976 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
977 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
978 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
979 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx13 ||
980 MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx13 ||
981 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
982 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
983 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
984 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12 ||
985 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx13 ||
986 MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx13) {
987 // The MCInst has this field that is not directly encoded in the
988 // instruction.
989 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
990 }
991}
992
994 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
995 STI.hasFeature(AMDGPU::FeatureGFX10)) {
996 if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
997 // VOPC - insert clamp
998 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
999 } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
1000 int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
1001 if (SDst != -1) {
1002 // VOPC - insert VCC register as sdst
1004 AMDGPU::OpName::sdst);
1005 } else {
1006 // VOP1/2 - insert omod if present in instruction
1007 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::omod);
1008 }
1009 }
1010}
1011
1012 /// Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the
1013 /// appropriate subregister for the used format width.
1015 MCOperand &MO, uint8_t NumRegs) {
// NumRegs is the number of 32-bit registers the operand should span; MO is
// rewritten in place to the matching register.
1016 switch (NumRegs) {
1017 case 4:
1018 return MO.setReg(MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3));
1019 case 6:
1020 return MO.setReg(
1021 MRI.getSubReg(MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5));
1022 case 8:
// Unlike the 4- and 6-register cases, the operand is only replaced when the
// sub-register lookup yields a register; otherwise MO is kept as is.
1023 if (MCRegister NewReg = MRI.getSubReg(
1024 MO.getReg(), AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7)) {
1025 MO.setReg(NewReg);
1026 }
1027 return;
1028 case 12: {
1029 // There is no 384-bit subreg index defined.
// Instead, take the first 32-bit sub-register and look up the VReg_384
// register that has it as sub0.
1030 MCRegister BaseReg = MRI.getSubReg(MO.getReg(), AMDGPU::sub0);
1031 MCRegister NewReg = MRI.getMatchingSuperReg(
1032 BaseReg, AMDGPU::sub0, &MRI.getRegClass(AMDGPU::VReg_384RegClassID));
1033 return MO.setReg(NewReg);
1034 }
1035 case 16:
1036 // No-op in cases where one operand is still f8/bf8.
1037 return;
1038 default:
1039 llvm_unreachable("Unexpected size for mfma/wmma f8f6f4 operand");
1040 }
1041}
1042
1043 /// f8f6f4 instructions have different pseudos depending on the used formats. In
1044 /// the disassembler table, we only have the variants with the largest register
1045 /// classes which assume using an fp8/bf8 format for both operands. The actual
1046 /// register class depends on the format in blgp and cbsz operands. Adjust the
1047 /// register classes depending on the used format.
1049 int BlgpIdx =
1050 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::blgp);
// Opcodes without a blgp operand are left unchanged.
1051 if (BlgpIdx == -1)
1052 return;
1053
// NOTE(review): CbszIdx is used below without a -1 check; presumably every
// opcode that has blgp also has cbsz - confirm.
1054 int CbszIdx =
1055 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::cbsz);
1056
1057 unsigned CBSZ = MI.getOperand(CbszIdx).getImm();
1058 unsigned BLGP = MI.getOperand(BlgpIdx).getImm();
1059
// Look up the variant for this (cbsz, blgp) pair. Nothing to do when there is
// none or when it is the opcode MI already has.
1060 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1061 AMDGPU::getMFMA_F8F6F4_WithFormatArgs(CBSZ, BLGP, MI.getOpcode());
1062 if (!AdjustedRegClassOpcode ||
1063 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1064 return;
1065
// Switch opcode first: the src0/src1 indices below are looked up against the
// new opcode, then each source register is resized to NumRegsSrcA/B.
1066 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1067 int Src0Idx =
1068 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1069 int Src1Idx =
1070 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1071 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1072 AdjustedRegClassOpcode->NumRegsSrcA);
1073 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1074 AdjustedRegClassOpcode->NumRegsSrcB);
1075}
1076
// Select the opcode variant that matches the matrix_a_fmt / matrix_b_fmt
// operands of MI and resize src0/src1 to the register counts that variant
// uses. Opcodes without a matrix_a_fmt operand are left unchanged.
1078 int FmtAIdx =
1079 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_a_fmt);
1080 if (FmtAIdx == -1)
1081 return;
1082
// NOTE(review): FmtBIdx is used below without a -1 check; presumably
// matrix_b_fmt is always present alongside matrix_a_fmt - confirm.
1083 int FmtBIdx =
1084 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::matrix_b_fmt);
1085
1086 unsigned FmtA = MI.getOperand(FmtAIdx).getImm();
1087 unsigned FmtB = MI.getOperand(FmtBIdx).getImm();
1088
// Nothing to do when no variant exists for these formats or when the variant
// is the opcode MI already has.
1089 const AMDGPU::MFMA_F8F6F4_Info *AdjustedRegClassOpcode =
1090 AMDGPU::getWMMA_F8F6F4_WithFormatArgs(FmtA, FmtB, MI.getOpcode());
1091 if (!AdjustedRegClassOpcode ||
1092 AdjustedRegClassOpcode->Opcode == MI.getOpcode())
1093 return;
1094
// The src0/src1 indices are looked up against the new opcode.
1095 MI.setOpcode(AdjustedRegClassOpcode->Opcode);
1096 int Src0Idx =
1097 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
1098 int Src1Idx =
1099 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
1100 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src0Idx),
1101 AdjustedRegClassOpcode->NumRegsSrcA);
1102 adjustMFMA_F8F6F4OpRegClass(MRI, MI.getOperand(Src1Idx),
1103 AdjustedRegClassOpcode->NumRegsSrcB);
1104}
1105
// Bit masks gathered from an instruction's src*_modifiers operands; all
// fields start out as 0.
1107 unsigned OpSel = 0;
1108 unsigned OpSelHi = 0;
1109 unsigned NegLo = 0;
1110 unsigned NegHi = 0;
1111};
1112
1113 // Reconstruct values of VOP3/VOP3P operands such as op_sel.
1114 // Note that these values do not affect disassembler output,
1115 // so this is only necessary for consistency with src_modifiers.
1117 bool IsVOP3P = false) {
1118 VOPModifiers Modifiers;
1119 unsigned Opc = MI.getOpcode();
1120 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
1121 AMDGPU::OpName::src1_modifiers,
1122 AMDGPU::OpName::src2_modifiers};
// Bit J of each collected mask comes from the srcJ_modifiers operand; source
// operands the opcode does not have contribute nothing.
1123 for (int J = 0; J < 3; ++J) {
1124 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
1125 if (OpIdx == -1)
1126 continue;
1127
1128 unsigned Val = MI.getOperand(OpIdx).getImm();
1129
1130 Modifiers.OpSel |= !!(Val & SISrcMods::OP_SEL_0) << J;
1131 if (IsVOP3P) {
1132 Modifiers.OpSelHi |= !!(Val & SISrcMods::OP_SEL_1) << J;
1133 Modifiers.NegLo |= !!(Val & SISrcMods::NEG) << J;
1134 Modifiers.NegHi |= !!(Val & SISrcMods::NEG_HI) << J;
1135 } else if (J == 0) {
// Outside VOP3P, the DST_OP_SEL flag is read from src0_modifiers only and is
// recorded as bit 3 of OpSel.
1136 Modifiers.OpSel |= !!(Val & SISrcMods::DST_OP_SEL) << 3;
1137 }
1138 }
1139
1140 return Modifiers;
1141}
1142
1143// Instructions decode the op_sel/suffix bits into the src_modifier
1144// operands. Copy those bits into the src operands for true16 VGPRs.
1146 const unsigned Opc = MI.getOpcode();
1147 const MCRegisterClass &ConversionRC =
1148 MRI.getRegClass(AMDGPU::VGPR_16RegClassID);
1149 constexpr std::array<std::tuple<AMDGPU::OpName, AMDGPU::OpName, unsigned>, 4>
1150 OpAndOpMods = {{{AMDGPU::OpName::src0, AMDGPU::OpName::src0_modifiers,
1152 {AMDGPU::OpName::src1, AMDGPU::OpName::src1_modifiers,
1154 {AMDGPU::OpName::src2, AMDGPU::OpName::src2_modifiers,
1156 {AMDGPU::OpName::vdst, AMDGPU::OpName::src0_modifiers,
1158 for (const auto &[OpName, OpModsName, OpSelMask] : OpAndOpMods) {
1159 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
1160 int OpModsIdx = AMDGPU::getNamedOperandIdx(Opc, OpModsName);
1161 if (OpIdx == -1 || OpModsIdx == -1)
1162 continue;
1163 MCOperand &Op = MI.getOperand(OpIdx);
1164 if (!Op.isReg())
1165 continue;
1166 if (!ConversionRC.contains(Op.getReg()))
1167 continue;
1168 unsigned OpEnc = MRI.getEncodingValue(Op.getReg());
1169 const MCOperand &OpMods = MI.getOperand(OpModsIdx);
1170 unsigned ModVal = OpMods.getImm();
1171 if (ModVal & OpSelMask) { // isHi
1172 unsigned RegIdx = OpEnc & AMDGPU::HWEncoding::REG_IDX_MASK;
1173 Op.setReg(ConversionRC.getRegister(RegIdx * 2 + 1));
1174 }
1175 }
1176}
1177
1178// MAC opcodes have special old and src2 operands.
1179// src2 is tied to dst, while old is not tied (but assumed to be).
1181 constexpr int DST_IDX = 0;
1182 auto Opcode = MI.getOpcode();
1183 const auto &Desc = MCII->get(Opcode);
1184 auto OldIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::old);
1185
1186 if (OldIdx != -1 && Desc.getOperandConstraint(
1187 OldIdx, MCOI::OperandConstraint::TIED_TO) == -1) {
1188 assert(AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src2));
1189 assert(Desc.getOperandConstraint(
1190 AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2),
1192 (void)DST_IDX;
1193 return true;
1194 }
1195
1196 return false;
1197}
1198
1199// Create dummy old operand and insert dummy unused src2_modifiers
1201 assert(MI.getNumOperands() + 1 < MCII->get(MI.getOpcode()).getNumOperands());
1202 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1204 AMDGPU::OpName::src2_modifiers);
1205}
1206
1208 unsigned Opc = MI.getOpcode();
1209
1210 int VDstInIdx =
1211 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1212 if (VDstInIdx != -1)
1213 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1214
1215 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1216 if (MI.getNumOperands() < DescNumOps &&
1217 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1219 auto Mods = collectVOPModifiers(MI);
1221 AMDGPU::OpName::op_sel);
1222 } else {
1223 // Insert dummy unused src modifiers.
1224 if (MI.getNumOperands() < DescNumOps &&
1225 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1227 AMDGPU::OpName::src0_modifiers);
1228
1229 if (MI.getNumOperands() < DescNumOps &&
1230 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1232 AMDGPU::OpName::src1_modifiers);
1233 }
1234}
1235
1238
1239 int VDstInIdx =
1240 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdst_in);
1241 if (VDstInIdx != -1)
1242 insertNamedMCOperand(MI, MI.getOperand(0), AMDGPU::OpName::vdst_in);
1243
1244 unsigned Opc = MI.getOpcode();
1245 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1246 if (MI.getNumOperands() < DescNumOps &&
1247 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1248 auto Mods = collectVOPModifiers(MI);
1250 AMDGPU::OpName::op_sel);
1251 }
1252}
1253
1254 // Given a wide tuple \p Reg check if it will overflow 256 registers.
1255 // \returns \p Reg on success or NoRegister otherwise.
1257 const MCRegisterInfo &MRI) {
// Tuple length in 32-bit registers, derived from the class size.
1258 unsigned NumRegs = RC.getSizeInBits() / 32;
1259 MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
// A register without a sub0 sub-register is returned unchanged.
1260 if (!Sub0)
1261 return Reg;
1262
// Pick the first register of whichever 32-bit class (VGPR or AGPR) holds
// Sub0; BaseReg stays unset, and the assert fires, if it is in neither.
1263 MCRegister BaseReg;
1264 if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
1265 BaseReg = AMDGPU::VGPR0;
1266 else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
1267 BaseReg = AMDGPU::AGPR0;
1268
1269 assert(BaseReg && "Only vector registers expected");
1270
// (Sub0 - BaseReg) is the offset of the tuple's first register from the
// class's first register; the tuple fits when offset + length is at most 256.
1271 return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : MCRegister();
1272}
1273
1274 // Note that before gfx10, the MIMG encoding provided no information about
1275 // VADDR size. Consequently, decoded instructions always show address as if it
1276 // has 1 dword, which could be not really so.
//
// Overview of the body below: compute the data size (DstSize) and address
// size (AddrSize) implied by the decoded operands, look up the opcode variant
// with those sizes, and rewrite the opcode and the vdata / vaddr register
// operands accordingly. Every early return leaves MI as decoded.
1278 auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
1279
1280 int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1281 AMDGPU::OpName::vdst);
1282
1283 int VDataIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1284 AMDGPU::OpName::vdata);
1285 int VAddr0Idx =
1286 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
// The resource operand is named srsrc when the MIMG flag is set, else rsrc.
1287 AMDGPU::OpName RsrcOpName = (TSFlags & SIInstrFlags::MIMG)
1288 ? AMDGPU::OpName::srsrc
1289 : AMDGPU::OpName::rsrc;
1290 int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
1291 int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1292 AMDGPU::OpName::dmask);
1293
1294 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1295 AMDGPU::OpName::tfe);
1296 int D16Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
1297 AMDGPU::OpName::d16);
1298
// NOTE(review): Info is dereferenced without a null check; presumably this is
// only reached for opcodes that have a MIMG info entry - confirm.
1299 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1300 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
1301 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
1302
1303 assert(VDataIdx != -1);
1304 if (BaseOpcode->BVH) {
1305 // Add A16 operand for intersect_ray instructions
1306 addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
1307 return;
1308 }
1309
// Here an instruction is treated as atomic exactly when it has a vdst operand.
1310 bool IsAtomic = (VDstIdx != -1);
1311 bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
1312 bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
1313 bool IsNSA = false;
1314 bool IsPartialNSA = false;
1315 unsigned AddrSize = Info->VAddrDwords;
1316
1317 if (isGFX10Plus()) {
// NOTE(review): DimIdx is stored as unsigned and not compared against -1;
// presumably the dim operand always exists on this path - confirm.
1318 unsigned DimIdx =
1319 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dim);
1320 int A16Idx =
1321 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::a16);
1322 const AMDGPU::MIMGDimInfo *Dim =
1323 AMDGPU::getMIMGDimInfoByEncoding(MI.getOperand(DimIdx).getImm());
1324 const bool IsA16 = (A16Idx != -1 && MI.getOperand(A16Idx).getImm());
1325
1326 AddrSize =
1327 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
1328
1329 // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
1330 // VIMAGE insts other than BVH never use vaddr4.
1331 IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
1332 Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
1333 Info->MIMGEncoding == AMDGPU::MIMGEncGfx12 ||
1334 Info->MIMGEncoding == AMDGPU::MIMGEncGfx13;
1335 if (!IsNSA) {
// Non-NSA, non-VSAMPLE: any address size above 12 dwords is rounded up to 16.
1336 if (!IsVSample && AddrSize > 12)
1337 AddrSize = 16;
1338 } else {
1339 if (AddrSize > Info->VAddrDwords) {
1340 if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
1341 // The NSA encoding does not contain enough operands for the
1342 // combination of base opcode / dimension. Should this be an error?
1343 return;
1344 }
1345 IsPartialNSA = true;
1346 }
1347 }
1348 }
1349
// Data size in dwords: 4 for gather4, otherwise the number of set bits in the
// low four dmask bits (at least 1).
1350 unsigned DMask = MI.getOperand(DMaskIdx).getImm() & 0xf;
1351 unsigned DstSize = IsGather4 ? 4 : std::max(llvm::popcount(DMask), 1);
1352
// With packed D16 the dword count is halved, rounding up.
1353 bool D16 = D16Idx >= 0 && MI.getOperand(D16Idx).getImm();
1354 if (D16 && AMDGPU::hasPackedD16(STI)) {
1355 DstSize = (DstSize + 1) / 2;
1356 }
1357
// A set tfe operand adds one more dword.
1358 if (TFEIdx != -1 && MI.getOperand(TFEIdx).getImm())
1359 DstSize += 1;
1360
// The decoded opcode already has the right sizes: nothing to rewrite.
1361 if (DstSize == Info->VDataDwords && AddrSize == Info->VAddrDwords)
1362 return;
1363
1364 int NewOpcode =
1365 AMDGPU::getMIMGOpcode(Info->BaseOpcode, Info->MIMGEncoding, DstSize, AddrSize);
1366 if (NewOpcode == -1)
1367 return;
1368
1369 // Widen the register to the correct number of enabled channels.
1370 MCRegister NewVdata;
1371 if (DstSize != Info->VDataDwords) {
1372 auto DataRCID = MCII->getOpRegClassID(
1373 MCII->get(NewOpcode).operands()[VDataIdx], HwModeRegClass);
1374
1375 // Get first subregister of VData
1376 MCRegister Vdata0 = MI.getOperand(VDataIdx).getReg();
1377 MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
1378 Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;
1379
1380 const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
1381 NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
1382 NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
1383 if (!NewVdata) {
1384 // It's possible to encode this such that the low register + enabled
1385 // components exceeds the register count.
1386 return;
1387 }
1388 }
1389
1390 // If not using NSA on GFX10+, widen vaddr0 address register to correct size.
1391 // If using partial NSA on GFX11+ widen last address register.
// For partial NSA the operand just before the resource operand is widened.
1392 int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
1393 MCRegister NewVAddrSA;
1394 if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
1395 AddrSize != Info->VAddrDwords) {
1396 MCRegister VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
1397 MCRegister VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
1398 VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
1399
1400 auto AddrRCID = MCII->getOpRegClassID(
1401 MCII->get(NewOpcode).operands()[VAddrSAIdx], HwModeRegClass);
1402
1403 const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
1404 NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
1405 NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
1406 if (!NewVAddrSA)
1407 return;
1408 }
1409
// All lookups succeeded; only from here on is MI modified.
1410 MI.setOpcode(NewOpcode);
1411
1412 if (NewVdata != AMDGPU::NoRegister) {
1413 MI.getOperand(VDataIdx) = MCOperand::createReg(NewVdata);
1414
1415 if (IsAtomic) {
1416 // Atomic operations have an additional operand (a copy of data)
1417 MI.getOperand(VDstIdx) = MCOperand::createReg(NewVdata);
1418 }
1419 }
1420
1421 if (NewVAddrSA) {
1422 MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
1423 } else if (IsNSA) {
// NSA form with fewer address dwords than decoded: drop the surplus address
// operands that follow the first AddrSize ones.
1424 assert(AddrSize <= Info->VAddrDwords);
1425 MI.erase(MI.begin() + VAddr0Idx + AddrSize,
1426 MI.begin() + VAddr0Idx + Info->VAddrDwords);
1427 }
1428}
1429
1430// Opsel and neg bits are used in src_modifiers and standalone operands. Autogen
1431// decoder only adds to src_modifiers, so manually add the bits to the other
1432// operands.
1434 unsigned Opc = MI.getOpcode();
1435 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1436 auto Mods = collectVOPModifiers(MI, true);
1437
1438 if (MI.getNumOperands() < DescNumOps &&
1439 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
1440 insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vdst_in);
1441
1442 if (MI.getNumOperands() < DescNumOps &&
1443 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel))
1445 AMDGPU::OpName::op_sel);
1446 if (MI.getNumOperands() < DescNumOps &&
1447 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel_hi))
1449 AMDGPU::OpName::op_sel_hi);
1450 if (MI.getNumOperands() < DescNumOps &&
1451 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_lo))
1453 AMDGPU::OpName::neg_lo);
1454 if (MI.getNumOperands() < DescNumOps &&
1455 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::neg_hi))
1457 AMDGPU::OpName::neg_hi);
1458}
1459
1460// Create dummy old operand and insert optional operands
1462 unsigned Opc = MI.getOpcode();
1463 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1464
1465 if (MI.getNumOperands() < DescNumOps &&
1466 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::old))
1467 insertNamedMCOperand(MI, MCOperand::createReg(0), AMDGPU::OpName::old);
1468
1469 if (MI.getNumOperands() < DescNumOps &&
1470 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src0_modifiers))
1472 AMDGPU::OpName::src0_modifiers);
1473
1474 if (MI.getNumOperands() < DescNumOps &&
1475 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::src1_modifiers))
1477 AMDGPU::OpName::src1_modifiers);
1478}
1479
1481 unsigned Opc = MI.getOpcode();
1482 unsigned DescNumOps = MCII->get(Opc).getNumOperands();
1483
1485
1486 if (MI.getNumOperands() < DescNumOps &&
1487 AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
1490 AMDGPU::OpName::op_sel);
1491 }
1492}
1493
// Insert the stored Literal value into MI as its immX operand. A literal must
// already have been decoded (HasLiteral set) before this runs.
1495 assert(HasLiteral && "Should have decoded a literal");
1496 insertNamedMCOperand(MI, MCOperand::createImm(Literal), AMDGPU::OpName::immX);
1497}
1498
1499const char* AMDGPUDisassembler::getRegClassName(unsigned RegClassID) const {
1500 return getContext().getRegisterInfo()->
1501 getRegClassName(&AMDGPUMCRegisterClasses[RegClassID]);
1502}
1503
// Report a decoding problem: "Error: " followed by ErrMsg is written to the
// comment stream, and a default-constructed MCOperand is returned in place of
// a real operand.
1504inline
1506 const Twine& ErrMsg) const {
1507 *CommentStream << "Error: " + ErrMsg;
1508
1509 // ToDo: add support for error operands to MCInst.h
1510 // return MCOperand::createError(V);
1511 return MCOperand();
1512}
1513
1517
// Create an operand for register number Val of register class RegClassID.
// An index at or beyond the class's register count yields an error operand
// whose message names the class and the offending index.
1518inline
1520 unsigned Val) const {
1521 const auto& RegCl = AMDGPUMCRegisterClasses[RegClassID];
1522 if (Val >= RegCl.getNumRegs())
1523 return errOperand(Val, Twine(getRegClassName(RegClassID)) +
1524 ": unknown register " + Twine(Val));
1525 return createRegOperand(RegCl.getRegister(Val));
1526}
1527
// Create an operand for a scalar (SGPR or TTMP) register class from the value
// Val. Val is scaled down by a per-class power of two (see `shift` below)
// before being used as the index into the class.
1528inline
1530 unsigned Val) const {
1531 // ToDo: SI/CI have 104 SGPRs, VI - 102
1532 // Valery: here we accepting as much as we can, let assembler sort it out
// log2 of the divisor applied to Val: 0 for the 32-bit classes, 1 for the
// 64-bit classes, 2 for every wider class listed below.
1533 int shift = 0;
1534 switch (SRegClassID) {
1535 case AMDGPU::SGPR_32RegClassID:
1536 case AMDGPU::TTMP_32RegClassID:
1537 break;
1538 case AMDGPU::SGPR_64RegClassID:
1539 case AMDGPU::TTMP_64RegClassID:
1540 shift = 1;
1541 break;
1542 case AMDGPU::SGPR_96RegClassID:
1543 case AMDGPU::TTMP_96RegClassID:
1544 case AMDGPU::SGPR_128RegClassID:
1545 case AMDGPU::TTMP_128RegClassID:
1546 // ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
1547 // this bundle?
1548 case AMDGPU::SGPR_256RegClassID:
1549 case AMDGPU::TTMP_256RegClassID:
1550 // ToDo: unclear if s[96:104] is available on VI. Can we use VCC as SGPR in
1551 // this bundle?
1552 case AMDGPU::SGPR_288RegClassID:
1553 case AMDGPU::TTMP_288RegClassID:
1554 case AMDGPU::SGPR_320RegClassID:
1555 case AMDGPU::TTMP_320RegClassID:
1556 case AMDGPU::SGPR_352RegClassID:
1557 case AMDGPU::TTMP_352RegClassID:
1558 case AMDGPU::SGPR_384RegClassID:
1559 case AMDGPU::TTMP_384RegClassID:
1560 case AMDGPU::SGPR_512RegClassID:
1561 case AMDGPU::TTMP_512RegClassID:
1562 shift = 2;
1563 break;
1564 // ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
1565 // this bundle?
1566 default:
1567 llvm_unreachable("unhandled register class");
1568 }
1569
// A misaligned value only produces a warning in the comment stream; decoding
// continues with the value rounded down by the shift.
1570 if (Val % (1 << shift)) {
1571 *CommentStream << "Warning: " << getRegClassName(SRegClassID)
1572 << ": scalar reg isn't aligned " << Val;
1573 }
1574
1575 return createRegOperand(SRegClassID, Val >> shift);
1576}
1577
1579 bool IsHi) const {
// Each RegIdx owns two consecutive entries of the VGPR_16 class: the even
// entry when IsHi is false, the odd entry when IsHi is true.
1580 unsigned RegIdxInVGPR16 = RegIdx * 2 + (IsHi ? 1 : 0);
1581 return createRegOperand(AMDGPU::VGPR_16RegClassID, RegIdxInVGPR16);
1582}
1583
1584// Decode Literals for insts which always have a literal in the encoding
1587 if (HasLiteral) {
1588 assert(
1590 "Should only decode multiple kimm with VOPD, check VSrc operand types");
1591 if (Literal != Val)
1592 return errOperand(Val, "More than one unique literal is illegal");
1593 }
1594 HasLiteral = true;
1595 Literal = Val;
1596 return MCOperand::createImm(Literal);
1597}
1598
1601 if (HasLiteral) {
1602 if (Literal != Val)
1603 return errOperand(Val, "More than one unique literal is illegal");
1604 }
1605 HasLiteral = true;
1606 Literal = Val;
1607
1608 bool UseLit64 = Hi_32(Literal) == 0;
1610 LitModifier::Lit64, Literal, getContext()))
1611 : MCOperand::createImm(Literal);
1612}
1613
1616 const MCOperandInfo &OpDesc) const {
1617 // For now all literal constants are supposed to be unsigned integer
1618 // ToDo: deal with signed/unsigned 64-bit integer constants
1619 // ToDo: deal with float/double constants
1620 if (!HasLiteral) {
1621 if (Bytes.size() < 4) {
1622 return errOperand(0, "cannot read literal, inst bytes left " +
1623 Twine(Bytes.size()));
1624 }
1625 HasLiteral = true;
1626 Literal = eatBytes<uint32_t>(Bytes);
1627 }
1628
1629 // For disassembling always assume all inline constants are available.
1630 bool HasInv2Pi = true;
1631
1632 // Invalid instruction codes may contain literals for inline-only
1633 // operands, so we support them here as well.
1634 int64_t Val = Literal;
1635 bool UseLit = false;
1636 switch (OpDesc.OperandType) {
1637 default:
1638 llvm_unreachable("Unexpected operand type!");
1642 UseLit = AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
1643 break;
1646 break;
1650 UseLit = AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
1651 break;
1653 UseLit = AMDGPU::isInlinableLiteralV2F16(Val);
1654 break;
1657 break;
1659 break;
1663 UseLit = AMDGPU::isInlinableLiteralI16(Val, HasInv2Pi);
1664 break;
1666 UseLit = AMDGPU::isInlinableLiteralV2I16(Val);
1667 break;
1677 UseLit = AMDGPU::isInlinableLiteral32(Val, HasInv2Pi);
1678 break;
1682 UseLit = AMDGPU::isInlinableLiteral64(Val << 32, HasInv2Pi);
1683 if (!UseLit)
1684 Val <<= 32;
1685 break;
1688 UseLit = AMDGPU::isInlinableLiteral64(Val, HasInv2Pi);
1689 break;
1691 // TODO: Disassembling V_DUAL_FMAMK_F32_X_FMAMK_F32_gfx11 hits
1692 // decoding a literal in a position of a register operand. Give
1693 // it special handling in the caller, decodeImmOperands(), instead
1694 // of quietly allowing it here.
1695 break;
1696 }
1697
1700 : MCOperand::createImm(Val);
1701}
1702
1704 assert(STI.hasFeature(AMDGPU::Feature64BitLiterals));
1705
1706 if (!HasLiteral) {
1707 if (Bytes.size() < 8) {
1708 return errOperand(0, "cannot read literal64, inst bytes left " +
1709 Twine(Bytes.size()));
1710 }
1711 HasLiteral = true;
1712 Literal = eatBytes<uint64_t>(Bytes);
1713 }
1714
1715 bool UseLit64 = Hi_32(Literal) == 0;
1716
1717 UseLit64 |= AMDGPU::isInlinableLiteral64(
1718 Literal, STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm));
1719
1721 LitModifier::Lit64, Literal, getContext()))
1722 : MCOperand::createImm(Literal);
1723}
1724
// Convert an inline integer constant encoding Imm into an immediate operand.
// Imm must lie in [INLINE_INTEGER_C_MIN, INLINE_INTEGER_C_MAX].
1726 using namespace AMDGPU::EncValues;
1727
1728 assert(Imm >= INLINE_INTEGER_C_MIN && Imm <= INLINE_INTEGER_C_MAX);
// Encodings up to INLINE_INTEGER_C_POSITIVE_MAX give the non-negative value
// Imm - INLINE_INTEGER_C_MIN; larger encodings give the negative value
// INLINE_INTEGER_C_POSITIVE_MAX - Imm.
1729 return MCOperand::createImm((Imm <= INLINE_INTEGER_C_POSITIVE_MAX) ?
1730 (static_cast<int64_t>(Imm) - INLINE_INTEGER_C_MIN) :
1731 (INLINE_INTEGER_C_POSITIVE_MAX - static_cast<int64_t>(Imm)));
1732 // Cast prevents negative overflow.
1733}
1734
1735static int64_t getInlineImmVal32(unsigned Imm) {
1736 switch (Imm) {
1737 case 240:
1738 return llvm::bit_cast<uint32_t>(0.5f);
1739 case 241:
1740 return llvm::bit_cast<uint32_t>(-0.5f);
1741 case 242:
1742 return llvm::bit_cast<uint32_t>(1.0f);
1743 case 243:
1744 return llvm::bit_cast<uint32_t>(-1.0f);
1745 case 244:
1746 return llvm::bit_cast<uint32_t>(2.0f);
1747 case 245:
1748 return llvm::bit_cast<uint32_t>(-2.0f);
1749 case 246:
1750 return llvm::bit_cast<uint32_t>(4.0f);
1751 case 247:
1752 return llvm::bit_cast<uint32_t>(-4.0f);
1753 case 248: // 1 / (2 * PI)
1754 return 0x3e22f983;
1755 default:
1756 llvm_unreachable("invalid fp inline imm");
1757 }
1758}
1759
1760static int64_t getInlineImmVal64(unsigned Imm) {
1761 switch (Imm) {
1762 case 240:
1763 return llvm::bit_cast<uint64_t>(0.5);
1764 case 241:
1765 return llvm::bit_cast<uint64_t>(-0.5);
1766 case 242:
1767 return llvm::bit_cast<uint64_t>(1.0);
1768 case 243:
1769 return llvm::bit_cast<uint64_t>(-1.0);
1770 case 244:
1771 return llvm::bit_cast<uint64_t>(2.0);
1772 case 245:
1773 return llvm::bit_cast<uint64_t>(-2.0);
1774 case 246:
1775 return llvm::bit_cast<uint64_t>(4.0);
1776 case 247:
1777 return llvm::bit_cast<uint64_t>(-4.0);
1778 case 248: // 1 / (2 * PI)
1779 return 0x3fc45f306dc9c882;
1780 default:
1781 llvm_unreachable("invalid fp inline imm");
1782 }
1783}
1784
1785static int64_t getInlineImmValF16(unsigned Imm) {
1786 switch (Imm) {
1787 case 240:
1788 return 0x3800;
1789 case 241:
1790 return 0xB800;
1791 case 242:
1792 return 0x3C00;
1793 case 243:
1794 return 0xBC00;
1795 case 244:
1796 return 0x4000;
1797 case 245:
1798 return 0xC000;
1799 case 246:
1800 return 0x4400;
1801 case 247:
1802 return 0xC400;
1803 case 248: // 1 / (2 * PI)
1804 return 0x3118;
1805 default:
1806 llvm_unreachable("invalid fp inline imm");
1807 }
1808}
1809
1810static int64_t getInlineImmValBF16(unsigned Imm) {
1811 switch (Imm) {
1812 case 240:
1813 return 0x3F00;
1814 case 241:
1815 return 0xBF00;
1816 case 242:
1817 return 0x3F80;
1818 case 243:
1819 return 0xBF80;
1820 case 244:
1821 return 0x4000;
1822 case 245:
1823 return 0xC000;
1824 case 246:
1825 return 0x4080;
1826 case 247:
1827 return 0xC080;
1828 case 248: // 1 / (2 * PI)
1829 return 0x3E22;
1830 default:
1831 llvm_unreachable("invalid fp inline imm");
1832 }
1833}
1834
1835unsigned AMDGPUDisassembler::getVgprClassId(unsigned Width) const {
1836 using namespace AMDGPU;
1837
1838 switch (Width) {
1839 case 16:
1840 case 32:
1841 return VGPR_32RegClassID;
1842 case 64:
1843 return VReg_64RegClassID;
1844 case 96:
1845 return VReg_96RegClassID;
1846 case 128:
1847 return VReg_128RegClassID;
1848 case 160:
1849 return VReg_160RegClassID;
1850 case 192:
1851 return VReg_192RegClassID;
1852 case 256:
1853 return VReg_256RegClassID;
1854 case 288:
1855 return VReg_288RegClassID;
1856 case 320:
1857 return VReg_320RegClassID;
1858 case 352:
1859 return VReg_352RegClassID;
1860 case 384:
1861 return VReg_384RegClassID;
1862 case 512:
1863 return VReg_512RegClassID;
1864 case 1024:
1865 return VReg_1024RegClassID;
1866 }
1867 llvm_unreachable("Invalid register width!");
1868}
1869
1870unsigned AMDGPUDisassembler::getAgprClassId(unsigned Width) const {
1871 using namespace AMDGPU;
1872
1873 switch (Width) {
1874 case 16:
1875 case 32:
1876 return AGPR_32RegClassID;
1877 case 64:
1878 return AReg_64RegClassID;
1879 case 96:
1880 return AReg_96RegClassID;
1881 case 128:
1882 return AReg_128RegClassID;
1883 case 160:
1884 return AReg_160RegClassID;
1885 case 256:
1886 return AReg_256RegClassID;
1887 case 288:
1888 return AReg_288RegClassID;
1889 case 320:
1890 return AReg_320RegClassID;
1891 case 352:
1892 return AReg_352RegClassID;
1893 case 384:
1894 return AReg_384RegClassID;
1895 case 512:
1896 return AReg_512RegClassID;
1897 case 1024:
1898 return AReg_1024RegClassID;
1899 }
1900 llvm_unreachable("Invalid register width!");
1901}
1902
1903unsigned AMDGPUDisassembler::getSgprClassId(unsigned Width) const {
1904 using namespace AMDGPU;
1905
1906 switch (Width) {
1907 case 16:
1908 case 32:
1909 return SGPR_32RegClassID;
1910 case 64:
1911 return SGPR_64RegClassID;
1912 case 96:
1913 return SGPR_96RegClassID;
1914 case 128:
1915 return SGPR_128RegClassID;
1916 case 160:
1917 return SGPR_160RegClassID;
1918 case 256:
1919 return SGPR_256RegClassID;
1920 case 288:
1921 return SGPR_288RegClassID;
1922 case 320:
1923 return SGPR_320RegClassID;
1924 case 352:
1925 return SGPR_352RegClassID;
1926 case 384:
1927 return SGPR_384RegClassID;
1928 case 512:
1929 return SGPR_512RegClassID;
1930 }
1931 llvm_unreachable("Invalid register width!");
1932}
1933
1934unsigned AMDGPUDisassembler::getTtmpClassId(unsigned Width) const {
1935 using namespace AMDGPU;
1936
1937 switch (Width) {
1938 case 16:
1939 case 32:
1940 return TTMP_32RegClassID;
1941 case 64:
1942 return TTMP_64RegClassID;
1943 case 128:
1944 return TTMP_128RegClassID;
1945 case 256:
1946 return TTMP_256RegClassID;
1947 case 288:
1948 return TTMP_288RegClassID;
1949 case 320:
1950 return TTMP_320RegClassID;
1951 case 352:
1952 return TTMP_352RegClassID;
1953 case 384:
1954 return TTMP_384RegClassID;
1955 case 512:
1956 return TTMP_512RegClassID;
1957 }
1958 llvm_unreachable("Invalid register width!");
1959}
1960
1961int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
1962 using namespace AMDGPU::EncValues;
1963
1964 unsigned TTmpMin = isGFX9Plus() ? TTMP_GFX9PLUS_MIN : TTMP_VI_MIN;
1965 unsigned TTmpMax = isGFX9Plus() ? TTMP_GFX9PLUS_MAX : TTMP_VI_MAX;
1966
1967 return (TTmpMin <= Val && Val <= TTmpMax)? Val - TTmpMin : -1;
1968}
1969
1971 unsigned Val) const {
1972 using namespace AMDGPU::EncValues;
1973
1974 assert(Val < 1024); // enum10
1975
// Bit 9 (value 512) selects the AGPR classes instead of the VGPR classes; the
// remaining nine bits are the operand encoding proper.
1976 bool IsAGPR = Val & 512;
1977 Val &= 511;
1978
1979 if (VGPR_MIN <= Val && Val <= VGPR_MAX) {
1980 return createRegOperand(IsAGPR ? getAgprClassId(Width)
1981 : getVgprClassId(Width), Val - VGPR_MIN);
1982 }
// Everything outside the vector register range is decoded from the low
// eight bits only.
1983 return decodeNonVGPRSrcOp(Inst, Width, Val & 0xFF);
1984}
1985
1987 unsigned Width,
1988 unsigned Val) const {
1989 // Cases when Val{8} is 1 (vgpr, agpr or true 16 vgpr) should have been
1990 // decoded earlier.
1991 assert(Val < (1 << 8) && "9-bit Src encoding when Val{8} is 0");
1992 using namespace AMDGPU::EncValues;
1993
// The checks below run in order: SGPRs, TTMPs, inline constants / 32-bit
// literal marker, 64-bit literal marker, and finally special registers
// selected by operand width.
1994 if (Val <= SGPR_MAX) {
1995 // "SGPR_MIN <= Val" is always true and causes compilation warning.
1996 static_assert(SGPR_MIN == 0);
1997 return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
1998 }
1999
2000 int TTmpIdx = getTTmpIdx(Val);
2001 if (TTmpIdx >= 0) {
2002 return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
2003 }
2004
// Inline constants and the literal marker are returned as the raw encoding
// value, not as the constant it stands for.
// NOTE(review): presumably a later step replaces it with the real value - confirm.
2005 if ((INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX) ||
2006 (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX) ||
2007 Val == LITERAL_CONST)
2008 return MCOperand::createImm(Val);
2009
// The 64-bit literal marker is only honoured on subtargets with the
// 64-bit-literals feature; otherwise the value falls through to the switch.
2010 if (Val == LITERAL64_CONST && STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
2011 return decodeLiteral64Constant();
2012 }
2013
2014 switch (Width) {
2015 case 32:
2016 case 16:
2017 return decodeSpecialReg32(Val);
2018 case 64:
2019 return decodeSpecialReg64(Val);
2020 case 96:
2021 case 128:
2022 case 256:
2023 case 512:
2024 return decodeSpecialReg96Plus(Val);
2025 default:
2026 llvm_unreachable("unexpected immediate type");
2027 }
2028}
2029
2030 // Bit 0 of DstY isn't stored in the instruction, because it's always the
2031 // opposite of bit 0 of DstX.
2033 unsigned Val) const {
// The vdstX operand must already be present in Inst as a register, since its
// encoding supplies the missing bit.
2034 int VDstXInd =
2035 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::vdstX);
2036 assert(VDstXInd != -1);
2037 assert(Inst.getOperand(VDstXInd).isReg());
2038 unsigned XDstReg = MRI.getEncodingValue(Inst.getOperand(VDstXInd).getReg());
// OR in the inverse of bit 0 of the vdstX register encoding.
2039 Val |= ~XDstReg & 1;
2040 return createRegOperand(getVgprClassId(32), Val);
2041}
2042
// Map an operand encoding Val to the named register it denotes. Encodings
// not listed in the switch produce an error operand.
2044 using namespace AMDGPU;
2045
2046 switch (Val) {
2047 // clang-format off
2048 case 102: return createRegOperand(FLAT_SCR_LO);
2049 case 103: return createRegOperand(FLAT_SCR_HI);
2050 case 104: return createRegOperand(XNACK_MASK_LO);
2051 case 105: return createRegOperand(XNACK_MASK_HI);
2052 case 106: return createRegOperand(VCC_LO);
2053 case 107: return createRegOperand(VCC_HI);
2054 case 108: return createRegOperand(TBA_LO);
2055 case 109: return createRegOperand(TBA_HI);
2056 case 110: return createRegOperand(TMA_LO);
2057 case 111: return createRegOperand(TMA_HI);
// Encodings 124 and 125 swap meaning on GFX11+: 124 is SGPR_NULL and 125 is
// M0 there, the other way round on earlier targets.
2058 case 124:
2059 return isGFX11Plus() ? createRegOperand(SGPR_NULL) : createRegOperand(M0);
2060 case 125:
2061 return isGFX11Plus() ? createRegOperand(M0) : createRegOperand(SGPR_NULL);
2062 case 126: return createRegOperand(EXEC_LO);
2063 case 127: return createRegOperand(EXEC_HI);
2064 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2065 case 231: return createRegOperand(SRC_FLAT_SCRATCH_BASE_HI);
2066 case 235: return createRegOperand(SRC_SHARED_BASE_LO);
2067 case 236: return createRegOperand(SRC_SHARED_LIMIT_LO);
2068 case 237: return createRegOperand(SRC_PRIVATE_BASE_LO);
2069 case 238: return createRegOperand(SRC_PRIVATE_LIMIT_LO);
2070 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2071 case 251: return createRegOperand(SRC_VCCZ);
2072 case 252: return createRegOperand(SRC_EXECZ);
2073 case 253: return createRegOperand(SRC_SCC);
2074 case 254: return createRegOperand(LDS_DIRECT);
2075 default: break;
2076 // clang-format on
2077 }
2078 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2079}
2080
2082 using namespace AMDGPU;
2083
2084 switch (Val) {
2085 case 102: return createRegOperand(FLAT_SCR);
2086 case 104: return createRegOperand(XNACK_MASK);
2087 case 106: return createRegOperand(VCC);
2088 case 108: return createRegOperand(TBA);
2089 case 110: return createRegOperand(TMA);
2090 case 124:
2091 if (isGFX11Plus())
2092 return createRegOperand(SGPR_NULL);
2093 break;
2094 case 125:
2095 if (!isGFX11Plus())
2096 return createRegOperand(SGPR_NULL);
2097 break;
2098 case 126: return createRegOperand(EXEC);
2099 case 230: return createRegOperand(SRC_FLAT_SCRATCH_BASE_LO);
2100 case 235: return createRegOperand(SRC_SHARED_BASE);
2101 case 236: return createRegOperand(SRC_SHARED_LIMIT);
2102 case 237: return createRegOperand(SRC_PRIVATE_BASE);
2103 case 238: return createRegOperand(SRC_PRIVATE_LIMIT);
2104 case 239: return createRegOperand(SRC_POPS_EXITING_WAVE_ID);
2105 case 251: return createRegOperand(SRC_VCCZ);
2106 case 252: return createRegOperand(SRC_EXECZ);
2107 case 253: return createRegOperand(SRC_SCC);
2108 default: break;
2109 }
2110 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2111}
2112
2114 using namespace AMDGPU;
2115
2116 switch (Val) {
2117 case 124:
2118 if (isGFX11Plus())
2119 return createRegOperand(SGPR_NULL);
2120 break;
2121 case 125:
2122 if (!isGFX11Plus())
2123 return createRegOperand(SGPR_NULL);
2124 break;
2125 default:
2126 break;
2127 }
2128 return errOperand(Val, "unknown operand encoding " + Twine(Val));
2129}
2130
2132 const unsigned Val) const {
2133 using namespace AMDGPU::SDWA;
2134 using namespace AMDGPU::EncValues;
2135
2136 if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
2137 STI.hasFeature(AMDGPU::FeatureGFX10)) {
2138 // XXX: cast to int is needed to avoid stupid warning:
2139 // compare with unsigned is always true
2140 if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
2141 Val <= SDWA9EncValues::SRC_VGPR_MAX) {
2142 return createRegOperand(getVgprClassId(Width),
2143 Val - SDWA9EncValues::SRC_VGPR_MIN);
2144 }
2145 if (SDWA9EncValues::SRC_SGPR_MIN <= Val &&
2146 Val <= (isGFX10Plus() ? SDWA9EncValues::SRC_SGPR_MAX_GFX10
2147 : SDWA9EncValues::SRC_SGPR_MAX_SI)) {
2148 return createSRegOperand(getSgprClassId(Width),
2149 Val - SDWA9EncValues::SRC_SGPR_MIN);
2150 }
2151 if (SDWA9EncValues::SRC_TTMP_MIN <= Val &&
2152 Val <= SDWA9EncValues::SRC_TTMP_MAX) {
2153 return createSRegOperand(getTtmpClassId(Width),
2154 Val - SDWA9EncValues::SRC_TTMP_MIN);
2155 }
2156
2157 const unsigned SVal = Val - SDWA9EncValues::SRC_SGPR_MIN;
2158
2159 if ((INLINE_INTEGER_C_MIN <= SVal && SVal <= INLINE_INTEGER_C_MAX) ||
2160 (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX))
2161 return MCOperand::createImm(SVal);
2162
2163 return decodeSpecialReg32(SVal);
2164 }
2165 if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands))
2166 return createRegOperand(getVgprClassId(Width), Val);
2167 llvm_unreachable("unsupported target");
2168}
2169
2171 return decodeSDWASrc(16, Val);
2172}
2173
2175 return decodeSDWASrc(32, Val);
2176}
2177
2179 using namespace AMDGPU::SDWA;
2180
2181 assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
2182 STI.hasFeature(AMDGPU::FeatureGFX10)) &&
2183 "SDWAVopcDst should be present only on GFX9+");
2184
2185 bool IsWave32 = STI.hasFeature(AMDGPU::FeatureWavefrontSize32);
2186
2187 if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
2188 Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
2189
2190 int TTmpIdx = getTTmpIdx(Val);
2191 if (TTmpIdx >= 0) {
2192 auto TTmpClsId = getTtmpClassId(IsWave32 ? 32 : 64);
2193 return createSRegOperand(TTmpClsId, TTmpIdx);
2194 }
2195 if (Val > SGPR_MAX) {
2196 return IsWave32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
2197 }
2198 return createSRegOperand(getSgprClassId(IsWave32 ? 32 : 64), Val);
2199 }
2200 return createRegOperand(IsWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC);
2201}
2202
2204 unsigned Val) const {
2205 return STI.hasFeature(AMDGPU::FeatureWavefrontSize32)
2206 ? decodeSrcOp(Inst, 32, Val)
2207 : decodeSrcOp(Inst, 64, Val);
2208}
2209
2211 unsigned Val) const {
2212 return decodeSrcOp(Inst, 32, Val);
2213}
2214
2217 return MCOperand();
2218 return MCOperand::createImm(Val);
2219}
2220
2222 using VersionField = AMDGPU::EncodingField<7, 0>;
2223 using W64Bit = AMDGPU::EncodingBit<13>;
2224 using W32Bit = AMDGPU::EncodingBit<14>;
2225 using MDPBit = AMDGPU::EncodingBit<15>;
2227
2228 auto [Version, W64, W32, MDP] = Encoding::decode(Imm);
2229
2230 // Decode into a plain immediate if any unused bits are raised.
2231 if (Encoding::encode(Version, W64, W32, MDP) != Imm)
2232 return MCOperand::createImm(Imm);
2233
2234 const auto &Versions = AMDGPU::UCVersion::getGFXVersions();
2235 const auto *I = find_if(
2236 Versions, [Version = Version](const AMDGPU::UCVersion::GFXVersion &V) {
2237 return V.Code == Version;
2238 });
2239 MCContext &Ctx = getContext();
2240 const MCExpr *E;
2241 if (I == Versions.end())
2243 else
2244 E = MCSymbolRefExpr::create(Ctx.getOrCreateSymbol(I->Symbol), Ctx);
2245
2246 if (W64)
2247 E = MCBinaryExpr::createOr(E, UCVersionW64Expr, Ctx);
2248 if (W32)
2249 E = MCBinaryExpr::createOr(E, UCVersionW32Expr, Ctx);
2250 if (MDP)
2251 E = MCBinaryExpr::createOr(E, UCVersionMDPExpr, Ctx);
2252
2253 return MCOperand::createExpr(E);
2254}
2255
2257 return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
2258}
2259
2261
2263 return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
2264}
2265
2267
2269
2273
2275 return STI.hasFeature(AMDGPU::FeatureGFX11);
2276}
2277
2281
2283 return STI.hasFeature(AMDGPU::FeatureGFX11_7Insts);
2284}
2285
2287 return STI.hasFeature(AMDGPU::FeatureGFX12);
2288}
2289
2293
2295
2299
2301
2305
2307 return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
2308}
2309
2313
2314//===----------------------------------------------------------------------===//
2315// AMDGPU specific symbol handling
2316//===----------------------------------------------------------------------===//
2317
2318 /// Print a string describing the reserved bit range specified by \p Mask with
2319 /// offset \p BaseBytes (in bytes) for use in error comments. Mask is a single continuous
2320 /// range of 1s surrounded by zeros. Yields "bit (N)" for one set bit, else "bits in range (HI:LO)". The format here is meant to align with the
2321 /// tables that describe these bits in llvm.org/docs/AMDGPUUsage.html.
2322 static SmallString<32> getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes) {
2323 SmallString<32> Result;
2324 raw_svector_ostream S(Result); // Everything streamed into S is appended to Result.
2325
2326 int TrailingZeros = llvm::countr_zero(Mask); // Index of the lowest set bit in Mask.
2327 int PopCount = llvm::popcount(Mask); // Number of set bits, i.e. the width of the range.
2328
2329 if (PopCount == 1) { // Single bit: print only its index, shifted by the byte offset.
2330 S << "bit (" << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2331 } else { // Otherwise print the range as (highest bit:lowest bit).
2332 S << "bits in range ("
2333 << (TrailingZeros + PopCount - 1 + BaseBytes * CHAR_BIT) << ':'
2334 << (TrailingZeros + BaseBytes * CHAR_BIT) << ')';
2335 }
2336
2337 return Result;
2338 }
2339
2340#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
2341#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2342 do { \
2343 KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
2344 } while (0)
2345#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
2346 do { \
2347 KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
2348 << GET_FIELD(MASK) << '\n'; \
2349 } while (0)
2350
2351#define CHECK_RESERVED_BITS_IMPL(MASK, DESC, MSG) \
2352 do { \
2353 if (FourByteBuffer & (MASK)) { \
2354 return createStringError(std::errc::invalid_argument, \
2355 "kernel descriptor " DESC \
2356 " reserved %s set" MSG, \
2357 getBitRangeFromMask((MASK), 0).c_str()); \
2358 } \
2359 } while (0)
2360
2361#define CHECK_RESERVED_BITS(MASK) CHECK_RESERVED_BITS_IMPL(MASK, #MASK, "")
2362#define CHECK_RESERVED_BITS_MSG(MASK, MSG) \
2363 CHECK_RESERVED_BITS_IMPL(MASK, #MASK, ", " MSG)
2364#define CHECK_RESERVED_BITS_DESC(MASK, DESC) \
2365 CHECK_RESERVED_BITS_IMPL(MASK, DESC, "")
2366#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG) \
2367 CHECK_RESERVED_BITS_IMPL(MASK, DESC, ", " MSG)
2368
2369// NOLINTNEXTLINE(readability-identifier-naming)
2371 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2372 using namespace amdhsa;
2373 StringRef Indent = "\t";
2374
2375 // We cannot accurately backward compute #VGPRs used from
2376 // GRANULATED_WORKITEM_VGPR_COUNT. But we are concerned with getting the same
2377 // value of GRANULATED_WORKITEM_VGPR_COUNT in the reassembled binary. So we
2378 // simply calculate the inverse of what the assembler does.
2379
2380 uint32_t GranulatedWorkitemVGPRCount =
2381 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
2382
2383 uint32_t NextFreeVGPR =
2384 (GranulatedWorkitemVGPRCount + 1) *
2385 AMDGPU::IsaInfo::getVGPREncodingGranule(STI, EnableWavefrontSize32);
2386
2387 KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
2388
2389 // We cannot backward compute values used to calculate
2390 // GRANULATED_WAVEFRONT_SGPR_COUNT. Hence the original values for following
2391 // directives can't be computed:
2392 // .amdhsa_reserve_vcc
2393 // .amdhsa_reserve_flat_scratch
2394 // .amdhsa_reserve_xnack_mask
2395 // They take their respective default values if not specified in the assembly.
2396 //
2397 // GRANULATED_WAVEFRONT_SGPR_COUNT
2398 // = f(NEXT_FREE_SGPR + VCC + FLAT_SCRATCH + XNACK_MASK)
2399 //
2400 // We compute the inverse as though all directives apart from NEXT_FREE_SGPR
2401 // are set to 0. So while disassembling we consider that:
2402 //
2403 // GRANULATED_WAVEFRONT_SGPR_COUNT
2404 // = f(NEXT_FREE_SGPR + 0 + 0 + 0)
2405 //
2406 // The disassembler cannot recover the original values of those 3 directives.
2407
2408 uint32_t GranulatedWavefrontSGPRCount =
2409 GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
2410
2411 if (isGFX10Plus())
2412 CHECK_RESERVED_BITS_MSG(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
2413 "must be zero on gfx10+");
2414
2415 uint32_t NextFreeSGPR = (GranulatedWavefrontSGPRCount + 1) *
2417
2418 KdStream << Indent << ".amdhsa_reserve_vcc " << 0 << '\n';
2420 KdStream << Indent << ".amdhsa_reserve_flat_scratch " << 0 << '\n';
2421 bool ReservedXnackMask = STI.hasFeature(AMDGPU::FeatureXNACK);
2422 assert(!ReservedXnackMask || STI.hasFeature(AMDGPU::FeatureSupportsXNACK));
2423 KdStream << Indent << ".amdhsa_reserve_xnack_mask " << ReservedXnackMask
2424 << '\n';
2425 KdStream << Indent << ".amdhsa_next_free_sgpr " << NextFreeSGPR << "\n";
2426
2427 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIORITY);
2428
2429 PRINT_DIRECTIVE(".amdhsa_float_round_mode_32",
2430 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
2431 PRINT_DIRECTIVE(".amdhsa_float_round_mode_16_64",
2432 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
2433 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_32",
2434 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
2435 PRINT_DIRECTIVE(".amdhsa_float_denorm_mode_16_64",
2436 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
2437
2438 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_PRIV);
2439
2440 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2441 PRINT_DIRECTIVE(".amdhsa_dx10_clamp",
2442 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP);
2443
2444 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_DEBUG_MODE);
2445
2446 if (STI.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
2447 PRINT_DIRECTIVE(".amdhsa_ieee_mode",
2448 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE);
2449
2450 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_BULKY);
2451 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC1_CDBG_USER);
2452
2453 // Bits [26].
2454 if (isGFX9Plus()) {
2455 PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
2456 } else {
2457 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0,
2458 "COMPUTE_PGM_RSRC1", "must be zero pre-gfx9");
2459 }
2460
2461 // Bits [27].
2462 if (isGFX1250Plus()) {
2463 PRINT_PSEUDO_DIRECTIVE_COMMENT("FLAT_SCRATCH_IS_NV",
2464 COMPUTE_PGM_RSRC1_GFX125_FLAT_SCRATCH_IS_NV);
2465 } else {
2466 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX120_RESERVED1,
2467 "COMPUTE_PGM_RSRC1");
2468 }
2469
2470 // Bits [28].
2471 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_RESERVED2, "COMPUTE_PGM_RSRC1");
2472
2473 // Bits [29-31].
2474 if (isGFX10Plus()) {
2475 // WGP_MODE is not available on GFX1250.
2476 if (!isGFX1250Plus()) {
2477 PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
2478 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
2479 }
2480 PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
2481 PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
2482 } else {
2483 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED3,
2484 "COMPUTE_PGM_RSRC1");
2485 }
2486
2487 if (isGFX12Plus())
2488 PRINT_DIRECTIVE(".amdhsa_round_robin_scheduling",
2489 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN);
2490
2491 return true;
2492}
2493
2494// NOLINTNEXTLINE(readability-identifier-naming)
2496 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2497 using namespace amdhsa;
2498 StringRef Indent = "\t";
2500 PRINT_DIRECTIVE(".amdhsa_enable_private_segment",
2501 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2502 else
2503 PRINT_DIRECTIVE(".amdhsa_system_sgpr_private_segment_wavefront_offset",
2504 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT);
2505 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_x",
2506 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
2507 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_y",
2508 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
2509 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_id_z",
2510 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
2511 PRINT_DIRECTIVE(".amdhsa_system_sgpr_workgroup_info",
2512 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
2513 PRINT_DIRECTIVE(".amdhsa_system_vgpr_workitem_id",
2514 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
2515
2516 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_ADDRESS_WATCH);
2517 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_MEMORY);
2518 CHECK_RESERVED_BITS(COMPUTE_PGM_RSRC2_GRANULATED_LDS_SIZE);
2519
2521 ".amdhsa_exception_fp_ieee_invalid_op",
2522 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
2523 PRINT_DIRECTIVE(".amdhsa_exception_fp_denorm_src",
2524 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
2526 ".amdhsa_exception_fp_ieee_div_zero",
2527 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
2528 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_overflow",
2529 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
2530 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_underflow",
2531 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
2532 PRINT_DIRECTIVE(".amdhsa_exception_fp_ieee_inexact",
2533 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
2534 PRINT_DIRECTIVE(".amdhsa_exception_int_div_zero",
2535 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
2536
2537 CHECK_RESERVED_BITS_DESC(COMPUTE_PGM_RSRC2_RESERVED0, "COMPUTE_PGM_RSRC2");
2538
2539 return true;
2540}
2541
2542// NOLINTNEXTLINE(readability-identifier-naming)
2544 uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
2545 using namespace amdhsa;
2546 StringRef Indent = "\t";
2547 if (isGFX90A()) {
2548 KdStream << Indent << ".amdhsa_accum_offset "
2549 << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
2550 << '\n';
2551
2552 PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
2553
2554 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED0,
2555 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2556 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX90A_RESERVED1,
2557 "COMPUTE_PGM_RSRC3", "must be zero on gfx90a");
2558 } else if (isGFX10Plus()) {
2559 // Bits [0-3].
2560 if (!isGFX12Plus()) {
2561 if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
2562 PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
2563 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2564 } else {
2566 "SHARED_VGPR_COUNT",
2567 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT);
2568 }
2569 } else {
2570 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX12_PLUS_RESERVED0,
2571 "COMPUTE_PGM_RSRC3",
2572 "must be zero on gfx12+");
2573 }
2574
2575 // Bits [4-11].
2576 if (isGFX11()) {
2577 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2578 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
2579 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
2580 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
2581 PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
2582 COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
2583 } else if (isGFX12Plus()) {
2584 PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
2585 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
2586 } else {
2587 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
2588 "COMPUTE_PGM_RSRC3",
2589 "must be zero on gfx10");
2590 }
2591
2592 // Bits [12].
2593 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED2,
2594 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2595
2596 // Bits [13].
2597 if (isGFX12Plus()) {
2599 COMPUTE_PGM_RSRC3_GFX12_PLUS_GLG_EN);
2600 } else {
2601 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX11_RESERVED3,
2602 "COMPUTE_PGM_RSRC3",
2603 "must be zero on gfx10 or gfx11");
2604 }
2605
2606 // Bits [14-21].
2607 if (isGFX1250Plus()) {
2608 PRINT_DIRECTIVE(".amdhsa_named_barrier_count",
2609 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT);
2611 "ENABLE_DYNAMIC_VGPR", COMPUTE_PGM_RSRC3_GFX125_ENABLE_DYNAMIC_VGPR);
2613 COMPUTE_PGM_RSRC3_GFX125_TCP_SPLIT);
2615 "ENABLE_DIDT_THROTTLE",
2616 COMPUTE_PGM_RSRC3_GFX125_ENABLE_DIDT_THROTTLE);
2617 } else {
2618 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_GFX120_RESERVED4,
2619 "COMPUTE_PGM_RSRC3",
2620 "must be zero on gfx10+");
2621 }
2622
2623 // Bits [22-30].
2624 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED5,
2625 "COMPUTE_PGM_RSRC3", "must be zero on gfx10+");
2626
2627 // Bits [31].
2628 if (isGFX11Plus()) {
2630 COMPUTE_PGM_RSRC3_GFX11_PLUS_IMAGE_OP);
2631 } else {
2632 CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED6,
2633 "COMPUTE_PGM_RSRC3",
2634 "must be zero on gfx10");
2635 }
2636 } else if (FourByteBuffer) {
2637 return createStringError(
2638 std::errc::invalid_argument,
2639 "kernel descriptor COMPUTE_PGM_RSRC3 must be all zero before gfx9");
2640 }
2641 return true;
2642}
2643#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
2644#undef PRINT_DIRECTIVE
2645#undef GET_FIELD
2646#undef CHECK_RESERVED_BITS_IMPL
2647#undef CHECK_RESERVED_BITS
2648#undef CHECK_RESERVED_BITS_MSG
2649#undef CHECK_RESERVED_BITS_DESC
2650#undef CHECK_RESERVED_BITS_DESC_MSG
2651
2652 /// Create an error object to return from onSymbolStart for reserved kernel
2653 /// descriptor bits being set. The bit range text comes from getBitRangeFromMask(\p Mask, \p BaseBytes); \p Msg is an optional suffix.
2654 static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes,
2655 const char *Msg = "") {
2656 return createStringError(
2657 std::errc::invalid_argument, "kernel descriptor reserved %s set%s%s",
2658 getBitRangeFromMask(Mask, BaseBytes).c_str(), *Msg ? ", " : "", Msg); // ", " separator is emitted only when Msg is non-empty.
2659 }
2660
2661 /// Create an error object to return from onSymbolStart for reserved kernel
2662 /// descriptor bytes being set. The range is \p WidthInBytes bytes starting at byte \p BaseInBytes, reported as bit numbers.
2663 static Error createReservedKDBytesError(unsigned BaseInBytes,
2664 unsigned WidthInBytes) {
2665 // Create an error comment in the same format as the "Kernel Descriptor"
2666 // table here: https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor .
2667 return createStringError(
2668 std::errc::invalid_argument,
2669 "kernel descriptor reserved bits in range (%u:%u) set",
2670 (BaseInBytes + WidthInBytes) * CHAR_BIT - 1, BaseInBytes * CHAR_BIT); // (last bit of the range : first bit of the range)
2671 }
2672
2675 raw_string_ostream &KdStream) const {
2676#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
2677 do { \
2678 KdStream << Indent << DIRECTIVE " " \
2679 << ((TwoByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
2680 } while (0)
2681
2682 uint16_t TwoByteBuffer = 0;
2683 uint32_t FourByteBuffer = 0;
2684
2685 StringRef ReservedBytes;
2686 StringRef Indent = "\t";
2687
2688 assert(Bytes.size() == 64);
2689 DataExtractor DE(Bytes, /*IsLittleEndian=*/true);
2690
2691 switch (Cursor.tell()) {
2693 FourByteBuffer = DE.getU32(Cursor);
2694 KdStream << Indent << ".amdhsa_group_segment_fixed_size " << FourByteBuffer
2695 << '\n';
2696 return true;
2697
2699 FourByteBuffer = DE.getU32(Cursor);
2700 KdStream << Indent << ".amdhsa_private_segment_fixed_size "
2701 << FourByteBuffer << '\n';
2702 return true;
2703
2705 FourByteBuffer = DE.getU32(Cursor);
2706 KdStream << Indent << ".amdhsa_kernarg_size "
2707 << FourByteBuffer << '\n';
2708 return true;
2709
2711 // 4 reserved bytes, must be 0.
2712 ReservedBytes = DE.getBytes(Cursor, 4);
2713 for (char B : ReservedBytes) {
2714 if (B != 0)
2716 }
2717 return true;
2718
2720 // KERNEL_CODE_ENTRY_BYTE_OFFSET
2721 // So far no directive controls this for Code Object V3, so simply skip for
2722 // disassembly.
2723 DE.skip(Cursor, 8);
2724 return true;
2725
2727 // 20 reserved bytes, must be 0.
2728 ReservedBytes = DE.getBytes(Cursor, 20);
2729 for (char B : ReservedBytes) {
2730 if (B != 0)
2732 }
2733 return true;
2734
2736 FourByteBuffer = DE.getU32(Cursor);
2737 return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
2738
2740 FourByteBuffer = DE.getU32(Cursor);
2741 return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
2742
2744 FourByteBuffer = DE.getU32(Cursor);
2745 return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
2746
2748 using namespace amdhsa;
2749 TwoByteBuffer = DE.getU16(Cursor);
2750
2752 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_buffer",
2753 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
2754 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_ptr",
2755 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
2756 PRINT_DIRECTIVE(".amdhsa_user_sgpr_queue_ptr",
2757 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
2758 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_segment_ptr",
2759 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
2760 PRINT_DIRECTIVE(".amdhsa_user_sgpr_dispatch_id",
2761 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
2763 PRINT_DIRECTIVE(".amdhsa_user_sgpr_flat_scratch_init",
2764 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
2765 PRINT_DIRECTIVE(".amdhsa_user_sgpr_private_segment_size",
2766 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
2767
2768 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED0)
2769 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED0,
2771
2772 // Reserved for GFX9
2773 if (isGFX9() &&
2774 (TwoByteBuffer & KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32)) {
2776 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
2777 amdhsa::KERNEL_CODE_PROPERTIES_OFFSET, "must be zero on gfx9");
2778 }
2779 if (isGFX10Plus()) {
2780 PRINT_DIRECTIVE(".amdhsa_wavefront_size32",
2781 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2782 }
2783
2784 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
2785 PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
2786 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
2787
2788 if (TwoByteBuffer & KERNEL_CODE_PROPERTY_RESERVED1) {
2789 return createReservedKDBitsError(KERNEL_CODE_PROPERTY_RESERVED1,
2791 }
2792
2793 return true;
2794
2796 using namespace amdhsa;
2797 TwoByteBuffer = DE.getU16(Cursor);
2798 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_LENGTH) {
2799 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_length",
2800 KERNARG_PRELOAD_SPEC_LENGTH);
2801 }
2802
2803 if (TwoByteBuffer & KERNARG_PRELOAD_SPEC_OFFSET) {
2804 PRINT_DIRECTIVE(".amdhsa_user_sgpr_kernarg_preload_offset",
2805 KERNARG_PRELOAD_SPEC_OFFSET);
2806 }
2807 return true;
2808
2810 // 4 bytes from here are reserved, must be 0.
2811 ReservedBytes = DE.getBytes(Cursor, 4);
2812 for (char B : ReservedBytes) {
2813 if (B != 0)
2815 }
2816 return true;
2817
2818 default:
2819 llvm_unreachable("Unhandled index. Case statements cover everything.");
2820 return true;
2821 }
2822#undef PRINT_DIRECTIVE
2823}
2824
2826 StringRef KdName, ArrayRef<uint8_t> Bytes, uint64_t KdAddress) const {
2827
2828 // CP microcode requires the kernel descriptor to be 64-byte aligned.
2829 if (Bytes.size() != 64 || KdAddress % 64 != 0)
2830 return createStringError(std::errc::invalid_argument,
2831 "kernel descriptor must be 64-byte aligned");
2832
2833 // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
2834 // requires us to know the setting of .amdhsa_wavefront_size32 in order to
2835 // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
2836 // order. Work around this by first looking up .amdhsa_wavefront_size32 here
2837 // when required.
2838 if (isGFX10Plus()) {
2839 uint16_t KernelCodeProperties =
2842 EnableWavefrontSize32 =
2843 AMDHSA_BITS_GET(KernelCodeProperties,
2844 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
2845 }
2846
2847 std::string Kd;
2848 raw_string_ostream KdStream(Kd);
2849 KdStream << ".amdhsa_kernel " << KdName << '\n';
2850
2852 while (C && C.tell() < Bytes.size()) {
2853 Expected<bool> Res = decodeKernelDescriptorDirective(C, Bytes, KdStream);
2854
2855 cantFail(C.takeError());
2856
2857 if (!Res)
2858 return Res;
2859 }
2860 KdStream << ".end_amdhsa_kernel\n";
2861 outs() << KdStream.str();
2862 return true;
2863}
2864
2866 uint64_t &Size,
2867 ArrayRef<uint8_t> Bytes,
2868 uint64_t Address) const {
2869 // Right now only kernel descriptor needs to be handled.
2870 // We ignore all other symbols for target specific handling.
2871 // TODO:
2872 // Fix the spurious symbol issue for AMDGPU kernels. Exists for both Code
2873 // Object V2 and V3 when symbols are marked protected.
2874
2875 // amd_kernel_code_t for Code Object V2.
2876 if (Symbol.Type == ELF::STT_AMDGPU_HSA_KERNEL) {
2877 Size = 256;
2878 return createStringError(std::errc::invalid_argument,
2879 "code object v2 is not supported");
2880 }
2881
2882 // Code Object V3 kernel descriptors.
2883 StringRef Name = Symbol.Name;
2884 if (Symbol.Type == ELF::STT_OBJECT && Name.ends_with(StringRef(".kd"))) {
2885 Size = 64; // Size = 64 regardless of success or failure.
2886 return decodeKernelDescriptor(Name.drop_back(3), Bytes, Address);
2887 }
2888
2889 return false;
2890}
2891
2892const MCExpr *AMDGPUDisassembler::createConstantSymbolExpr(StringRef Id,
2893 int64_t Val) {
2894 MCContext &Ctx = getContext();
2895 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
2896 // Note: only set value to Val on a new symbol in case a disassembler
2897 // has already been initialized in this context.
2898 if (!Sym->isVariable()) {
2900 } else {
2901 int64_t Res = ~Val;
2902 bool Valid = Sym->getVariableValue()->evaluateAsAbsolute(Res);
2903 if (!Valid || Res != Val)
2904 Ctx.reportWarning(SMLoc(), "unsupported redefinition of " + Id);
2905 }
2906 return MCSymbolRefExpr::create(Sym, Ctx);
2907}
2908
2910 const uint64_t TSFlags = MCII->get(MI.getOpcode()).TSFlags;
2911
2912 // Check for MUBUF and MTBUF instructions
2913 if (TSFlags & (SIInstrFlags::MTBUF | SIInstrFlags::MUBUF))
2914 return true;
2915
2916 // Check for SMEM buffer instructions (S_BUFFER_* instructions)
2917 if ((TSFlags & SIInstrFlags::SMRD) && AMDGPU::getSMEMIsBuffer(MI.getOpcode()))
2918 return true;
2919
2920 return false;
2921}
2922
2923//===----------------------------------------------------------------------===//
2924// AMDGPUSymbolizer
2925//===----------------------------------------------------------------------===//
2926
2927// Try to find symbol name for specified label
2929 MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
2930 uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
2931 uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
2932
2933 if (!IsBranch) {
2934 return false;
2935 }
2936
2937 auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
2938 if (!Symbols)
2939 return false;
2940
2941 auto Result = llvm::find_if(*Symbols, [Value](const SymbolInfoTy &Val) {
2942 return Val.Addr == static_cast<uint64_t>(Value) &&
2943 Val.Type == ELF::STT_NOTYPE;
2944 });
2945 if (Result != Symbols->end()) {
2946 auto *Sym = Ctx.getOrCreateSymbol(Result->Name);
2947 const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
2949 return true;
2950 }
2951 // Add to list of referenced addresses, so caller can synthesize a label.
2952 ReferencedAddresses.push_back(static_cast<uint64_t>(Value));
2953 return false;
2954}
2955
2957 int64_t Value,
2958 uint64_t Address) {
2959 llvm_unreachable("unimplemented");
2960}
2961
2962//===----------------------------------------------------------------------===//
2963// Initialization
2964//===----------------------------------------------------------------------===//
2965
2967 LLVMOpInfoCallback /*GetOpInfo*/,
2968 LLVMSymbolLookupCallback /*SymbolLookUp*/,
2969 void *DisInfo,
2970 MCContext *Ctx,
2971 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
2972 return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
2973}
2974
2976 const MCSubtargetInfo &STI,
2977 MCContext &Ctx) {
2978 return new AMDGPUDisassembler(STI, Ctx, T.createMCInstrInfo());
2979}
2980
2981extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
MCDisassembler::DecodeStatus DecodeStatus
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
#define CHECK_RESERVED_BITS_DESC(MASK, DESC)
static VOPModifiers collectVOPModifiers(const MCInst &MI, bool IsVOP3P=false)
static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op, AMDGPU::OpName Name)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUDisassembler()
static DecodeStatus decodeOperand_VSrcT16_Lo128(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_KImmFP64(MCInst &Inst, uint64_t Imm, uint64_t Addr, const MCDisassembler *Decoder)
static SmallString< 32 > getBitRangeFromMask(uint32_t Mask, unsigned BaseBytes)
Print a string describing the reserved bit range specified by Mask with offset BaseBytes for use in e...
#define DECODE_OPERAND_SREG_8(RegClass, OpWidth)
static DecodeStatus decodeSMEMOffset(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static std::bitset< 128 > eat16Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeVersionImm(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define DECODE_OPERAND_SREG_7(RegClass, OpWidth)
static DecodeStatus decodeSrcA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK)
static DecodeStatus decodeSrcOp(MCInst &Inst, unsigned EncSize, unsigned OpWidth, unsigned Imm, unsigned EncImm, const MCDisassembler *Decoder)
static DecodeStatus decodeDpp8FI(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeOperand_VSrc_f64(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static MCRegister CheckVGPROverflow(MCRegister Reg, const MCRegisterClass &RC, const MCRegisterInfo &MRI)
static int64_t getInlineImmValBF16(unsigned Imm)
#define DECODE_SDWA(DecName)
static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
#define DECODE_OPERAND_REG_8(RegClass)
#define PRINT_DIRECTIVE(DIRECTIVE, MASK)
static DecodeStatus decodeSrcRegOrImm9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus decodeSrcReg9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static int64_t getInlineImmVal32(unsigned Imm)
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
#define CHECK_RESERVED_BITS(MASK)
static DecodeStatus decodeSrcAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define SGPR_MAX
static int64_t getInlineImmVal64(unsigned Imm)
static T eatBytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAVLdSt(MCInst &Inst, unsigned Imm, unsigned Opw, const MCDisassembler *Decoder)
static MCDisassembler * createAMDGPUDisassembler(const Target &T, const MCSubtargetInfo &STI, MCContext &Ctx)
static DecodeStatus decodeSrcRegOrImmA9(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static DecodeStatus DecodeVGPR_16_Lo128RegisterClass(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_MSG(MASK, MSG)
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val, uint64_t Addr, const void *Decoder)
static MCSymbolizer * createAMDGPUSymbolizer(const Triple &, LLVMOpInfoCallback, LLVMSymbolLookupCallback, void *DisInfo, MCContext *Ctx, std::unique_ptr< MCRelocationInfo > &&RelInfo)
static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static int64_t getInlineImmValF16(unsigned Imm)
#define GET_FIELD(MASK)
static std::bitset< 96 > eat12Bytes(ArrayRef< uint8_t > &Bytes)
static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
static Error createReservedKDBytesError(unsigned BaseInBytes, unsigned WidthInBytes)
Create an error object to return from onSymbolStart for reserved kernel descriptor bytes being set.
static DecodeStatus decodeSplitBarrier(MCInst &Inst, unsigned Val, uint64_t Addr, const MCDisassembler *Decoder)
static DecodeStatus decodeAV10(MCInst &Inst, unsigned Imm, uint64_t, const MCDisassembler *Decoder)
#define CHECK_RESERVED_BITS_DESC_MSG(MASK, DESC, MSG)
static Error createReservedKDBitsError(uint32_t Mask, unsigned BaseBytes, const char *Msg="")
Create an error object to return from onSymbolStart for reserved kernel descriptor bits being set.
static void adjustMFMA_F8F6F4OpRegClass(const MCRegisterInfo &MRI, MCOperand &MO, uint8_t NumRegs)
Adjust the register values used by V_MFMA_F8F6F4_f8_f8 instructions to the appropriate subregister fo...
This file contains the declaration of the AMDGPU ISA disassembler.
Provides AMDGPU specific target descriptions.
AMDHSA kernel descriptor definitions.
#define AMDHSA_BITS_GET(SRC, MSK)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
#define T
MachineInstr unsigned OpIdx
Interface definition for SIRegisterInfo.
MCOperand decodeNonVGPRSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeLiteral64Constant() const
void convertVOPC64DPPInst(MCInst &MI) const
bool isBufferInstruction(const MCInst &MI) const
Check if the instruction is a buffer operation (MUBUF, MTBUF, or S_BUFFER)
void convertEXPInst(MCInst &MI) const
MCOperand decodeSpecialReg64(unsigned Val) const
const char * getRegClassName(unsigned RegClassID) const
Expected< bool > decodeCOMPUTE_PGM_RSRC1(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC1.
MCOperand decodeSplitBarrier(const MCInst &Inst, unsigned Val) const
Expected< bool > decodeKernelDescriptorDirective(DataExtractor::Cursor &Cursor, ArrayRef< uint8_t > Bytes, raw_string_ostream &KdStream) const
void convertVOPCDPPInst(MCInst &MI) const
MCOperand decodeSpecialReg96Plus(unsigned Val) const
MCOperand decodeSDWASrc32(unsigned Val) const
void setABIVersion(unsigned Version) override
ELF-specific, set the ABI version from the object header.
Expected< bool > decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC2.
unsigned getAgprClassId(unsigned Width) const
MCOperand decodeDpp8FI(unsigned Val) const
MCOperand decodeSDWASrc(unsigned Width, unsigned Val) const
void convertFMAanyK(MCInst &MI) const
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst, uint64_t Address, raw_ostream &Comments) const
void convertMacDPPInst(MCInst &MI) const
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const
void convertDPP8Inst(MCInst &MI) const
MCOperand createVGPR16Operand(unsigned RegIdx, bool IsHi) const
MCOperand errOperand(unsigned V, const Twine &ErrMsg) const
MCOperand decodeVersionImm(unsigned Imm) const
Expected< bool > decodeKernelDescriptor(StringRef KdName, ArrayRef< uint8_t > Bytes, uint64_t KdAddress) const
void convertVOP3DPPInst(MCInst &MI) const
void convertTrue16OpSel(MCInst &MI) const
MCOperand decodeSrcOp(const MCInst &Inst, unsigned Width, unsigned Val) const
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const
MCOperand decodeLiteralConstant(const MCInstrDesc &Desc, const MCOperandInfo &OpDesc) const
Expected< bool > decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer, raw_string_ostream &KdStream) const
Decode as directives that handle COMPUTE_PGM_RSRC3.
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, MCInstrInfo const *MCII)
MCOperand decodeSpecialReg32(unsigned Val) const
MCOperand createRegOperand(MCRegister Reg) const
MCOperand decodeSDWAVopcDst(unsigned Val) const
void convertVINTERPInst(MCInst &MI) const
void convertSDWAInst(MCInst &MI) const
unsigned getSgprClassId(unsigned Width) const
static MCOperand decodeIntImmed(unsigned Imm)
void convertWMMAInst(MCInst &MI) const
MCOperand decodeBoolReg(const MCInst &Inst, unsigned Val) const
unsigned getVgprClassId(unsigned Width) const
void convertMAIInst(MCInst &MI) const
f8f6f4 instructions have different pseudos depending on the used formats.
unsigned getTtmpClassId(unsigned Width) const
DecodeStatus getInstruction(MCInst &MI, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address, raw_ostream &CS) const override
Returns the disassembly of a single instruction.
MCOperand decodeMandatoryLiteral64Constant(uint64_t Imm) const
void convertMIMGInst(MCInst &MI) const
bool isMacDPP(MCInst &MI) const
int getTTmpIdx(unsigned Val) const
void convertVOP3PDPPInst(MCInst &MI) const
MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const
MCOperand decodeSDWASrc16(unsigned Val) const
Expected< bool > onSymbolStart(SymbolInfoTy &Symbol, uint64_t &Size, ArrayRef< uint8_t > Bytes, uint64_t Address) const override
Used to perform separate target specific disassembly for a particular symbol.
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) override
Try to add a symbolic operand instead of Value to the MCInst.
void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) override
Try to add a comment on the PC-relative load.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const T * data() const
Definition ArrayRef.h:138
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
A class representing a position in a DataExtractor, as well as any error encountered during extractio...
LLVM_ABI uint32_t getU32(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint32_t value from *offset_ptr.
LLVM_ABI uint16_t getU16(uint64_t *offset_ptr, Error *Err=nullptr) const
Extract a uint16_t value from *offset_ptr.
LLVM_ABI void skip(Cursor &C, uint64_t Length) const
Advance the Cursor position by the given number of bytes.
LLVM_ABI StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length, Error *Err=nullptr) const
Extract a fixed number of bytes from the specified offset.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
Tagged union holding either a T or an Error.
Definition Error.h:485
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCRegisterInfo * getRegisterInfo() const
Definition MCContext.h:411
Superclass for all disassemblers.
MCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
MCContext & getContext() const
const MCSubtargetInfo & STI
raw_ostream * CommentStream
DecodeStatus
Ternary decode status.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getOpcode() const
Definition MCInst.h:202
void addOperand(const MCOperand Op)
Definition MCInst.h:215
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
Interface to description of machine instruction set.
Definition MCInstrInfo.h:27
This holds information about one operand of a machine instruction, indicating the register class for ...
Definition MCInstrDesc.h:86
uint8_t OperandType
Information about the type of the operand.
Definition MCInstrDesc.h:98
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
bool isValid() const
Definition MCInst.h:64
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getSizeInBits() const
Return the size of the physical register in bits if we are able to determine it.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
MCRegister getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const
Return a super-register of the specified register Reg so its sub-register of index SubIdx is Reg.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register Idx of physical register Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
Generic base class for all target subtargets.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
Symbolize and annotate disassembled instructions.
Represents a location in source code.
Definition SMLoc.h:22
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an std::string.
std::string & str()
Returns the string's reference.
A raw_ostream that writes to a SmallVector or SmallString.
const char *(* LLVMSymbolLookupCallback)(void *DisInfo, uint64_t ReferenceValue, uint64_t *ReferenceType, uint64_t ReferencePC, const char **ReferenceName)
The type for the symbol lookup function.
int(* LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t OpSize, uint64_t InstSize, int TagType, void *TagBuf)
The type for the operand information call back function.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
ArrayRef< GFXVersion > getGFXVersions()
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
EncodingField< Bit, Bit, D > EncodingBit
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isGFX10(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2BF16(uint32_t Literal)
bool isGFX12Plus(const MCSubtargetInfo &STI)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
bool isVOPC64DPP(unsigned Opc)
unsigned getAMDHSACodeObjectVersion(const Module &M)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
const MFMA_F8F6F4_Info * getWMMA_F8F6F4_WithFormatArgs(unsigned FmtA, unsigned FmtB, unsigned F8F8Opcode)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isGFX10Plus(const MCSubtargetInfo &STI)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:236
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:206
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:213
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:227
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:229
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:215
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:205
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:212
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:211
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:214
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:223
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:221
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:216
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:209
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:230
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:242
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:217
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:208
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:226
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:228
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:218
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:243
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:225
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:207
bool hasGDS(const MCSubtargetInfo &STI)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
bool isGFX1250Plus(const MCSubtargetInfo &STI)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
const MFMA_F8F6F4_Info * getMFMA_F8F6F4_WithFormatArgs(unsigned CBSZ, unsigned BLGP, unsigned F8F8Opcode)
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_NOTYPE
Definition ELF.h:1419
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433
@ STT_OBJECT
Definition ELF.h:1420
value_type read(const void *memory, endianness endian)
Read a value of a particular endianness from memory.
Definition Endian.h:60
uint16_t read16(const void *P, endianness E)
Definition Endian.h:409
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
LLVM_ABI raw_fd_ostream & outs()
This returns a reference to a raw_fd_ostream for standard output.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
Definition Error.h:1321
Op::Description Desc
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
Definition Error.h:769
Target & getTheGCNTarget()
The target for GCN GPUs.
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Add
Sum of integers.
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition VE.h:376
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
std::vector< SymbolInfoTy > SectionSymbolsTy
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
static void RegisterMCSymbolizer(Target &T, Target::MCSymbolizerCtorTy Fn)
RegisterMCSymbolizer - Register an MCSymbolizer implementation for the given target.
static void RegisterMCDisassembler(Target &T, Target::MCDisassemblerCtorTy Fn)
RegisterMCDisassembler - Register a MCDisassembler implementation for the given target.