LLVM 23.0.0git
AMDGPUAsmParser.cpp
Go to the documentation of this file.
1//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDKernelCodeT.h"
16#include "SIDefines.h"
17#include "SIInstrInfo.h"
22#include "llvm/ADT/APFloat.h"
24#include "llvm/ADT/StringSet.h"
25#include "llvm/ADT/Twine.h"
28#include "llvm/MC/MCAsmInfo.h"
29#include "llvm/MC/MCContext.h"
30#include "llvm/MC/MCExpr.h"
31#include "llvm/MC/MCInst.h"
32#include "llvm/MC/MCInstrDesc.h"
38#include "llvm/MC/MCSymbol.h"
46#include <optional>
47
48using namespace llvm;
49using namespace llvm::AMDGPU;
50using namespace llvm::amdhsa;
51
52namespace {
53
54class AMDGPUAsmParser;
55
// Classification of a parsed register operand by register file; IS_SPECIAL
// covers non-GPR registers (VCC, EXEC, M0, ...), IS_UNKNOWN is the
// "not yet / failed to classify" state.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
57
58//===----------------------------------------------------------------------===//
59// Operand
60//===----------------------------------------------------------------------===//
61
62class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the anonymous payload union below: exactly one of
  // Tok/Imm/Reg/Expr is active, selected by Kind.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  // Source range of the operand as written in the assembly text.
  SMLoc StartLoc, EndLoc;
  // Non-owning back-pointer to the parser that created this operand; used by
  // the out-of-line predicates (e.g. feature queries in isSSrc_b64).
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;
78
79 struct Modifiers {
80 bool Abs = false;
81 bool Neg = false;
82 bool Sext = false;
83 LitModifier Lit = LitModifier::None;
84
85 bool hasFPModifiers() const { return Abs || Neg; }
86 bool hasIntModifiers() const { return Sext; }
87 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
88
89 int64_t getFPModifiersOperand() const {
90 int64_t Operand = 0;
91 Operand |= Abs ? SISrcMods::ABS : 0u;
92 Operand |= Neg ? SISrcMods::NEG : 0u;
93 return Operand;
94 }
95
96 int64_t getIntModifiersOperand() const {
97 int64_t Operand = 0;
98 Operand |= Sext ? SISrcMods::SEXT : 0u;
99 return Operand;
100 }
101
102 int64_t getModifiersOperand() const {
103 assert(!(hasFPModifiers() && hasIntModifiers())
104 && "fp and int modifiers should not be used simultaneously");
105 if (hasFPModifiers())
106 return getFPModifiersOperand();
107 if (hasIntModifiers())
108 return getIntModifiersOperand();
109 return 0;
110 }
111
112 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
113 };
114
  // Kind tag for immediate operands. ImmTyNone is a plain literal; all other
  // values name a specific named/structured operand (offsets, cache policy,
  // SDWA selectors, DPP controls, MIMG flags, ...). printImmTy must be kept
  // in sync with this list.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    // Buffer/flat addressing modifiers and offsets.
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTySMEMOffsetMod,
    ImmTyCPol,
    ImmTyTFE,
    ImmTyIsAsync,
    ImmTyD16,
    ImmTyClamp,
    ImmTyOModSI,
    // SDWA sub-dword selectors.
    ImmTySDWADstSel,
    ImmTySDWASrc0Sel,
    ImmTySDWASrc1Sel,
    ImmTySDWADstUnused,
    // MIMG modifiers.
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    // Export instruction modifiers.
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyDone,
    ImmTyRowEn,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyWaitEvent,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyInterpAttrChan,
    // VOP3P operand-select / negation masks.
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTyIndexKey8bit,
    ImmTyIndexKey16bit,
    ImmTyIndexKey32bit,
    // DPP controls.
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFI,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    // MAI/MFMA modifiers.
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
    ImmTyWaitVDST,
    ImmTyWaitEXP,
    ImmTyWaitVAVDst,
    ImmTyWaitVMVSrc,
    ImmTyBitOp3,
    // WMMA matrix format/scale/reuse modifiers.
    ImmTyMatrixAFMT,
    ImmTyMatrixBFMT,
    ImmTyMatrixAScale,
    ImmTyMatrixBScale,
    ImmTyMatrixAScaleFmt,
    ImmTyMatrixBScaleFmt,
    ImmTyMatrixAReuse,
    ImmTyMatrixBReuse,
    ImmTyScaleSel,
    ImmTyByteSel,
  };
193
private:
  // Token payload: non-owning view into the source buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Immediate payload: 64-bit raw value (reinterpreted as FP when IsFPImm),
  // its ImmTy kind, and any source modifiers.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  // Register payload: register plus any source modifiers.
  struct RegOp {
    MCRegister RegNo;
    Modifiers Mods;
  };

  // Active member is selected by Kind (see KindTy above).
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

  // The index of the associated MCInst operand.
  mutable int MCOpIdx = -1;
221
public:
  bool isToken() const override { return Kind == Token; }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // Out-of-line: does the immediate fit the hardware inline-constant
  // encoding / a literal encoding for the given value type?
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the MC layer: register kind with no abs/neg/sext.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  // Register of class RCID, or an inlinable immediate of the given type.
  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  // As above, but also accepts a (non-inline) literal immediate.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }
255
256 template <bool IsFake16> bool isRegOrImmWithIntT16InputMods() const {
258 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
259 }
260
  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrInlineImmWithInt16InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  // True-16 operands use VS_16; fake-16 operands use the 32-bit class.
  template <bool IsFake16> bool isRegOrInlineImmWithIntT16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isRegOrInlineImmWithInt32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }
285
286 template <bool IsFake16> bool isRegOrImmWithFPT16InputMods() const {
288 IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
289 }
290
  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  template <bool IsFake16> bool isRegOrInlineImmWithFP16InputMods() const {
    return isRegOrInline(
        IsFake16 ? AMDGPU::VS_32RegClassID : AMDGPU::VS_16RegClassID, MVT::f16);
  }

  bool isRegOrInlineImmWithFP32InputMods() const {
    return isRegOrInline(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrInlineImmWithFP64InputMods() const {
    return isRegOrInline(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // VGPR-only source positions (no inline constants or literals).
  bool isVRegWithInputMods(unsigned RCID) const { return isRegClass(RCID); }

  bool isVRegWithFP32InputMods() const {
    return isVRegWithInputMods(AMDGPU::VGPR_32RegClassID);
  }

  bool isVRegWithFP64InputMods() const {
    return isVRegWithInputMods(AMDGPU::VReg_64RegClassID);
  }

  bool isPackedFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::v2f16);
  }

  bool isPackedVGPRFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VReg_64RegClassID, MVT::v2f32);
  }

  // Any VGPR tuple size this parser knows about.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  // 32-bit VGPR or AGPR operand for align2 AV load/store forms.
  bool isAV_LdSt_32_Align2_RegOp() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::AGPR_32RegClassID);
  }

  // Out-of-line VGPR/SDWA operand checks.
  bool isVRegWithInputMods() const;
  template <bool IsFake16> bool isT16_Lo128VRegWithInputMods() const;
  template <bool IsFake16> bool isT16VRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
368
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  template <ImmTy Ty> bool isImmTy() const { return isImmTy(Ty); }

  // A plain literal immediate (no named-operand kind).
  bool isImmLiteral() const { return isImmTy(ImmTyNone); }

  // An immediate that is a named instruction modifier rather than a literal.
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One predicate per named-immediate kind; used by tablegen'd matchers.
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isIndexKey8bit() const { return isImmTy(ImmTyIndexKey8bit); }
  bool isIndexKey16bit() const { return isImmTy(ImmTyIndexKey16bit); }
  bool isIndexKey32bit() const { return isImmTy(ImmTyIndexKey32bit); }
  bool isMatrixAFMT() const { return isImmTy(ImmTyMatrixAFMT); }
  bool isMatrixBFMT() const { return isImmTy(ImmTyMatrixBFMT); }
  bool isMatrixAScale() const { return isImmTy(ImmTyMatrixAScale); }
  bool isMatrixBScale() const { return isImmTy(ImmTyMatrixBScale); }
  bool isMatrixAScaleFmt() const { return isImmTy(ImmTyMatrixAScaleFmt); }
  bool isMatrixBScaleFmt() const { return isImmTy(ImmTyMatrixBScaleFmt); }
  bool isMatrixAReuse() const { return isImmTy(ImmTyMatrixAReuse); }
  bool isMatrixBReuse() const { return isImmTy(ImmTyMatrixBReuse); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  // FORMAT and BitOp3 additionally range-check the value.
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isDppFI() const { return isImmTy(ImmTyDppFI); }
  bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isBitOp3() const { return isImmTy(ImmTyBitOp3) && isUInt<8>(getImm()); }
  bool isDone() const { return isImmTy(ImmTyDone); }
  bool isRowEn() const { return isImmTy(ImmTyRowEn); }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }
426
  // Out-of-line checks against the MC register-class table / inline values.
  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Like isRegOrInline, but rejects operands carrying abs/neg/sext modifiers.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
434
  // SALU source predicates. "SCSrc" = SGPR or inline constant; "SSrc"
  // additionally accepts literals (and expressions where noted).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrc_b32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrc_b16() const { return isSCSrcB16() || isLiteralImm(MVT::i16); }

  // Placeholder required by the generated matcher; must never be reached.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b16();
  }

  bool isSSrc_b64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    // Cast through the base class: AMDGPUAsmParser is only forward-declared
    // at this point in the file.
    return isSCSrc_b64() || isLiteralImm(MVT::i64) ||
           (((const MCTargetAsmParser *)AsmParser)
                ->getAvailableFeatures()[AMDGPU::Feature64BitLiterals] &&
            isExpr());
  }

  bool isSSrc_f32() const {
    return isSCSrc_b32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const { return isSCSrc_b64() || isLiteralImm(MVT::f64); }

  bool isSSrc_bf16() const { return isSCSrcB16() || isLiteralImm(MVT::bf16); }

  bool isSSrc_f16() const { return isSCSrcB16() || isLiteralImm(MVT::f16); }

  // More matcher placeholders; these operand classes are never matched
  // directly, so reaching any of them is a bug.
  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_f32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrc_b32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrc_b32();
  }

  bool isSSrcOrLds_b32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
528
  // VALU source predicates. "VCSrc" = VGPR/SGPR or inline constant; the
  // _Lo256/_Lo128 variants restrict the register range, and the "T"/"Fake16"
  // variants distinguish true 16-bit registers from 16-bit values in 32-bit
  // registers.
  bool isVCSrc_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrc_b32_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo256RegClassID, MVT::i32);
  }

  bool isVCSrc_b64_Lo256() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64_Lo256RegClassID, MVT::i64);
  }

  bool isVCSrc_b64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcT_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::i16);
  }

  bool isVCSrcTB16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrcFake16B16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::i16);
  }

  bool isVCSrc_b16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrc_v2b16() const { return isVCSrc_b16(); }

  bool isVCSrc_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrc_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcTBF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::bf16);
  }

  bool isVCSrcT_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }
578
  bool isVCSrcT_bf16() const {
    // NOTE(review): checks MVT::f16 despite the _bf16 name, while the sibling
    // isVCSrcTBF16 above checks MVT::bf16 — confirm this asymmetry is
    // intentional before relying on it.
    return isRegOrInlineNoMods(AMDGPU::VS_16RegClassID, MVT::f16);
  }
582
  bool isVCSrcTBF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcTF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_16_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrcFake16BF16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::bf16);
  }

  bool isVCSrcFake16F16_Lo128() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32_Lo128RegClassID, MVT::f16);
  }

  bool isVCSrc_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::bf16);
  }

  bool isVCSrc_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrc_v2bf16() const { return isVCSrc_bf16(); }

  bool isVCSrc_v2f16() const { return isVCSrc_f16(); }

  // "VSrc" = VCSrc plus literal immediates (and expressions where noted).
  bool isVSrc_b32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrc_b64() const { return isVCSrc_f64() || isLiteralImm(MVT::i64); }

  bool isVSrcT_b16() const { return isVCSrcT_b16() || isLiteralImm(MVT::i16); }

  bool isVSrcT_b16_Lo128() const {
    return isVCSrcTB16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrcFake16_b16_Lo128() const {
    return isVCSrcFake16B16_Lo128() || isLiteralImm(MVT::i16);
  }

  bool isVSrc_b16() const { return isVCSrc_b16() || isLiteralImm(MVT::i16); }

  bool isVSrc_v2b16() const { return isVSrc_b16() || isLiteralImm(MVT::v2i16); }

  bool isVCSrcV2FP32() const { return isVCSrc_f64(); }

  bool isVSrc_v2f32() const { return isVSrc_f64() || isLiteralImm(MVT::v2f32); }

  bool isVCSrc_v2b32() const { return isVCSrc_b64(); }

  bool isVSrc_v2b32() const { return isVSrc_b64() || isLiteralImm(MVT::v2i32); }

  bool isVSrc_f32() const {
    return isVCSrc_f32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrc_f64() const { return isVCSrc_f64() || isLiteralImm(MVT::f64); }

  bool isVSrcT_bf16() const { return isVCSrcTBF16() || isLiteralImm(MVT::bf16); }

  bool isVSrcT_f16() const { return isVCSrcT_f16() || isLiteralImm(MVT::f16); }

  bool isVSrcT_bf16_Lo128() const {
    return isVCSrcTBF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcT_f16_Lo128() const {
    return isVCSrcTF16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrcFake16_bf16_Lo128() const {
    return isVCSrcFake16BF16_Lo128() || isLiteralImm(MVT::bf16);
  }

  bool isVSrcFake16_f16_Lo128() const {
    return isVCSrcFake16F16_Lo128() || isLiteralImm(MVT::f16);
  }

  bool isVSrc_bf16() const { return isVCSrc_bf16() || isLiteralImm(MVT::bf16); }

  bool isVSrc_f16() const { return isVCSrc_f16() || isLiteralImm(MVT::f16); }

  bool isVSrc_v2bf16() const {
    return isVSrc_bf16() || isLiteralImm(MVT::v2bf16);
  }

  bool isVSrc_v2f16() const { return isVSrc_f16() || isLiteralImm(MVT::v2f16); }

  bool isVSrc_v2f16_splat() const { return isVSrc_v2f16(); }

  bool isVSrc_NoInline_v2f16() const { return isVSrc_v2f16(); }
678
  // "VISrc" = VGPR (of the given tuple size) or inline constant only —
  // no SGPRs, no literals.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::bf16);
  }

  bool isVISrc_64_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f16);
  }

  bool isVISrc_64_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_256_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_512_f64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512_b32();
  }

  bool isVISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024_b32();
  }
822
  // "AISrc" = AGPR (of the given tuple size) or inline constant only.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128_b32();
  }
878
  bool isVISrc_128_bf16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::bf16);
  }

  bool isVISrc_128_f16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128_f16() || isVISrc_128_b32();
  }
890
  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256_f64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512_b32();
  }

  bool isAISrc_1024_b32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024_f32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024_b32();
  }
946
  // KImm = mandatory literal constant operands (e.g. s_movk / v_madmk forms).
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isKImmFP64() const { return isLiteralImm(MVT::f64); }

  // AMDGPU assembly has no memory-style operands.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // Branch targets may be either a symbolic expression or a literal offset.
  bool isSOPPBrTarget() const { return isExpr() || isImm(); }

  // Out-of-line checks for structured/named operands.
  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayALU() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isWaitEvent() const;
  bool isSplitBarrier() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Bind a member predicate to this operand for deferred evaluation.
  auto getPredicate(std::function<bool(const AMDGPUOperand &Op)> P) const {
    return [this, P]() { return P(*this); };
  }
989
  // Accessors. Each asserts that the matching union member is active.
  StringRef getToken() const {
    assert(isToken());
    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  MCRegister getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Index of the associated MCInst operand, or -1 if not yet assigned.
  int getMCOpIdx() const { return MCOpIdx; }
1028
  // Modifiers live on registers and on plain literal immediates only
  // (named immediates never carry abs/neg/sext).
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
1053
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  void addRegOperands(MCInst &Inst, unsigned N) const;

  // Append this operand to Inst as either a register or an immediate,
  // depending on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else
      addImmOperands(Inst, N);
  }

  // Append the src-modifier immediate first, then the operand itself. For an
  // immediate the modifiers are encoded in the modifier word, so they are not
  // re-applied to the value (ApplyModifiers = false).
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  // Register-only variants of the above.
  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
1105
  // Print a human-readable name for an immediate-operand type; used only by
  // the debug print() below. No default case: the switch is expected to
  // cover every ImmTy enumerator, so adding one without a case here should
  // trigger a -Wswitch warning.
  // NOTE(review): the ImmTyMatrix* cases print the full "ImmTy"-prefixed
  // enumerator name, and the three index_key cases print the same string,
  // unlike every other case — looks unintentional but is debug-output only.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    // clang-format off
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTyIndexKey8bit: OS << "index_key"; break;
    case ImmTyIndexKey16bit: OS << "index_key"; break;
    case ImmTyIndexKey32bit: OS << "index_key"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyIsAsync: OS << "IsAsync"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClamp: OS << "Clamp"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFI: OS << "DppFI"; break;
    case ImmTySDWADstSel: OS << "SDWADstSel"; break;
    case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
    case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
    case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyDone: OS << "Done"; break;
    case ImmTyRowEn: OS << "RowEn"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyWaitEvent: OS << "WaitEvent"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    case ImmTyWaitVDST: OS << "WaitVDST"; break;
    case ImmTyWaitEXP: OS << "WaitEXP"; break;
    case ImmTyWaitVAVDst: OS << "WaitVAVDst"; break;
    case ImmTyWaitVMVSrc: OS << "WaitVMVSrc"; break;
    case ImmTyBitOp3: OS << "BitOp3"; break;
    case ImmTyMatrixAFMT: OS << "ImmTyMatrixAFMT"; break;
    case ImmTyMatrixBFMT: OS << "ImmTyMatrixBFMT"; break;
    case ImmTyMatrixAScale: OS << "ImmTyMatrixAScale"; break;
    case ImmTyMatrixBScale: OS << "ImmTyMatrixBScale"; break;
    case ImmTyMatrixAScaleFmt: OS << "ImmTyMatrixAScaleFmt"; break;
    case ImmTyMatrixBScaleFmt: OS << "ImmTyMatrixBScaleFmt"; break;
    case ImmTyMatrixAReuse: OS << "ImmTyMatrixAReuse"; break;
    case ImmTyMatrixBReuse: OS << "ImmTyMatrixBReuse"; break;
    case ImmTyScaleSel: OS << "ScaleSel" ; break;
    case ImmTyByteSel: OS << "ByteSel" ; break;
    }
    // clang-format on
  }
1188
  // Debug dump of the operand: one line describing its kind, payload, and
  // (for registers/immediates) the attached source modifiers.
  void print(raw_ostream &OS, const MCAsmInfo &MAI) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << AMDGPUInstPrinter::getRegisterName(getReg())
         << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      // Only print the type tag when it carries information.
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr ";
      MAI.printExpr(OS, *Expr);
      OS << '>';
      break;
    }
  }
1212
1213 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
1214 int64_t Val, SMLoc Loc,
1215 ImmTy Type = ImmTyNone,
1216 bool IsFPImm = false) {
1217 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
1218 Op->Imm.Val = Val;
1219 Op->Imm.IsFPImm = IsFPImm;
1220 Op->Imm.Type = Type;
1221 Op->Imm.Mods = Modifiers();
1222 Op->StartLoc = Loc;
1223 Op->EndLoc = Loc;
1224 return Op;
1225 }
1226
1227 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
1228 StringRef Str, SMLoc Loc,
1229 bool HasExplicitEncodingSize = true) {
1230 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
1231 Res->Tok.Data = Str.data();
1232 Res->Tok.Length = Str.size();
1233 Res->StartLoc = Loc;
1234 Res->EndLoc = Loc;
1235 return Res;
1236 }
1237
1238 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
1239 MCRegister Reg, SMLoc S, SMLoc E) {
1240 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
1241 Op->Reg.RegNo = Reg;
1242 Op->Reg.Mods = Modifiers();
1243 Op->StartLoc = S;
1244 Op->EndLoc = E;
1245 return Op;
1246 }
1247
1248 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
1249 const class MCExpr *Expr, SMLoc S) {
1250 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
1251 Op->Expr = Expr;
1252 Op->StartLoc = S;
1253 Op->EndLoc = S;
1254 return Op;
1255 }
1256};
1257
1258raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
1259 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
1260 return OS;
1261}
1262
1263//===----------------------------------------------------------------------===//
1264// AsmParser
1265//===----------------------------------------------------------------------===//
1266
1267// TODO: define GET_SUBTARGET_FEATURE_NAME
1268#define GET_REGISTER_MATCHER
1269#include "AMDGPUGenAsmMatcher.inc"
1270#undef GET_REGISTER_MATCHER
1271#undef GET_SUBTARGET_FEATURE_NAME
1272
1273// Holds info related to the current kernel, e.g. count of SGPRs used.
1274// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
1275// .amdgpu_hsa_kernel or at EOF.
1276class KernelScopeInfo {
1277 int SgprIndexUnusedMin = -1;
1278 int VgprIndexUnusedMin = -1;
1279 int AgprIndexUnusedMin = -1;
1280 MCContext *Ctx = nullptr;
1281 MCSubtargetInfo const *MSTI = nullptr;
1282
1283 void usesSgprAt(int i) {
1284 if (i >= SgprIndexUnusedMin) {
1285 SgprIndexUnusedMin = ++i;
1286 if (Ctx) {
1287 MCSymbol* const Sym =
1288 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
1289 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
1290 }
1291 }
1292 }
1293
1294 void usesVgprAt(int i) {
1295 if (i >= VgprIndexUnusedMin) {
1296 VgprIndexUnusedMin = ++i;
1297 if (Ctx) {
1298 MCSymbol* const Sym =
1299 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1300 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1301 VgprIndexUnusedMin);
1302 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1303 }
1304 }
1305 }
1306
1307 void usesAgprAt(int i) {
1308 // Instruction will error in AMDGPUAsmParser::matchAndEmitInstruction
1309 if (!hasMAIInsts(*MSTI))
1310 return;
1311
1312 if (i >= AgprIndexUnusedMin) {
1313 AgprIndexUnusedMin = ++i;
1314 if (Ctx) {
1315 MCSymbol* const Sym =
1316 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
1317 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));
1318
1319 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
1320 MCSymbol* const vSym =
1321 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
1322 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
1323 VgprIndexUnusedMin);
1324 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
1325 }
1326 }
1327 }
1328
1329public:
1330 KernelScopeInfo() = default;
1331
1332 void initialize(MCContext &Context) {
1333 Ctx = &Context;
1334 MSTI = Ctx->getSubtargetInfo();
1335
1336 usesSgprAt(SgprIndexUnusedMin = -1);
1337 usesVgprAt(VgprIndexUnusedMin = -1);
1338 if (hasMAIInsts(*MSTI)) {
1339 usesAgprAt(AgprIndexUnusedMin = -1);
1340 }
1341 }
1342
1343 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
1344 unsigned RegWidth) {
1345 switch (RegKind) {
1346 case IS_SGPR:
1347 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1348 break;
1349 case IS_AGPR:
1350 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1351 break;
1352 case IS_VGPR:
1353 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359};
1360
1361class AMDGPUAsmParser : public MCTargetAsmParser {
1362 MCAsmParser &Parser;
1363
1364 unsigned ForcedEncodingSize = 0;
1365 bool ForcedDPP = false;
1366 bool ForcedSDWA = false;
1367 KernelScopeInfo KernelScope;
1368 const unsigned HwMode;
1369
1370 /// @name Auto-generated Match Functions
1371 /// {
1372
1373#define GET_ASSEMBLER_HEADER
1374#include "AMDGPUGenAsmMatcher.inc"
1375
1376 /// }
1377
1378 /// Get size of register operand
1379 unsigned getRegOperandSize(const MCInstrDesc &Desc, unsigned OpNo) const {
1380 assert(OpNo < Desc.NumOperands);
1381 int16_t RCID = MII.getOpRegClassID(Desc.operands()[OpNo], HwMode);
1382 return getRegBitWidth(RCID) / 8;
1383 }
1384
1385private:
1386 void createConstantSymbol(StringRef Id, int64_t Val);
1387
1388 bool ParseAsAbsoluteExpression(uint32_t &Ret);
1389 bool OutOfRangeError(SMRange Range);
1390 /// Calculate VGPR/SGPR blocks required for given target, reserved
1391 /// registers, and user-specified NextFreeXGPR values.
1392 ///
1393 /// \param Features [in] Target features, used for bug corrections.
1394 /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
1395 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
1396 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
1397 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
1398 /// descriptor field, if valid.
1399 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
1400 /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
1401 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
1402 /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
1403 /// \param VGPRBlocks [out] Result VGPR block count.
1404 /// \param SGPRBlocks [out] Result SGPR block count.
1405 bool calculateGPRBlocks(const FeatureBitset &Features, const MCExpr *VCCUsed,
1406 const MCExpr *FlatScrUsed, bool XNACKUsed,
1407 std::optional<bool> EnableWavefrontSize32,
1408 const MCExpr *NextFreeVGPR, SMRange VGPRRange,
1409 const MCExpr *NextFreeSGPR, SMRange SGPRRange,
1410 const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks);
1411 bool ParseDirectiveAMDGCNTarget();
1412 bool ParseDirectiveAMDHSACodeObjectVersion();
1413 bool ParseDirectiveAMDHSAKernel();
1414 bool ParseAMDKernelCodeTValue(StringRef ID, AMDGPUMCKernelCodeT &Header);
1415 bool ParseDirectiveAMDKernelCodeT();
1416 // TODO: Possibly make subtargetHasRegister const.
1417 bool subtargetHasRegister(const MCRegisterInfo &MRI, MCRegister Reg);
1418 bool ParseDirectiveAMDGPUHsaKernel();
1419
1420 bool ParseDirectiveISAVersion();
1421 bool ParseDirectiveHSAMetadata();
1422 bool ParseDirectivePALMetadataBegin();
1423 bool ParseDirectivePALMetadata();
1424 bool ParseDirectiveAMDGPULDS();
1425
1426 /// Common code to parse out a block of text (typically YAML) between start and
1427 /// end directives.
1428 bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
1429 const char *AssemblerDirectiveEnd,
1430 std::string &CollectString);
1431
1432 bool AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
1433 RegisterKind RegKind, MCRegister Reg1, SMLoc Loc);
1434 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1435 unsigned &RegNum, unsigned &RegWidth,
1436 bool RestoreOnFailure = false);
1437 bool ParseAMDGPURegister(RegisterKind &RegKind, MCRegister &Reg,
1438 unsigned &RegNum, unsigned &RegWidth,
1439 SmallVectorImpl<AsmToken> &Tokens);
1440 MCRegister ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
1441 unsigned &RegWidth,
1442 SmallVectorImpl<AsmToken> &Tokens);
1443 MCRegister ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
1444 unsigned &RegWidth,
1445 SmallVectorImpl<AsmToken> &Tokens);
1446 MCRegister ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
1447 unsigned &RegWidth,
1448 SmallVectorImpl<AsmToken> &Tokens);
1449 bool ParseRegRange(unsigned &Num, unsigned &Width, unsigned &SubReg);
1450 MCRegister getRegularReg(RegisterKind RegKind, unsigned RegNum,
1451 unsigned SubReg, unsigned RegWidth, SMLoc Loc);
1452
1453 bool isRegister();
1454 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
1455 std::optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
1456 void initializeGprCountSymbol(RegisterKind RegKind);
1457 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
1458 unsigned RegWidth);
1459 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
1460 bool IsAtomic);
1461
1462public:
1463 enum OperandMode {
1464 OperandMode_Default,
1465 OperandMode_NSA,
1466 };
1467
1468 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
1469
1470 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
1471 const MCInstrInfo &MII, const MCTargetOptions &Options)
1472 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser),
1473 HwMode(STI.getHwMode(MCSubtargetInfo::HwMode_RegInfo)) {
1475
1476 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
1477
1478 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
1479 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1480 createConstantSymbol(".amdgcn.gfx_generation_number", ISA.Major);
1481 createConstantSymbol(".amdgcn.gfx_generation_minor", ISA.Minor);
1482 createConstantSymbol(".amdgcn.gfx_generation_stepping", ISA.Stepping);
1483 } else {
1484 createConstantSymbol(".option.machine_version_major", ISA.Major);
1485 createConstantSymbol(".option.machine_version_minor", ISA.Minor);
1486 createConstantSymbol(".option.machine_version_stepping", ISA.Stepping);
1487 }
1488 if (ISA.Major >= 6 && isHsaAbi(getSTI())) {
1489 initializeGprCountSymbol(IS_VGPR);
1490 initializeGprCountSymbol(IS_SGPR);
1491 } else
1492 KernelScope.initialize(getContext());
1493
1494 for (auto [Symbol, Code] : AMDGPU::UCVersion::getGFXVersions())
1495 createConstantSymbol(Symbol, Code);
1496
1497 createConstantSymbol("UC_VERSION_W64_BIT", 0x2000);
1498 createConstantSymbol("UC_VERSION_W32_BIT", 0x4000);
1499 createConstantSymbol("UC_VERSION_MDP_BIT", 0x8000);
1500 }
1501
1502 bool hasMIMG_R128() const {
1503 return AMDGPU::hasMIMG_R128(getSTI());
1504 }
1505
1506 bool hasPackedD16() const {
1507 return AMDGPU::hasPackedD16(getSTI());
1508 }
1509
1510 bool hasA16() const { return AMDGPU::hasA16(getSTI()); }
1511
1512 bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
1513
1514 bool hasGDS() const { return AMDGPU::hasGDS(getSTI()); }
1515
1516 bool isSI() const {
1517 return AMDGPU::isSI(getSTI());
1518 }
1519
1520 bool isCI() const {
1521 return AMDGPU::isCI(getSTI());
1522 }
1523
1524 bool isVI() const {
1525 return AMDGPU::isVI(getSTI());
1526 }
1527
1528 bool isGFX9() const {
1529 return AMDGPU::isGFX9(getSTI());
1530 }
1531
1532 // TODO: isGFX90A is also true for GFX940. We need to clean it.
1533 bool isGFX90A() const {
1534 return AMDGPU::isGFX90A(getSTI());
1535 }
1536
1537 bool isGFX940() const {
1538 return AMDGPU::isGFX940(getSTI());
1539 }
1540
1541 bool isGFX9Plus() const {
1542 return AMDGPU::isGFX9Plus(getSTI());
1543 }
1544
1545 bool isGFX10() const {
1546 return AMDGPU::isGFX10(getSTI());
1547 }
1548
1549 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }
1550
1551 bool isGFX11() const {
1552 return AMDGPU::isGFX11(getSTI());
1553 }
1554
1555 bool isGFX11Plus() const {
1556 return AMDGPU::isGFX11Plus(getSTI());
1557 }
1558
1559 bool isGFX1170() const { return AMDGPU::isGFX1170(getSTI()); }
1560
1561 bool isGFX12() const { return AMDGPU::isGFX12(getSTI()); }
1562
1563 bool isGFX12Plus() const { return AMDGPU::isGFX12Plus(getSTI()); }
1564
1565 bool isGFX1250() const { return AMDGPU::isGFX1250(getSTI()); }
1566
1567 bool isGFX1250Plus() const { return AMDGPU::isGFX1250Plus(getSTI()); }
1568
1569 bool isGFX13() const { return AMDGPU::isGFX13(getSTI()); }
1570
1571 bool isGFX13Plus() const { return AMDGPU::isGFX13Plus(getSTI()); }
1572
1573 bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
1574
1575 bool isGFX10_BEncoding() const {
1576 return AMDGPU::isGFX10_BEncoding(getSTI());
1577 }
1578
1579 bool isWave32() const { return getAvailableFeatures()[Feature_isWave32Bit]; }
1580
1581 bool isWave64() const { return getAvailableFeatures()[Feature_isWave64Bit]; }
1582
1583 bool hasInv2PiInlineImm() const {
1584 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
1585 }
1586
1587 bool has64BitLiterals() const {
1588 return getFeatureBits()[AMDGPU::Feature64BitLiterals];
1589 }
1590
1591 bool hasFlatOffsets() const {
1592 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
1593 }
1594
1595 bool hasTrue16Insts() const {
1596 return getFeatureBits()[AMDGPU::FeatureTrue16BitInsts];
1597 }
1598
1599 bool hasArchitectedFlatScratch() const {
1600 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1601 }
1602
1603 bool hasSGPR102_SGPR103() const {
1604 return !isVI() && !isGFX9();
1605 }
1606
1607 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }
1608
1609 bool hasIntClamp() const {
1610 return getFeatureBits()[AMDGPU::FeatureIntClamp];
1611 }
1612
1613 bool hasPartialNSAEncoding() const {
1614 return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
1615 }
1616
1617 bool hasGloballyAddressableScratch() const {
1618 return getFeatureBits()[AMDGPU::FeatureGloballyAddressableScratch];
1619 }
1620
1621 unsigned getNSAMaxSize(bool HasSampler = false) const {
1622 return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
1623 }
1624
1625 unsigned getMaxNumUserSGPRs() const {
1626 return AMDGPU::getMaxNumUserSGPRs(getSTI());
1627 }
1628
1629 bool hasKernargPreload() const { return AMDGPU::hasKernargPreload(getSTI()); }
1630
1631 AMDGPUTargetStreamer &getTargetStreamer() {
1632 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
1633 return static_cast<AMDGPUTargetStreamer &>(TS);
1634 }
1635
1636 MCContext &getContext() const {
1637 // We need this const_cast because for some reason getContext() is not const
1638 // in MCAsmParser.
1639 return const_cast<AMDGPUAsmParser *>(this)->MCTargetAsmParser::getContext();
1640 }
1641
1642 const MCRegisterInfo *getMRI() const {
1643 return getContext().getRegisterInfo();
1644 }
1645
1646 const MCInstrInfo *getMII() const {
1647 return &MII;
1648 }
1649
1650 // FIXME: This should not be used. Instead, should use queries derived from
1651 // getAvailableFeatures().
1652 const FeatureBitset &getFeatureBits() const {
1653 return getSTI().getFeatureBits();
1654 }
1655
1656 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
1657 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
1658 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
1659
1660 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
1661 bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
1662 bool isForcedDPP() const { return ForcedDPP; }
1663 bool isForcedSDWA() const { return ForcedSDWA; }
1664 ArrayRef<unsigned> getMatchedVariants() const;
1665 StringRef getMatchedVariantName() const;
1666
1667 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
1668 bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
1669 bool RestoreOnFailure);
1670 bool parseRegister(MCRegister &Reg, SMLoc &StartLoc, SMLoc &EndLoc) override;
1671 ParseStatus tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
1672 SMLoc &EndLoc) override;
1673 unsigned checkTargetMatchPredicate(MCInst &Inst) override;
1674 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
1675 unsigned Kind) override;
1676 bool matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1677 OperandVector &Operands, MCStreamer &Out,
1678 uint64_t &ErrorInfo,
1679 bool MatchingInlineAsm) override;
1680 bool ParseDirective(AsmToken DirectiveID) override;
1681 ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
1682 OperandMode Mode = OperandMode_Default);
1683 StringRef parseMnemonicSuffix(StringRef Name);
1684 bool parseInstruction(ParseInstructionInfo &Info, StringRef Name,
1685 SMLoc NameLoc, OperandVector &Operands) override;
1686 //bool ProcessInstruction(MCInst &Inst);
1687
1688 ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
1689
1690 ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
1691
1692 ParseStatus
1693 parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
1694 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1695 std::function<bool(int64_t &)> ConvertResult = nullptr);
1696
1697 ParseStatus parseOperandArrayWithPrefix(
1698 const char *Prefix, OperandVector &Operands,
1699 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1700 bool (*ConvertResult)(int64_t &) = nullptr);
1701
1702 ParseStatus
1703 parseNamedBit(StringRef Name, OperandVector &Operands,
1704 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
1705 bool IgnoreNegative = false);
1706 unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
1707 ParseStatus parseCPol(OperandVector &Operands);
1708 ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
1709 ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
1710 ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
1711 SMLoc &StringLoc);
1712 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1713 StringRef Name,
1714 ArrayRef<const char *> Ids,
1715 int64_t &IntVal);
1716 ParseStatus parseStringOrIntWithPrefix(OperandVector &Operands,
1717 StringRef Name,
1718 ArrayRef<const char *> Ids,
1719 AMDGPUOperand::ImmTy Type);
1720
1721 bool isModifier();
1722 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1723 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1724 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
1725 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
1726 bool parseSP3NegModifier();
1727 ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false,
1728 LitModifier Lit = LitModifier::None);
1729 ParseStatus parseReg(OperandVector &Operands);
1730 ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false,
1731 LitModifier Lit = LitModifier::None);
1732 ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
1733 bool AllowImm = true);
1734 ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
1735 bool AllowImm = true);
1736 ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
1737 ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
1738 ParseStatus parseVReg32OrOff(OperandVector &Operands);
1739 ParseStatus tryParseIndexKey(OperandVector &Operands,
1740 AMDGPUOperand::ImmTy ImmTy);
1741 ParseStatus parseIndexKey8bit(OperandVector &Operands);
1742 ParseStatus parseIndexKey16bit(OperandVector &Operands);
1743 ParseStatus parseIndexKey32bit(OperandVector &Operands);
1744 ParseStatus tryParseMatrixFMT(OperandVector &Operands, StringRef Name,
1745 AMDGPUOperand::ImmTy Type);
1746 ParseStatus parseMatrixAFMT(OperandVector &Operands);
1747 ParseStatus parseMatrixBFMT(OperandVector &Operands);
1748 ParseStatus tryParseMatrixScale(OperandVector &Operands, StringRef Name,
1749 AMDGPUOperand::ImmTy Type);
1750 ParseStatus parseMatrixAScale(OperandVector &Operands);
1751 ParseStatus parseMatrixBScale(OperandVector &Operands);
1752 ParseStatus tryParseMatrixScaleFmt(OperandVector &Operands, StringRef Name,
1753 AMDGPUOperand::ImmTy Type);
1754 ParseStatus parseMatrixAScaleFmt(OperandVector &Operands);
1755 ParseStatus parseMatrixBScaleFmt(OperandVector &Operands);
1756
1757 ParseStatus parseDfmtNfmt(int64_t &Format);
1758 ParseStatus parseUfmt(int64_t &Format);
1759 ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
1760 int64_t &Format);
1761 ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
1762 int64_t &Format);
1763 ParseStatus parseFORMAT(OperandVector &Operands);
1764 ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
1765 ParseStatus parseNumericFormat(int64_t &Format);
1766 ParseStatus parseFlatOffset(OperandVector &Operands);
1767 ParseStatus parseR128A16(OperandVector &Operands);
1768 ParseStatus parseBLGP(OperandVector &Operands);
1769 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
1770 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
1771
1772 void cvtExp(MCInst &Inst, const OperandVector &Operands);
1773
1774 bool parseCnt(int64_t &IntVal);
1775 ParseStatus parseSWaitCnt(OperandVector &Operands);
1776
1777 bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
1778 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
1779 ParseStatus parseDepCtr(OperandVector &Operands);
1780
1781 bool parseDelay(int64_t &Delay);
1782 ParseStatus parseSDelayALU(OperandVector &Operands);
1783
1784 ParseStatus parseHwreg(OperandVector &Operands);
1785
1786private:
  // Holds one parsed component of a compound symbolic operand (e.g. the
  // message id in sendmsg(...)): its value, its source location for
  // diagnostics, and flags describing how it was written.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Val;
    // True when the component was spelled as a symbolic name; Val may then
    // hold an OPR_ID_* sentinel (see StructuredOpField::validate below).
    bool IsSymbolic = false;
    // NOTE(review): presumably set once the component has been parsed —
    // confirm at the parseSendMsgBody/parseHwregFunc use sites.
    bool IsDefined = false;

    constexpr OperandInfoTy(int64_t Val) : Val(Val) {}
  };
1795
1796 struct StructuredOpField : OperandInfoTy {
1797 StringLiteral Id;
1798 StringLiteral Desc;
1799 unsigned Width;
1800 bool IsDefined = false;
1801
1802 constexpr StructuredOpField(StringLiteral Id, StringLiteral Desc,
1803 unsigned Width, int64_t Default)
1804 : OperandInfoTy(Default), Id(Id), Desc(Desc), Width(Width) {}
1805 virtual ~StructuredOpField() = default;
1806
1807 bool Error(AMDGPUAsmParser &Parser, const Twine &Err) const {
1808 Parser.Error(Loc, "invalid " + Desc + ": " + Err);
1809 return false;
1810 }
1811
1812 virtual bool validate(AMDGPUAsmParser &Parser) const {
1813 if (IsSymbolic && Val == OPR_ID_UNSUPPORTED)
1814 return Error(Parser, "not supported on this GPU");
1815 if (!isUIntN(Width, Val))
1816 return Error(Parser, "only " + Twine(Width) + "-bit values are legal");
1817 return true;
1818 }
1819 };
1820
1821 ParseStatus parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields);
1822 bool validateStructuredOpFields(ArrayRef<const StructuredOpField *> Fields);
1823
1824 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
1825 bool validateSendMsg(const OperandInfoTy &Msg,
1826 const OperandInfoTy &Op,
1827 const OperandInfoTy &Stream);
1828
1829 ParseStatus parseHwregFunc(OperandInfoTy &HwReg, OperandInfoTy &Offset,
1830 OperandInfoTy &Width);
1831
1832 static SMLoc getLaterLoc(SMLoc a, SMLoc b);
1833
1834 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
1835 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
1836 SMLoc getBLGPLoc(const OperandVector &Operands) const;
1837
1838 SMLoc getOperandLoc(const OperandVector &Operands, int MCOpIdx) const;
1839 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
1840 const OperandVector &Operands) const;
1841 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type,
1842 const OperandVector &Operands) const;
1843 SMLoc getInstLoc(const OperandVector &Operands) const;
1844
1845 bool validateInstruction(const MCInst &Inst, SMLoc IDLoc,
1846 const OperandVector &Operands);
1847 bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
1848 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
1849 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
1850 bool validateSOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1851 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
1852 std::optional<unsigned> checkVOPDRegBankConstraints(const MCInst &Inst,
1853 bool AsVOPD3);
1854 bool validateVOPD(const MCInst &Inst, const OperandVector &Operands);
1855 bool tryVOPD(const MCInst &Inst);
1856 bool tryVOPD3(const MCInst &Inst);
1857 bool tryAnotherVOPDEncoding(const MCInst &Inst);
1858
1859 bool validateIntClampSupported(const MCInst &Inst);
1860 bool validateMIMGAtomicDMask(const MCInst &Inst);
1861 bool validateMIMGGatherDMask(const MCInst &Inst);
1862 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
1863 bool validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc);
1864 bool validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc);
1865 bool validateMIMGD16(const MCInst &Inst);
1866 bool validateMIMGDim(const MCInst &Inst, const OperandVector &Operands);
1867 bool validateTensorR128(const MCInst &Inst);
1868 bool validateMIMGMSAA(const MCInst &Inst);
1869 bool validateOpSel(const MCInst &Inst);
1870 bool validateTrue16OpSel(const MCInst &Inst);
1871 bool validateNeg(const MCInst &Inst, AMDGPU::OpName OpName);
1872 bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
1873 bool validateVccOperand(MCRegister Reg) const;
1874 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
1875 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
1876 bool validateMAISrc2(const MCInst &Inst, const OperandVector &Operands);
1877 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
1878 bool validateAGPRLdSt(const MCInst &Inst) const;
1879 bool validateVGPRAlign(const MCInst &Inst) const;
1880 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
1881 bool validateDS(const MCInst &Inst, const OperandVector &Operands);
1882 bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
1883 bool validateDivScale(const MCInst &Inst);
1884 bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
1885 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
1886 SMLoc IDLoc);
1887 bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
1888 const unsigned CPol);
1889 bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
1890 bool validateLdsDirect(const MCInst &Inst, const OperandVector &Operands);
1891 bool validateWMMA(const MCInst &Inst, const OperandVector &Operands);
1892 unsigned getConstantBusLimit(unsigned Opcode) const;
1893 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
1894 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
1895 MCRegister findImplicitSGPRReadInVOP(const MCInst &Inst) const;
1896
1897 bool isSupportedMnemo(StringRef Mnemo,
1898 const FeatureBitset &FBS);
1899 bool isSupportedMnemo(StringRef Mnemo,
1900 const FeatureBitset &FBS,
1901 ArrayRef<unsigned> Variants);
1902 bool checkUnsupportedInstruction(StringRef Name, SMLoc IDLoc);
1903
1904 bool isId(const StringRef Id) const;
1905 bool isId(const AsmToken &Token, const StringRef Id) const;
1906 bool isToken(const AsmToken::TokenKind Kind) const;
1907 StringRef getId() const;
1908 bool trySkipId(const StringRef Id);
1909 bool trySkipId(const StringRef Pref, const StringRef Id);
1910 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
1911 bool trySkipToken(const AsmToken::TokenKind Kind);
1912 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
1913 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
1914 bool parseId(StringRef &Val, const StringRef ErrMsg = "");
1915
1916 void peekTokens(MutableArrayRef<AsmToken> Tokens);
1917 AsmToken::TokenKind getTokenKind() const;
1918 bool parseExpr(int64_t &Imm, StringRef Expected = "");
1919 bool parseExpr(OperandVector &Operands);
1920 StringRef getTokenStr() const;
1921 AsmToken peekToken(bool ShouldSkipSpace = true);
1922 AsmToken getToken() const;
1923 SMLoc getLoc() const;
1924 void lex();
1925
1926public:
1927 void onBeginOfFile() override;
1928 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1929
1930 ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
1931
1932 ParseStatus parseExpTgt(OperandVector &Operands);
1933 ParseStatus parseSendMsg(OperandVector &Operands);
1934 ParseStatus parseWaitEvent(OperandVector &Operands);
1935 ParseStatus parseInterpSlot(OperandVector &Operands);
1936 ParseStatus parseInterpAttr(OperandVector &Operands);
1937 ParseStatus parseSOPPBrTarget(OperandVector &Operands);
1938 ParseStatus parseBoolReg(OperandVector &Operands);
1939
1940 bool parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
1941 const unsigned MaxVal, const Twine &ErrMsg,
1942 SMLoc &Loc);
1943 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
1944 const unsigned MinVal,
1945 const unsigned MaxVal,
1946 const StringRef ErrMsg);
1947 ParseStatus parseSwizzle(OperandVector &Operands);
1948 bool parseSwizzleOffset(int64_t &Imm);
1949 bool parseSwizzleMacro(int64_t &Imm);
1950 bool parseSwizzleQuadPerm(int64_t &Imm);
1951 bool parseSwizzleBitmaskPerm(int64_t &Imm);
1952 bool parseSwizzleBroadcast(int64_t &Imm);
1953 bool parseSwizzleSwap(int64_t &Imm);
1954 bool parseSwizzleReverse(int64_t &Imm);
1955 bool parseSwizzleFFT(int64_t &Imm);
1956 bool parseSwizzleRotate(int64_t &Imm);
1957
1958 ParseStatus parseGPRIdxMode(OperandVector &Operands);
1959 int64_t parseGPRIdxMacro();
1960
 // Convert a parsed non-atomic MUBUF instruction (delegates with IsAtomic=false).
 1961 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
 // Convert a parsed atomic MUBUF instruction (delegates with IsAtomic=true).
 1962 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
1963
1964 ParseStatus parseOModSI(OperandVector &Operands);
1965
1966 void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
1967 OptionalImmIndexMap &OptionalIdx);
1968 void cvtScaledMFMA(MCInst &Inst, const OperandVector &Operands);
1969 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
1970 void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
1971 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
1972 void cvtSWMMAC(MCInst &Inst, const OperandVector &Operands);
1973
1974 void cvtVOPD(MCInst &Inst, const OperandVector &Operands);
1975 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
1976 OptionalImmIndexMap &OptionalIdx);
1977 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
1978 OptionalImmIndexMap &OptionalIdx);
1979
1980 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
1981 void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
1982 void cvtOpSelHelper(MCInst &Inst, unsigned OpSel);
1983
1984 bool parseDimId(unsigned &Encoding);
1985 ParseStatus parseDim(OperandVector &Operands);
1986 bool convertDppBoundCtrl(int64_t &BoundCtrl);
1987 ParseStatus parseDPP8(OperandVector &Operands);
1988 ParseStatus parseDPPCtrl(OperandVector &Operands);
1989 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
1990 int64_t parseDPPCtrlSel(StringRef Ctrl);
1991 int64_t parseDPPCtrlPerm();
1992 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
 // DPP8 conversion: shares the DPP converter with IsDPP8 forced to true.
 1993 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
 1994 cvtDPP(Inst, Operands, true);
 1995 }
1996 void cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
1997 bool IsDPP8 = false);
 // VOP3 DPP8 conversion: shares the VOP3-DPP converter with IsDPP8 forced to true.
 1998 void cvtVOP3DPP8(MCInst &Inst, const OperandVector &Operands) {
 1999 cvtVOP3DPP(Inst, Operands, true);
 2000 }
2001
2002 ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
2003 AMDGPUOperand::ImmTy Type);
2004 ParseStatus parseSDWADstUnused(OperandVector &Operands);
2005 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
2006 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
2007 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
2008 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
2009 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
2010 void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
2011 uint64_t BasicInstType,
2012 bool SkipDstVcc = false,
2013 bool SkipSrcVcc = false);
2014
2015 ParseStatus parseEndpgm(OperandVector &Operands);
2016
2017 ParseStatus parseVOPD(OperandVector &Operands);
2018};
2019
2020} // end anonymous namespace
2021
2022// May be called with integer type with equivalent bitwidth.
2023static const fltSemantics *getFltSemantics(unsigned Size) {
2024 switch (Size) {
2025 case 4:
2026 return &APFloat::IEEEsingle();
2027 case 8:
2028 return &APFloat::IEEEdouble();
2029 case 2:
2030 return &APFloat::IEEEhalf();
2031 default:
2032 llvm_unreachable("unsupported fp type");
2033 }
2034}
2035
// NOTE(review): the signature line of this overload (original line 2036,
// presumably the MVT-taking getFltSemantics) is missing from this listing;
// the body forwards to the byte-size overload above.
 2037 return getFltSemantics(VT.getSizeInBits() / 8);
 2038}
2039
// NOTE(review): this listing is missing the function signature (original
// line 2040) and all the OPERAND_* case labels between the comments and
// each return below (lines 2044-2057, 2059-2064, 2066-2072, 2074-2077) —
// the grouping of operand types per semantics cannot be verified here.
 2041 switch (OperandType) {
 2042 // When floating-point immediate is used as operand of type i16, the 32-bit
 2043 // representation of the constant truncated to the 16 LSBs should be used.
 2058 return &APFloat::IEEEsingle();
 2065 return &APFloat::IEEEdouble();
 2073 return &APFloat::IEEEhalf();
 2078 return &APFloat::BFloat();
 2079 default:
 2080 llvm_unreachable("unsupported fp type");
 2081 }
 2082}
2083
2084//===----------------------------------------------------------------------===//
2085// Operand
2086//===----------------------------------------------------------------------===//
2087
// Returns true if FPLiteral can be converted to the FP type VT without
// overflow or underflow; plain precision loss is tolerated.
// NOTE(review): original lines 2092-2093 (the APFloat::convert call that
// defines Status) are missing from this listing.
 2088 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
 2089 bool Lost;
 2090
 2091 // Convert literal to single precision
 2094 &Lost);
 2095 // We allow precision lost but not overflow or underflow
 2096 if (Status != APFloat::opOK &&
 2097 Lost &&
 2098 ((Status & APFloat::opOverflow) != 0 ||
 2099 (Status & APFloat::opUnderflow) != 0)) {
 2100 return false;
 2101 }
 2102
 2103 return true;
 2104}
2105
2106static bool isSafeTruncation(int64_t Val, unsigned Size) {
2107 return isUIntN(Size, Val) || isIntN(Size, Val);
2108}
2109
2110static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
2111 if (VT.getScalarType() == MVT::i16)
2112 return isInlinableLiteral32(Val, HasInv2Pi);
2113
2114 if (VT.getScalarType() == MVT::f16)
2115 return AMDGPU::isInlinableLiteralFP16(Val, HasInv2Pi);
2116
2117 assert(VT.getScalarType() == MVT::bf16);
2118
2119 return AMDGPU::isInlinableLiteralBF16(Val, HasInv2Pi);
2120}
2121
// Returns true if this immediate operand can be encoded as an inline
// constant of the given type (no literal dword needed).
// NOTE(review): this listing is missing the heads of several calls
// (original lines 2148, 2184, 2191, 2200 and 2205 — presumably the
// isInlinableLiteral64/isInlinableLiteral32/isInlineableLiteralOp16 call
// expressions whose argument lines remain below).
 2122 bool AMDGPUOperand::isInlinableImm(MVT type) const {
 2123
 2124 // This is a hack to enable named inline values like
 2125 // shared_base with both 32-bit and 64-bit operands.
 2126 // Note that these values are defined as
 2127 // 32-bit operands only.
 2128 if (isInlineValue()) {
 2129 return true;
 2130 }
 2131
 2132 if (!isImmTy(ImmTyNone)) {
 2133 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
 2134 return false;
 2135 }
 2136
 // An explicit lit()/lit64() modifier forces literal encoding.
 2137 if (getModifiers().Lit != LitModifier::None)
 2138 return false;
 2139
 2140 // TODO: We should avoid using host float here. It would be better to
 2141 // check the float bit values which is what a few other places do.
 2142 // We've had bot failures before due to weird NaN support on mips hosts.
 2143
 2144 APInt Literal(64, Imm.Val);
 2145
 2146 if (Imm.IsFPImm) { // We got fp literal token
 2147 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 2149 AsmParser->hasInv2PiInlineImm());
 2150 }
 2151
 2152 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
 2153 if (!canLosslesslyConvertToFPType(FPLiteral, type))
 2154 return false;
 2155
 2156 if (type.getScalarSizeInBits() == 16) {
 2157 bool Lost = false;
 2158 switch (type.getScalarType().SimpleTy) {
 2159 default:
 2160 llvm_unreachable("unknown 16-bit type");
 2161 case MVT::bf16:
 2162 FPLiteral.convert(APFloatBase::BFloat(), APFloat::rmNearestTiesToEven,
 2163 &Lost);
 2164 break;
 2165 case MVT::f16:
 2166 FPLiteral.convert(APFloatBase::IEEEhalf(), APFloat::rmNearestTiesToEven,
 2167 &Lost);
 2168 break;
 2169 case MVT::i16:
 2170 FPLiteral.convert(APFloatBase::IEEEsingle(),
 2171 APFloat::rmNearestTiesToEven, &Lost);
 2172 break;
 2173 }
 2174 // We need to use 32-bit representation here because when a floating-point
 2175 // inline constant is used as an i16 operand, its 32-bit representation
 2176 // representation will be used. We will need the 32-bit value to check if
 2177 // it is FP inline constant.
 2178 uint32_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
 2179 return isInlineableLiteralOp16(ImmVal, type,
 2180 AsmParser->hasInv2PiInlineImm());
 2181 }
 2182
 2183 // Check if single precision literal is inlinable
 2185 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
 2186 AsmParser->hasInv2PiInlineImm());
 2187 }
 2188
 2189 // We got int literal token.
 2190 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
 2192 AsmParser->hasInv2PiInlineImm());
 2193 }
 2194
 2195 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
 2196 return false;
 2197 }
 2198
 2199 if (type.getScalarSizeInBits() == 16) {
 2201 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
 2202 type, AsmParser->hasInv2PiInlineImm());
 2203 }
 2204
 2206 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
 2207 AsmParser->hasInv2PiInlineImm());
 2208}
2209
2210bool AMDGPUOperand::isLiteralImm(MVT type) const {
2211 // Check that this immediate can be added as literal
2212 if (!isImmTy(ImmTyNone)) {
2213 return false;
2214 }
2215
2216 bool Allow64Bit =
2217 (type == MVT::i64 || type == MVT::f64) && AsmParser->has64BitLiterals();
2218
2219 if (!Imm.IsFPImm) {
2220 // We got int literal token.
2221
2222 if (type == MVT::f64 && hasFPModifiers()) {
2223 // Cannot apply fp modifiers to int literals preserving the same semantics
2224 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
2225 // disable these cases.
2226 return false;
2227 }
2228
2229 unsigned Size = type.getSizeInBits();
2230 if (Size == 64) {
2231 if (Allow64Bit && !AMDGPU::isValid32BitLiteral(Imm.Val, false))
2232 return true;
2233 Size = 32;
2234 }
2235
2236 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
2237 // types.
2238 return isSafeTruncation(Imm.Val, Size);
2239 }
2240
2241 // We got fp literal token
2242 if (type == MVT::f64) { // Expected 64-bit fp operand
2243 // We would set low 64-bits of literal to zeroes but we accept this literals
2244 return true;
2245 }
2246
2247 if (type == MVT::i64) { // Expected 64-bit int operand
2248 // We don't allow fp literals in 64-bit integer instructions. It is
2249 // unclear how we should encode them.
2250 return false;
2251 }
2252
2253 // We allow fp literals with f16x2 operands assuming that the specified
2254 // literal goes into the lower half and the upper half is zero. We also
2255 // require that the literal may be losslessly converted to f16.
2256 //
2257 // For i16x2 operands, we assume that the specified literal is encoded as a
2258 // single-precision float. This is pretty odd, but it matches SP3 and what
2259 // happens in hardware.
2260 MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16
2261 : (type == MVT::v2i16) ? MVT::f32
2262 : (type == MVT::v2f32) ? MVT::f32
2263 : type;
2264
2265 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
2266 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
2267}
2268
2269bool AMDGPUOperand::isRegClass(unsigned RCID) const {
2270 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
2271}
2272
2273bool AMDGPUOperand::isVRegWithInputMods() const {
2274 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
2275 // GFX90A allows DPP on 64-bit operands.
2276 (isRegClass(AMDGPU::VReg_64RegClassID) &&
2277 AsmParser->getFeatureBits()[AMDGPU::FeatureDPALU_DPP]);
2278}
2279
2280template <bool IsFake16>
2281bool AMDGPUOperand::isT16_Lo128VRegWithInputMods() const {
2282 return isRegClass(IsFake16 ? AMDGPU::VGPR_32_Lo128RegClassID
2283 : AMDGPU::VGPR_16_Lo128RegClassID);
2284}
2285
2286template <bool IsFake16> bool AMDGPUOperand::isT16VRegWithInputMods() const {
2287 return isRegClass(IsFake16 ? AMDGPU::VGPR_32RegClassID
2288 : AMDGPU::VGPR_16RegClassID);
2289}
2290
2291bool AMDGPUOperand::isSDWAOperand(MVT type) const {
2292 if (AsmParser->isVI())
2293 return isVReg32();
2294 if (AsmParser->isGFX9Plus())
2295 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
2296 return false;
2297}
2298
// SDWA source check specialized for f16 operands.
 2299 bool AMDGPUOperand::isSDWAFP16Operand() const {
 2300 return isSDWAOperand(MVT::f16);
 2301 }
2302
// SDWA source check specialized for f32 operands.
 2303 bool AMDGPUOperand::isSDWAFP32Operand() const {
 2304 return isSDWAOperand(MVT::f32);
 2305 }
2306
// SDWA source check specialized for i16 operands.
 2307 bool AMDGPUOperand::isSDWAInt16Operand() const {
 2308 return isSDWAOperand(MVT::i16);
 2309 }
2310
// SDWA source check specialized for i32 operands.
 2311 bool AMDGPUOperand::isSDWAInt32Operand() const {
 2312 return isSDWAOperand(MVT::i32);
 2313 }
2314
2315bool AMDGPUOperand::isBoolReg() const {
2316 return isReg() && ((AsmParser->isWave64() && isSCSrc_b64()) ||
2317 (AsmParser->isWave32() && isSCSrc_b32()));
2318}
2319
2320uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2321{
2322 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2323 assert(Size == 2 || Size == 4 || Size == 8);
2324
2325 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2326
2327 if (Imm.Mods.Abs) {
2328 Val &= ~FpSignMask;
2329 }
2330 if (Imm.Mods.Neg) {
2331 Val ^= FpSignMask;
2332 }
2333
2334 return Val;
2335}
2336
// Append this immediate operand to Inst, dispatching to the literal path
// for SI source operands.
// NOTE(review): original lines 2341 and 2352 (the addOperand calls for the
// expression and the plain-immediate paths) are missing from this listing.
 2337 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
 2338 MCOpIdx = Inst.getNumOperands();
 2339
 2340 if (isExpr()) {
 2342 return;
 2343 }
 2344
 2345 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
 2346 Inst.getNumOperands())) {
 2347 addLiteralImmOperand(Inst, Imm.Val,
 2348 ApplyModifiers &
 2349 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
 2350 } else {
 2351 assert(!isImmTy(ImmTyNone) || !hasModifiers());
 2353 }
 2354}
2355
// Encode an immediate that occupies a literal slot: pick inline-constant
// encoding where possible, otherwise emit the (possibly truncated/shifted)
// literal, honoring explicit lit()/lit64() modifiers.
// NOTE(review): this listing is missing many original lines, in particular
// the OPERAND_* case labels of both switches and several addOperand call
// heads (e.g. lines 2379-2383, 2408-2409, 2430, 2436-2439, 2451-2471,
// 2489, 2491, 2499-2512, 2515-2516, 2519, 2531-2533, 2536, 2560-2570,
// 2573, 2584, 2586) — the per-operand-type grouping cannot be verified.
 2356 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
 2357 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
 2358 auto OpNum = Inst.getNumOperands();
 2359 // Check that this operand accepts literals
 2360 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
 2361
 2362 if (ApplyModifiers) {
 2363 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
 2364 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
 2365 Val = applyInputFPModifiers(Val, Size);
 2366 }
 2367
 2368 APInt Literal(64, Val);
 2369 uint8_t OpTy = InstDesc.operands()[OpNum].OperandType;
 2370
 2371 bool CanUse64BitLiterals =
 2372 AsmParser->has64BitLiterals() &&
 2373 !(InstDesc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P));
 2374 LitModifier Lit = getModifiers().Lit;
 2375 MCContext &Ctx = AsmParser->getContext();
 2376
 2377 if (Imm.IsFPImm) { // We got fp literal token
 2378 switch (OpTy) {
 2384 if (Lit == LitModifier::None &&
 2386 AsmParser->hasInv2PiInlineImm())) {
 2387 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
 2388 return;
 2389 }
 2390
 2391 // Non-inlineable
 2392 if (AMDGPU::isSISrcFPOperand(InstDesc,
 2393 OpNum)) { // Expected 64-bit fp operand
 2394 bool HasMandatoryLiteral =
 2395 AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::imm);
 2396 // For fp operands we check if low 32 bits are zeros
 2397 if (Literal.getLoBits(32) != 0 &&
 2398 (InstDesc.getSize() != 4 || !AsmParser->has64BitLiterals()) &&
 2399 !HasMandatoryLiteral) {
 2400 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(
 2401 Inst.getLoc(),
 2402 "Can't encode literal as exact 64-bit floating-point operand. "
 2403 "Low 32-bits will be set to zero");
 2404 Val &= 0xffffffff00000000u;
 2405 }
 2406
 2407 if ((OpTy == AMDGPU::OPERAND_REG_IMM_FP64 ||
 2410 if (CanUse64BitLiterals && Lit == LitModifier::None &&
 2411 (isInt<32>(Val) || isUInt<32>(Val))) {
 2412 // The floating-point operand will be verbalized as an
 2413 // integer one. If that integer happens to fit 32 bits, on
 2414 // re-assembling it will be intepreted as the high half of
 2415 // the actual value, so we have to wrap it into lit64().
 2416 Lit = LitModifier::Lit64;
 2417 } else if (Lit == LitModifier::Lit) {
 2418 // For FP64 operands lit() specifies the high half of the value.
 2419 Val = Hi_32(Val);
 2420 }
 2421 }
 2422 break;
 2423 }
 2424
 2425 // We don't allow fp literals in 64-bit integer instructions. It is
 2426 // unclear how we should encode them. This case should be checked earlier
 2427 // in predicate methods (isLiteralImm())
 2428 llvm_unreachable("fp literal in 64-bit integer instruction.");
 2429
 2431 if (CanUse64BitLiterals && Lit == LitModifier::None &&
 2432 (isInt<32>(Val) || isUInt<32>(Val)))
 2433 Lit = LitModifier::Lit64;
 2434 break;
 2435
 2440 if (Lit == LitModifier::None && AsmParser->hasInv2PiInlineImm() &&
 2441 Literal == 0x3fc45f306725feed) {
 2442 // This is the 1/(2*pi) which is going to be truncated to bf16 with the
 2443 // loss of precision. The constant represents ideomatic fp32 value of
 2444 // 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
 2445 // bits. Prevent rounding below.
 2446 Inst.addOperand(MCOperand::createImm(0x3e22));
 2447 return;
 2448 }
 2449 [[fallthrough]];
 2450
 2472 bool lost;
 2473 APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
 2474 // Convert literal to single precision
 2475 FPLiteral.convert(*getOpFltSemantics(OpTy),
 2476 APFloat::rmNearestTiesToEven, &lost);
 2477 // We allow precision lost but not overflow or underflow. This should be
 2478 // checked earlier in isLiteralImm()
 2479
 2480 Val = FPLiteral.bitcastToAPInt().getZExtValue();
 2481 break;
 2482 }
 2483 default:
 2484 llvm_unreachable("invalid operand size");
 2485 }
 2486
 2487 if (Lit != LitModifier::None) {
 2488 Inst.addOperand(
 2490 } else {
 2492 }
 2493 return;
 2494 }
 2495
 2496 // We got int literal token.
 2497 // Only sign extend inline immediates.
 2498 switch (OpTy) {
 2513 break;
 2514
 2517 if (Lit == LitModifier::None &&
 2518 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
 2520 return;
 2521 }
 2522
 2523 // When the 32 MSBs are not zero (effectively means it can't be safely
 2524 // truncated to uint32_t), if the target doesn't support 64-bit literals, or
 2525 // the lit modifier is explicitly used, we need to truncate it to the 32
 2526 // LSBs.
 2527 if (!AsmParser->has64BitLiterals() || Lit == LitModifier::Lit)
 2528 Val = Lo_32(Val);
 2529 break;
 2530
 2534 if (Lit == LitModifier::None &&
 2535 AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
 2537 return;
 2538 }
 2539
 2540 // If the target doesn't support 64-bit literals, we need to use the
 2541 // constant as the high 32 MSBs of a double-precision floating point value.
 2542 if (!AsmParser->has64BitLiterals()) {
 2543 Val = static_cast<uint64_t>(Val) << 32;
 2544 } else {
 2545 // Now the target does support 64-bit literals, there are two cases
 2546 // where we still want to use src_literal encoding:
 2547 // 1) explicitly forced by using lit modifier;
 2548 // 2) the value is a valid 32-bit representation (signed or unsigned),
 2549 // meanwhile not forced by lit64 modifier.
 2550 if (Lit == LitModifier::Lit ||
 2551 (Lit != LitModifier::Lit64 && (isInt<32>(Val) || isUInt<32>(Val))))
 2552 Val = static_cast<uint64_t>(Val) << 32;
 2553 }
 2554
 2555 // For FP64 operands lit() specifies the high half of the value.
 2556 if (Lit == LitModifier::Lit)
 2557 Val = Hi_32(Val);
 2558 break;
 2559
 2571 break;
 2572
 2574 if ((isInt<32>(Val) || isUInt<32>(Val)) && Lit != LitModifier::Lit64)
 2575 Val <<= 32;
 2576 break;
 2577
 2578 default:
 2579 llvm_unreachable("invalid operand type");
 2580 }
 2581
 2582 if (Lit != LitModifier::None) {
 2583 Inst.addOperand(
 2585 } else {
 2587 }
 2588}
2589
// Append this register operand to Inst, translating to the subtarget's
// MC register via AMDGPU::getMCReg. Records the MC operand index first.
 2590 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
 2591 MCOpIdx = Inst.getNumOperands();
 2592 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
 2593}
2594
// True if this operand is a register that encodes a named inline value
// (delegates to the file-scope ::isInlineValue helper).
 2595 bool AMDGPUOperand::isInlineValue() const {
 2596 return isRegKind() && ::isInlineValue(getReg());
 2597 }
2598
2599//===----------------------------------------------------------------------===//
2600// AsmParser
2601//===----------------------------------------------------------------------===//
2602
// Define a pre-set assembler symbol Id with value Val in the MC context.
// NOTE(review): original line 2610 (presumably the statement assigning Val
// to the symbol, e.g. setting its variable value) is missing from this
// listing.
 2603 void AMDGPUAsmParser::createConstantSymbol(StringRef Id, int64_t Val) {
 2604 // TODO: make those pre-defined variables read-only.
 2605 // Currently there is none suitable machinery in the core llvm-mc for this.
 2606 // MCSymbol::isRedefinable is intended for another purpose, and
 2607 // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
 2608 MCContext &Ctx = getContext();
 2609 MCSymbol *Sym = Ctx.getOrCreateSymbol(Id);
 2611}
2612
2613static int getRegClass(RegisterKind Is, unsigned RegWidth) {
2614 if (Is == IS_VGPR) {
2615 switch (RegWidth) {
2616 default: return -1;
2617 case 32:
2618 return AMDGPU::VGPR_32RegClassID;
2619 case 64:
2620 return AMDGPU::VReg_64RegClassID;
2621 case 96:
2622 return AMDGPU::VReg_96RegClassID;
2623 case 128:
2624 return AMDGPU::VReg_128RegClassID;
2625 case 160:
2626 return AMDGPU::VReg_160RegClassID;
2627 case 192:
2628 return AMDGPU::VReg_192RegClassID;
2629 case 224:
2630 return AMDGPU::VReg_224RegClassID;
2631 case 256:
2632 return AMDGPU::VReg_256RegClassID;
2633 case 288:
2634 return AMDGPU::VReg_288RegClassID;
2635 case 320:
2636 return AMDGPU::VReg_320RegClassID;
2637 case 352:
2638 return AMDGPU::VReg_352RegClassID;
2639 case 384:
2640 return AMDGPU::VReg_384RegClassID;
2641 case 512:
2642 return AMDGPU::VReg_512RegClassID;
2643 case 1024:
2644 return AMDGPU::VReg_1024RegClassID;
2645 }
2646 } else if (Is == IS_TTMP) {
2647 switch (RegWidth) {
2648 default: return -1;
2649 case 32:
2650 return AMDGPU::TTMP_32RegClassID;
2651 case 64:
2652 return AMDGPU::TTMP_64RegClassID;
2653 case 128:
2654 return AMDGPU::TTMP_128RegClassID;
2655 case 256:
2656 return AMDGPU::TTMP_256RegClassID;
2657 case 512:
2658 return AMDGPU::TTMP_512RegClassID;
2659 }
2660 } else if (Is == IS_SGPR) {
2661 switch (RegWidth) {
2662 default: return -1;
2663 case 32:
2664 return AMDGPU::SGPR_32RegClassID;
2665 case 64:
2666 return AMDGPU::SGPR_64RegClassID;
2667 case 96:
2668 return AMDGPU::SGPR_96RegClassID;
2669 case 128:
2670 return AMDGPU::SGPR_128RegClassID;
2671 case 160:
2672 return AMDGPU::SGPR_160RegClassID;
2673 case 192:
2674 return AMDGPU::SGPR_192RegClassID;
2675 case 224:
2676 return AMDGPU::SGPR_224RegClassID;
2677 case 256:
2678 return AMDGPU::SGPR_256RegClassID;
2679 case 288:
2680 return AMDGPU::SGPR_288RegClassID;
2681 case 320:
2682 return AMDGPU::SGPR_320RegClassID;
2683 case 352:
2684 return AMDGPU::SGPR_352RegClassID;
2685 case 384:
2686 return AMDGPU::SGPR_384RegClassID;
2687 case 512:
2688 return AMDGPU::SGPR_512RegClassID;
2689 }
2690 } else if (Is == IS_AGPR) {
2691 switch (RegWidth) {
2692 default: return -1;
2693 case 32:
2694 return AMDGPU::AGPR_32RegClassID;
2695 case 64:
2696 return AMDGPU::AReg_64RegClassID;
2697 case 96:
2698 return AMDGPU::AReg_96RegClassID;
2699 case 128:
2700 return AMDGPU::AReg_128RegClassID;
2701 case 160:
2702 return AMDGPU::AReg_160RegClassID;
2703 case 192:
2704 return AMDGPU::AReg_192RegClassID;
2705 case 224:
2706 return AMDGPU::AReg_224RegClassID;
2707 case 256:
2708 return AMDGPU::AReg_256RegClassID;
2709 case 288:
2710 return AMDGPU::AReg_288RegClassID;
2711 case 320:
2712 return AMDGPU::AReg_320RegClassID;
2713 case 352:
2714 return AMDGPU::AReg_352RegClassID;
2715 case 384:
2716 return AMDGPU::AReg_384RegClassID;
2717 case 512:
2718 return AMDGPU::AReg_512RegClassID;
2719 case 1024:
2720 return AMDGPU::AReg_1024RegClassID;
2721 }
2722 }
2723 return -1;
2724}
2725
// Translate a special-register name to its MCRegister; yields NoRegister
// for unknown names.
// NOTE(review): the function signature and the StringSwitch head (original
// lines 2726-2727) are missing from this listing.
 2728 .Case("exec", AMDGPU::EXEC)
 2729 .Case("vcc", AMDGPU::VCC)
 2730 .Case("flat_scratch", AMDGPU::FLAT_SCR)
 2731 .Case("xnack_mask", AMDGPU::XNACK_MASK)
 2732 .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
 2733 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
 2734 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
 2735 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
 2736 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
 2737 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
 2738 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
 2739 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
 2740 .Case("src_flat_scratch_base_lo", AMDGPU::SRC_FLAT_SCRATCH_BASE_LO)
 2741 .Case("src_flat_scratch_base_hi", AMDGPU::SRC_FLAT_SCRATCH_BASE_HI)
 2742 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
 2743 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
 2744 .Case("lds_direct", AMDGPU::LDS_DIRECT)
 2745 .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
 2746 .Case("m0", AMDGPU::M0)
 2747 .Case("vccz", AMDGPU::SRC_VCCZ)
 2748 .Case("src_vccz", AMDGPU::SRC_VCCZ)
 2749 .Case("execz", AMDGPU::SRC_EXECZ)
 2750 .Case("src_execz", AMDGPU::SRC_EXECZ)
 2751 .Case("scc", AMDGPU::SRC_SCC)
 2752 .Case("src_scc", AMDGPU::SRC_SCC)
 2753 .Case("tba", AMDGPU::TBA)
 2754 .Case("tma", AMDGPU::TMA)
 2755 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
 2756 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
 2757 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
 2758 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
 2759 .Case("vcc_lo", AMDGPU::VCC_LO)
 2760 .Case("vcc_hi", AMDGPU::VCC_HI)
 2761 .Case("exec_lo", AMDGPU::EXEC_LO)
 2762 .Case("exec_hi", AMDGPU::EXEC_HI)
 2763 .Case("tma_lo", AMDGPU::TMA_LO)
 2764 .Case("tma_hi", AMDGPU::TMA_HI)
 2765 .Case("tba_lo", AMDGPU::TBA_LO)
 2766 .Case("tba_hi", AMDGPU::TBA_HI)
 2767 .Case("pc", AMDGPU::PC_REG)
 2768 .Case("null", AMDGPU::SGPR_NULL)
 2769 .Default(AMDGPU::NoRegister);
 2770}
2771
2772bool AMDGPUAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
2773 SMLoc &EndLoc, bool RestoreOnFailure) {
2774 auto R = parseRegister();
2775 if (!R) return true;
2776 assert(R->isReg());
2777 RegNo = R->getReg();
2778 StartLoc = R->getStartLoc();
2779 EndLoc = R->getEndLoc();
2780 return false;
2781}
2782
// Public entry point: parse a register without restore-on-failure semantics.
 2783 bool AMDGPUAsmParser::parseRegister(MCRegister &Reg, SMLoc &StartLoc,
 2784 SMLoc &EndLoc) {
 2785 return ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
 2786 }
2787
2788ParseStatus AMDGPUAsmParser::tryParseRegister(MCRegister &Reg, SMLoc &StartLoc,
2789 SMLoc &EndLoc) {
2790 bool Result = ParseRegister(Reg, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
2791 bool PendingErrors = getParser().hasPendingError();
2792 getParser().clearPendingErrors();
2793 if (PendingErrors)
2794 return ParseStatus::Failure;
2795 if (Result)
2796 return ParseStatus::NoMatch;
2797 return ParseStatus::Success;
2798}
2799
2800bool AMDGPUAsmParser::AddNextRegisterToList(MCRegister &Reg, unsigned &RegWidth,
2801 RegisterKind RegKind,
2802 MCRegister Reg1, SMLoc Loc) {
2803 switch (RegKind) {
2804 case IS_SPECIAL:
2805 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
2806 Reg = AMDGPU::EXEC;
2807 RegWidth = 64;
2808 return true;
2809 }
2810 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
2811 Reg = AMDGPU::FLAT_SCR;
2812 RegWidth = 64;
2813 return true;
2814 }
2815 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
2816 Reg = AMDGPU::XNACK_MASK;
2817 RegWidth = 64;
2818 return true;
2819 }
2820 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
2821 Reg = AMDGPU::VCC;
2822 RegWidth = 64;
2823 return true;
2824 }
2825 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
2826 Reg = AMDGPU::TBA;
2827 RegWidth = 64;
2828 return true;
2829 }
2830 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
2831 Reg = AMDGPU::TMA;
2832 RegWidth = 64;
2833 return true;
2834 }
2835 Error(Loc, "register does not fit in the list");
2836 return false;
2837 case IS_VGPR:
2838 case IS_SGPR:
2839 case IS_AGPR:
2840 case IS_TTMP:
2841 if (Reg1 != Reg + RegWidth / 32) {
2842 Error(Loc, "registers in a list must have consecutive indices");
2843 return false;
2844 }
2845 RegWidth += 32;
2846 return true;
2847 default:
2848 llvm_unreachable("unexpected register kind");
2849 }
2850}
2851
// Prefix/kind descriptor for a regular register family (v, s, ttmp, acc, a).
// NOTE(review): original line 2853 (the name/prefix member, used below as
// Reg.Name with starts_with and brace-initialized from a string literal)
// is missing from this listing.
 2852 struct RegInfo {
 2854 RegisterKind Kind;
 2855 };
2856
// Table of recognized regular-register prefixes. Order matters for prefix
// matching in getRegularRegInfo: "acc" must precede "a".
 2857 static constexpr RegInfo RegularRegisters[] = {
 2858 {{"v"}, IS_VGPR},
 2859 {{"s"}, IS_SGPR},
 2860 {{"ttmp"}, IS_TTMP},
 2861 {{"acc"}, IS_AGPR},
 2862 {{"a"}, IS_AGPR},
 2863 };
2864
2865static bool isRegularReg(RegisterKind Kind) {
2866 return Kind == IS_VGPR ||
2867 Kind == IS_SGPR ||
2868 Kind == IS_TTMP ||
2869 Kind == IS_AGPR;
2870}
2871
// Find the RegularRegisters entry whose prefix starts Str, or nullptr.
// NOTE(review): the function signature (original line 2872) is missing from
// this listing.
 2873 for (const RegInfo &Reg : RegularRegisters)
 2874 if (Str.starts_with(Reg.Name))
 2875 return &Reg;
 2876 return nullptr;
 2877}
2878
// Parse Str as a base-10 register index into Num; true on success.
 2879 static bool getRegNum(StringRef Str, unsigned& Num) {
 2880 return !Str.getAsInteger(10, Num);
 2881 }
2882
2883bool
2884AMDGPUAsmParser::isRegister(const AsmToken &Token,
2885 const AsmToken &NextToken) const {
2886
2887 // A list of consecutive registers: [s0,s1,s2,s3]
2888 if (Token.is(AsmToken::LBrac))
2889 return true;
2890
2891 if (!Token.is(AsmToken::Identifier))
2892 return false;
2893
2894 // A single register like s0 or a range of registers like s[0:1]
2895
2896 StringRef Str = Token.getString();
2897 const RegInfo *Reg = getRegularRegInfo(Str);
2898 if (Reg) {
2899 StringRef RegName = Reg->Name;
2900 StringRef RegSuffix = Str.substr(RegName.size());
2901 if (!RegSuffix.empty()) {
2902 RegSuffix.consume_back(".l");
2903 RegSuffix.consume_back(".h");
2904 unsigned Num;
2905 // A single register with an index: rXX
2906 if (getRegNum(RegSuffix, Num))
2907 return true;
2908 } else {
2909 // A range of registers: r[XX:YY].
2910 if (NextToken.is(AsmToken::LBrac))
2911 return true;
2912 }
2913 }
2914
2915 return getSpecialRegForName(Str).isValid();
2916}
2917
2918bool
2919AMDGPUAsmParser::isRegister()
2920{
2921 return isRegister(getToken(), peekToken());
2922}
2923
2924MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
2925 unsigned SubReg, unsigned RegWidth,
2926 SMLoc Loc) {
2927 assert(isRegularReg(RegKind));
2928
2929 unsigned AlignSize = 1;
2930 if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
2931 // SGPR and TTMP registers must be aligned.
2932 // Max required alignment is 4 dwords.
2933 AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
2934 }
2935
2936 if (RegNum % AlignSize != 0) {
2937 Error(Loc, "invalid register alignment");
2938 return MCRegister();
2939 }
2940
2941 unsigned RegIdx = RegNum / AlignSize;
2942 int RCID = getRegClass(RegKind, RegWidth);
2943 if (RCID == -1) {
2944 Error(Loc, "invalid or unsupported register size");
2945 return MCRegister();
2946 }
2947
2948 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
2949 const MCRegisterClass RC = TRI->getRegClass(RCID);
2950 if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
2951 Error(Loc, "register index is out of range");
2952 return AMDGPU::NoRegister;
2953 }
2954
2955 if (RegKind == IS_VGPR && !isGFX1250Plus() && RegIdx + RegWidth / 32 > 256) {
2956 Error(Loc, "register index is out of range");
2957 return MCRegister();
2958 }
2959
2960 MCRegister Reg = RC.getRegister(RegIdx);
2961
2962 if (SubReg) {
2963 Reg = TRI->getSubReg(Reg, SubReg);
2964
2965 // Currently all regular registers have their .l and .h subregisters, so
2966 // we should never need to generate an error here.
2967 assert(Reg && "Invalid subregister!");
2968 }
2969
2970 return Reg;
2971}
2972
2973bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth,
2974 unsigned &SubReg) {
2975 int64_t RegLo, RegHi;
2976 if (!skipToken(AsmToken::LBrac, "missing register index"))
2977 return false;
2978
2979 SMLoc FirstIdxLoc = getLoc();
2980 SMLoc SecondIdxLoc;
2981
2982 if (!parseExpr(RegLo))
2983 return false;
2984
2985 if (trySkipToken(AsmToken::Colon)) {
2986 SecondIdxLoc = getLoc();
2987 if (!parseExpr(RegHi))
2988 return false;
2989 } else {
2990 RegHi = RegLo;
2991 }
2992
2993 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
2994 return false;
2995
2996 if (!isUInt<32>(RegLo)) {
2997 Error(FirstIdxLoc, "invalid register index");
2998 return false;
2999 }
3000
3001 if (!isUInt<32>(RegHi)) {
3002 Error(SecondIdxLoc, "invalid register index");
3003 return false;
3004 }
3005
3006 if (RegLo > RegHi) {
3007 Error(FirstIdxLoc, "first register index should not exceed second index");
3008 return false;
3009 }
3010
3011 if (RegHi == RegLo) {
3012 StringRef RegSuffix = getTokenStr();
3013 if (RegSuffix == ".l") {
3014 SubReg = AMDGPU::lo16;
3015 lex();
3016 } else if (RegSuffix == ".h") {
3017 SubReg = AMDGPU::hi16;
3018 lex();
3019 }
3020 }
3021
3022 Num = static_cast<unsigned>(RegLo);
3023 RegWidth = 32 * ((RegHi - RegLo) + 1);
3024
3025 return true;
3026}
3027
3028MCRegister AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
3029 unsigned &RegNum,
3030 unsigned &RegWidth,
3031 SmallVectorImpl<AsmToken> &Tokens) {
3032 assert(isToken(AsmToken::Identifier));
3033 MCRegister Reg = getSpecialRegForName(getTokenStr());
3034 if (Reg) {
3035 RegNum = 0;
3036 RegWidth = 32;
3037 RegKind = IS_SPECIAL;
3038 Tokens.push_back(getToken());
3039 lex(); // skip register name
3040 }
3041 return Reg;
3042}
3043
3044MCRegister AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
3045 unsigned &RegNum,
3046 unsigned &RegWidth,
3047 SmallVectorImpl<AsmToken> &Tokens) {
3048 assert(isToken(AsmToken::Identifier));
3049 StringRef RegName = getTokenStr();
3050 auto Loc = getLoc();
3051
3052 const RegInfo *RI = getRegularRegInfo(RegName);
3053 if (!RI) {
3054 Error(Loc, "invalid register name");
3055 return MCRegister();
3056 }
3057
3058 Tokens.push_back(getToken());
3059 lex(); // skip register name
3060
3061 RegKind = RI->Kind;
3062 StringRef RegSuffix = RegName.substr(RI->Name.size());
3063 unsigned SubReg = NoSubRegister;
3064 if (!RegSuffix.empty()) {
3065 if (RegSuffix.consume_back(".l"))
3066 SubReg = AMDGPU::lo16;
3067 else if (RegSuffix.consume_back(".h"))
3068 SubReg = AMDGPU::hi16;
3069
3070 // Single 32-bit register: vXX.
3071 if (!getRegNum(RegSuffix, RegNum)) {
3072 Error(Loc, "invalid register index");
3073 return MCRegister();
3074 }
3075 RegWidth = 32;
3076 } else {
3077 // Range of registers: v[XX:YY]. ":YY" is optional.
3078 if (!ParseRegRange(RegNum, RegWidth, SubReg))
3079 return MCRegister();
3080 }
3081
3082 return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
3083}
3084
3085MCRegister AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind,
3086 unsigned &RegNum, unsigned &RegWidth,
3087 SmallVectorImpl<AsmToken> &Tokens) {
3088 MCRegister Reg;
3089 auto ListLoc = getLoc();
3090
3091 if (!skipToken(AsmToken::LBrac,
3092 "expected a register or a list of registers")) {
3093 return MCRegister();
3094 }
3095
3096 // List of consecutive registers, e.g.: [s0,s1,s2,s3]
3097
3098 auto Loc = getLoc();
3099 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
3100 return MCRegister();
3101 if (RegWidth != 32) {
3102 Error(Loc, "expected a single 32-bit register");
3103 return MCRegister();
3104 }
3105
3106 for (; trySkipToken(AsmToken::Comma); ) {
3107 RegisterKind NextRegKind;
3108 MCRegister NextReg;
3109 unsigned NextRegNum, NextRegWidth;
3110 Loc = getLoc();
3111
3112 if (!ParseAMDGPURegister(NextRegKind, NextReg,
3113 NextRegNum, NextRegWidth,
3114 Tokens)) {
3115 return MCRegister();
3116 }
3117 if (NextRegWidth != 32) {
3118 Error(Loc, "expected a single 32-bit register");
3119 return MCRegister();
3120 }
3121 if (NextRegKind != RegKind) {
3122 Error(Loc, "registers in a list must be of the same kind");
3123 return MCRegister();
3124 }
3125 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
3126 return MCRegister();
3127 }
3128
3129 if (!skipToken(AsmToken::RBrac,
3130 "expected a comma or a closing square bracket")) {
3131 return MCRegister();
3132 }
3133
3134 if (isRegularReg(RegKind))
3135 Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
3136
3137 return Reg;
3138}
3139
// Parse any register form: a special register (vcc, m0, ...), a regular
// register (vNN, sNN, ttmpNN, possibly with a range or .l/.h suffix), or a
// bracketed register list. On success fills RegKind/Reg/RegNum/RegWidth and
// returns true; on failure an error has already been reported.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = MCRegister();

  if (isToken(AsmToken::Identifier)) {
    // Special register names take precedence over regular register names.
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (!Reg)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    // Anything else must be a register list like [s0,s1,s2,s3].
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (!Reg) {
    // The parse helpers are expected to have diagnosed the failure.
    assert(Parser.hasPendingError());
    return false;
  }

  // Reject registers that exist in the register file description but are
  // not available on the selected subtarget.
  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      " register not available on this GPU");
    }
    return false;
  }

  return true;
}
3173
// Convenience overload that buffers the tokens consumed while parsing a
// register so they can be pushed back into the lexer.
// NOTE(review): the tokens are un-lexed on the *success* path when
// RestoreOnFailure is set — confirm this matches the intended
// restore-on-failure semantics implied by the parameter name.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind,
                                          MCRegister &Reg, unsigned &RegNum,
                                          unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = MCRegister();

  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      // Push the recorded tokens back in reverse order of consumption.
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}
3191
3192std::optional<StringRef>
3193AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
3194 switch (RegKind) {
3195 case IS_VGPR:
3196 return StringRef(".amdgcn.next_free_vgpr");
3197 case IS_SGPR:
3198 return StringRef(".amdgcn.next_free_sgpr");
3199 default:
3200 return std::nullopt;
3201 }
3202}
3203
// Create (or look up) the GPR-count tracking symbol for the given register
// kind and mark it redefinable so subsequent register uses can update it.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setRedefinable(true);
}
3211
// Raise the .amdgcn.next_free_{v,s}gpr symbol when the register range
// [DwordRegIndex, DwordRegIndex + ceil(RegWidth/32)) extends past its
// current value. Returns false (after reporting an error) when the symbol
// exists but is not an absolute variable.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword register index touched by this use.
  int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue()->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)

  return true;
}
3240
3241std::unique_ptr<AMDGPUOperand>
3242AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
3243 const auto &Tok = getToken();
3244 SMLoc StartLoc = Tok.getLoc();
3245 SMLoc EndLoc = Tok.getEndLoc();
3246 RegisterKind RegKind;
3247 MCRegister Reg;
3248 unsigned RegNum, RegWidth;
3249
3250 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
3251 return nullptr;
3252 }
3253 if (isHsaAbi(getSTI())) {
3254 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
3255 return nullptr;
3256 } else
3257 KernelScope.usesRegister(RegKind, RegNum, RegWidth);
3258 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
3259}
3260
// Parse an immediate operand: an optional lit()/lit64() wrapper around a
// floating-point literal (with an optional leading '-') or an
// integer/symbolic expression. Returns NoMatch when the stream starts a
// register or a modifier instead.
ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
                                      bool HasSP3AbsModifier, LitModifier Lit) {
  // TODO: add syntactic sugar for 1/(2*PI)

  if (isRegister() || isModifier())
    return ParseStatus::NoMatch;

  // Handle an explicit lit()/lit64() wrapper by recursing on its contents.
  if (Lit == LitModifier::None) {
    if (trySkipId("lit"))
      Lit = LitModifier::Lit;
    else if (trySkipId("lit64"))
      Lit = LitModifier::Lit64;

    if (Lit != LitModifier::None) {
      if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
        return ParseStatus::Failure;
      ParseStatus S = parseImm(Operands, HasSP3AbsModifier, Lit);
      if (S.isSuccess() &&
          !skipToken(AsmToken::RParen, "expected closing parentheses"))
        return ParseStatus::Failure;
      return S;
    }
  }

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = getLoc();
  bool Negate = false;

  // A '-' immediately before a real literal is folded into the literal.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  AMDGPUOperand::Modifiers Mods;
  Mods.Lit = Lit;

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    // The literal is stored as the IEEE double bit pattern; narrowing to
    // the operand's actual format happens later.
    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
      return ParseStatus::Failure;
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
        AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                                 AMDGPUOperand::ImmTyNone, true));
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    Op.setModifiers(Mods);

    return ParseStatus::Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc();

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //   |1.0|
      //   |-1|
      //   |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return ParseStatus::Failure;
    } else {
      if (Parser.parseExpression(Expr))
        return ParseStatus::Failure;
    }

    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
      Op.setModifiers(Mods);
    } else {
      // A lit()/lit64() wrapper requires an absolute value.
      if (Lit != LitModifier::None)
        return ParseStatus::NoMatch;
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return ParseStatus::Success;
  }

  // Unreachable: both branches above return.
  return ParseStatus::NoMatch;
}
3359
3360ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
3361 if (!isRegister())
3362 return ParseStatus::NoMatch;
3363
3364 if (auto R = parseRegister()) {
3365 assert(R->isReg());
3366 Operands.push_back(std::move(R));
3367 return ParseStatus::Success;
3368 }
3369 return ParseStatus::Failure;
3370}
3371
3372ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
3373 bool HasSP3AbsMod, LitModifier Lit) {
3374 ParseStatus Res = parseReg(Operands);
3375 if (!Res.isNoMatch())
3376 return Res;
3377 if (isModifier())
3378 return ParseStatus::NoMatch;
3379 return parseImm(Operands, HasSP3AbsMod, Lit);
3380}
3381
3382bool
3383AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3384 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
3385 const auto &str = Token.getString();
3386 return str == "abs" || str == "neg" || str == "sext";
3387 }
3388 return false;
3389}
3390
3391bool
3392AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
3393 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
3394}
3395
3396bool
3397AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3398 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
3399}
3400
3401bool
3402AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
3403 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
3404}
3405
3406// Check if this is an operand modifier or an opcode modifier
3407// which may look like an expression but it is not. We should
3408// avoid parsing these modifiers as expressions. Currently
3409// recognized sequences are:
3410// |...|
3411// abs(...)
3412// neg(...)
3413// sext(...)
3414// -reg
3415// -|...|
3416// -abs(...)
3417// name:...
3418//
3419bool
3420AMDGPUAsmParser::isModifier() {
3421
3422 AsmToken Tok = getToken();
3423 AsmToken NextToken[2];
3424 peekTokens(NextToken);
3425
3426 return isOperandModifier(Tok, NextToken[0]) ||
3427 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
3428 isOpcodeModifierWithVal(Tok, NextToken[0]);
3429}
3430
3431// Check if the current token is an SP3 'neg' modifier.
3432// Currently this modifier is allowed in the following context:
3433//
3434// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
3435// 2. Before an 'abs' modifier: -abs(...)
3436// 3. Before an SP3 'abs' modifier: -|...|
3437//
3438// In all other cases "-" is handled as a part
3439// of an expression that follows the sign.
3440//
3441// Note: When "-" is followed by an integer literal,
3442// this is interpreted as integer negation rather
3443// than a floating-point NEG modifier applied to N.
3444// Beside being contr-intuitive, such use of floating-point
3445// NEG modifier would have resulted in different meaning
3446// of integer literals used with VOP1/2/C and VOP3,
3447// for example:
3448// v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3449// v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3450// Negative fp literals with preceding "-" are
3451// handled likewise for uniformity
3452//
3453bool
3454AMDGPUAsmParser::parseSP3NegModifier() {
3455
3456 AsmToken NextToken[2];
3457 peekTokens(NextToken);
3458
3459 if (isToken(AsmToken::Minus) &&
3460 (isRegister(NextToken[0], NextToken[1]) ||
3461 NextToken[0].is(AsmToken::Pipe) ||
3462 isId(NextToken[0], "abs"))) {
3463 lex();
3464 return true;
3465 }
3466
3467 return false;
3468}
3469
// Parse an operand with optional floating-point input modifiers: SP3
// negation '-', named neg(...)/abs(...), SP3 abs '|...|', and the
// lit()/lit64() literal markers. Any modifier flags are attached to the
// parsed operand.
ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
    return Error(getLoc(), "invalid syntax, expected 'neg' modifier");

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // SP3 '-' and named neg(...) cannot be combined on one operand.
  if (Neg && SP3Neg)
    return Error(Loc, "expected register or immediate");
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return ParseStatus::Failure;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return ParseStatus::Failure;

  LitModifier Lit = LitModifier::None;
  if (trySkipId("lit")) {
    Lit = LitModifier::Lit;
    if (!skipToken(AsmToken::LParen, "expected left paren after lit"))
      return ParseStatus::Failure;
  } else if (trySkipId("lit64")) {
    Lit = LitModifier::Lit64;
    if (!skipToken(AsmToken::LParen, "expected left paren after lit64"))
      return ParseStatus::Failure;
    if (!has64BitLiterals())
      return Error(Loc, "lit64 is not supported on this GPU");
  }

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Named abs(...) and SP3 '|...|' cannot be combined either.
  if (Abs && SP3Abs)
    return Error(Loc, "expected register or immediate");

  ParseStatus Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs, Lit);
  } else {
    Res = parseReg(Operands);
  }
  // With any modifier pending, a non-match becomes a hard failure.
  if (!Res.isSuccess())
    return (SP3Neg || Neg || SP3Abs || Abs || Lit != LitModifier::None)
               : Res;

  if (Lit != LitModifier::None && !Operands.back()->isImm())
    Error(Loc, "expected immediate with lit modifier");

  // Consume the closing delimiters in reverse nesting order.
  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return ParseStatus::Failure;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;
  if (Lit != LitModifier::None &&
      !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return ParseStatus::Failure;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;
  Mods.Lit = Lit;

  if (Mods.hasFPModifiers() || Lit != LitModifier::None) {
    // Modifiers cannot be attached to an unresolved expression.
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr())
      return Error(Op.getStartLoc(), "expected an absolute expression");
    Op.setModifiers(Mods);
  }
  return ParseStatus::Success;
}
3549
3550ParseStatus
3551AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
3552 bool AllowImm) {
3553 bool Sext = trySkipId("sext");
3554 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
3555 return ParseStatus::Failure;
3556
3557 ParseStatus Res;
3558 if (AllowImm) {
3559 Res = parseRegOrImm(Operands);
3560 } else {
3561 Res = parseReg(Operands);
3562 }
3563 if (!Res.isSuccess())
3564 return Sext ? ParseStatus::Failure : Res;
3565
3566 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
3567 return ParseStatus::Failure;
3568
3569 AMDGPUOperand::Modifiers Mods;
3570 Mods.Sext = Sext;
3571
3572 if (Mods.hasIntModifiers()) {
3573 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
3574 if (Op.isExpr())
3575 return Error(Op.getStartLoc(), "expected an absolute expression");
3576 Op.setModifiers(Mods);
3577 }
3578
3579 return ParseStatus::Success;
3580}
3581
// Parse a register operand that may carry FP input modifiers (neg/abs);
// immediates are not accepted in this position.
ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
3585
// Parse a register operand that may carry the integer sext() modifier;
// immediates are not accepted in this position.
ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
3589
3590ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
3591 auto Loc = getLoc();
3592 if (trySkipId("off")) {
3593 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
3594 AMDGPUOperand::ImmTyOff, false));
3595 return ParseStatus::Success;
3596 }
3597
3598 if (!isRegister())
3599 return ParseStatus::NoMatch;
3600
3601 std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
3602 if (Reg) {
3603 Operands.push_back(std::move(Reg));
3604 return ParseStatus::Success;
3605 }
3606
3607 return ParseStatus::Failure;
3608}
3609
3610unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
3611 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
3612
3613 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
3614 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
3615 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
3616 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
3617 return Match_InvalidOperand;
3618
3619 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
3620 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
3621 // v_mac_f32/16 allow only dst_sel == DWORD;
3622 auto OpNum =
3623 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3624 const auto &Op = Inst.getOperand(OpNum);
3625 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3626 return Match_InvalidOperand;
3627 }
3628 }
3629
3630 // Asm can first try to match VOPD or VOPD3. By failing early here with
3631 // Match_InvalidOperand, the parser will retry parsing as VOPD3 or VOPD.
3632 // Checking later during validateInstruction does not give a chance to retry
3633 // parsing as a different encoding.
3634 if (tryAnotherVOPDEncoding(Inst))
3635 return Match_InvalidOperand;
3636
3637 return Match_Success;
3638}
3639
3649
// What asm variants we should check.
// A forced encoding narrows matching to a single variant; the most
// specific forced combination is tested first.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (isForcedDPP() && isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3_DPP};
    return ArrayRef(Variants);
  }
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return ArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return ArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
    return ArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return ArrayRef(Variants);
  }

  // Nothing forced: try every variant.
  return getAllVariants();
}
3679
3680StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3681 if (isForcedDPP() && isForcedVOP3())
3682 return "e64_dpp";
3683
3684 if (getForcedEncodingSize() == 32)
3685 return "e32";
3686
3687 if (isForcedVOP3())
3688 return "e64";
3689
3690 if (isForcedSDWA())
3691 return "sdwa";
3692
3693 if (isForcedDPP())
3694 return "dpp";
3695
3696 return "";
3697}
3698
3699MCRegister
3700AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3701 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3702 for (MCPhysReg Reg : Desc.implicit_uses()) {
3703 switch (Reg) {
3704 case AMDGPU::FLAT_SCR:
3705 case AMDGPU::VCC:
3706 case AMDGPU::VCC_LO:
3707 case AMDGPU::VCC_HI:
3708 case AMDGPU::M0:
3709 return Reg;
3710 default:
3711 break;
3712 }
3713 }
3714 return MCRegister();
3715}
3716
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  // Immediates are taken verbatim; expressions are resolved to their
  // literal value first.
  int64_t Val = MO.isImm() ? MO.getImm() : getLitValue(MO.getExpr());
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  // Dispatch on the expected operand size (in bytes) and, for 16-bit
  // operands, on the precise operand type (int vs fp16 vs bf16).
  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.operands()[OpIdx].OperandType;
      return AMDGPU::isInlinableLiteralI16(Val, hasInv2PiInlineImm());

      return AMDGPU::isInlinableLiteralFP16(Val, hasInv2PiInlineImm());

      return AMDGPU::isInlinableLiteralBF16(Val, hasInv2PiInlineImm());

      return false;

    llvm_unreachable("invalid operand type");
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
3778
// \returns how many distinct scalar values the given opcode may read via
// the constant bus: 2 on GFX10+, except for the listed 64-bit shift
// opcodes; 1 everywhere else.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_e64_gfx11:
  case AMDGPU::V_LSHLREV_B64_e32_gfx12:
  case AMDGPU::V_LSHLREV_B64_e64_gfx12:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64_gfx11:
  case AMDGPU::V_LSHRREV_B64_e64_gfx12:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64_gfx11:
  case AMDGPU::V_ASHRREV_I64_e64_gfx12:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
3806
// Upper bound on the number of source operand indices collected below
// (six VOPD component sources).
constexpr unsigned MAX_SRC_OPERANDS_NUM = 6;

// Get regular operand indices in the same order as specified
// in the instruction (but append mandatory literals to the end).
// A -1 entry means the instruction has no such operand.
                     bool AddMandatoryLiterals = false) {

  // Mandatory (madmk/fmamk-style) literal operand, if requested.
  int16_t ImmIdx =
      AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::imm) : -1;

  if (isVOPD(Opcode)) {
    // VOPD instructions have separate X- and Y-component source lists.
    int16_t ImmXIdx =
        AddMandatoryLiterals ? getNamedOperandIdx(Opcode, OpName::immX) : -1;

    return {getNamedOperandIdx(Opcode, OpName::src0X),
            getNamedOperandIdx(Opcode, OpName::vsrc1X),
            getNamedOperandIdx(Opcode, OpName::vsrc2X),
            getNamedOperandIdx(Opcode, OpName::src0Y),
            getNamedOperandIdx(Opcode, OpName::vsrc1Y),
            getNamedOperandIdx(Opcode, OpName::vsrc2Y),
            ImmXIdx,
            ImmIdx};
  }

  return {getNamedOperandIdx(Opcode, OpName::src0),
          getNamedOperandIdx(Opcode, OpName::src1),
          getNamedOperandIdx(Opcode, OpName::src2), ImmIdx};
}
3836
3837bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
3838 const MCOperand &MO = Inst.getOperand(OpIdx);
3839 if (MO.isImm())
3840 return !isInlineConstant(Inst, OpIdx);
3841 if (MO.isReg()) {
3842 auto Reg = MO.getReg();
3843 if (!Reg)
3844 return false;
3845 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3846 auto PReg = mc2PseudoReg(Reg);
3847 return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
3848 }
3849 return true;
3850}
3851
3852// Based on the comment for `AMDGPUInstructionSelector::selectWritelane`:
3853// Writelane is special in that it can use SGPR and M0 (which would normally
3854// count as using the constant bus twice - but in this case it is allowed since
3855// the lane selector doesn't count as a use of the constant bus). However, it is
3856// still required to abide by the 1 SGPR rule.
3857static bool checkWriteLane(const MCInst &Inst) {
3858 const unsigned Opcode = Inst.getOpcode();
3859 if (Opcode != V_WRITELANE_B32_gfx6_gfx7 && Opcode != V_WRITELANE_B32_vi)
3860 return false;
3861 const MCOperand &LaneSelOp = Inst.getOperand(2);
3862 if (!LaneSelOp.isReg())
3863 return false;
3864 auto LaneSelReg = mc2PseudoReg(LaneSelOp.getReg());
3865 return LaneSelReg == M0 || LaneSelReg == M0_gfxpre11;
3866}
3867
// Verify that \p Inst does not read more scalar values (distinct SGPRs plus
// literal constants) than the hardware's constant bus permits. Reports an
// error at the offending operand and returns false on a violation.
bool AMDGPUAsmParser::validateConstantBusLimitations(
    const MCInst &Inst, const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  MCRegister LastSGPR;
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize;

  if (!(Desc.TSFlags &
      !isVOPD(Opcode))
    return true;

  // Writelane's lane selector does not count as a constant bus use.
  if (checkWriteLane(Inst))
    return true;

  // Check special imm operands (used by madmk, etc)
  if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::imm)) {
    ++NumLiterals;
    LiteralSize = 4;
  }

  SmallDenseSet<MCRegister> SGPRsUsed;
  MCRegister SGPRUsed = findImplicitSGPRReadInVOP(Inst);
  if (SGPRUsed) {
    SGPRsUsed.insert(SGPRUsed);
    ++ConstantBusUseCount;
  }

  OperandIndices OpIndices = getSrcOperandIndices(Opcode);

  unsigned ConstantBusLimit = getConstantBusLimit(Opcode);

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1)
      continue;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (usesConstantBus(Inst, OpIdx)) {
      if (MO.isReg()) {
        LastSGPR = mc2PseudoReg(MO.getReg());
        // Pairs of registers with a partial intersections like these
        // s0, s[0:1]
        // flat_scratch_lo, flat_scratch
        // flat_scratch_lo, flat_scratch_hi
        // are theoretically valid but they are disabled anyway.
        // Note that this code mimics SIInstrInfo::verifyInstruction
        if (SGPRsUsed.insert(LastSGPR).second) {
          ++ConstantBusUseCount;
        }
      } else { // Expression or a literal

        if (Desc.operands()[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
          continue; // special operand like VINTERP attr_chan

        // An instruction may use only one literal.
        // This has been validated on the previous step.
        // See validateVOPLiteral.
        // This literal may be used as more than one operand.
        // If all these operands are of the same size,
        // this literal counts as one scalar value.
        // Otherwise it counts as 2 scalar values.
        // See "GFX10 Shader Programming", section 3.6.2.3.

        if (Size < 4)
          Size = 4;

        if (NumLiterals == 0) {
          NumLiterals = 1;
          LiteralSize = Size;
        } else if (LiteralSize != Size) {
          NumLiterals = 2;
        }
      }
    }

    // Check after each operand so the error points at the first offender.
    if (ConstantBusUseCount + NumLiterals > ConstantBusLimit) {
      Error(getOperandLoc(Operands, OpIdx),
            "invalid operand (violates constant bus restrictions)");
      return false;
    }
  }
  return true;
}
3955
3956std::optional<unsigned>
3957AMDGPUAsmParser::checkVOPDRegBankConstraints(const MCInst &Inst, bool AsVOPD3) {
3958
3959 const unsigned Opcode = Inst.getOpcode();
3960 if (!isVOPD(Opcode))
3961 return {};
3962
3963 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3964
3965 auto getVRegIdx = [&](unsigned, unsigned OperandIdx) {
3966 const MCOperand &Opr = Inst.getOperand(OperandIdx);
3967 return (Opr.isReg() && !isSGPR(mc2PseudoReg(Opr.getReg()), TRI))
3968 ? Opr.getReg()
3969 : MCRegister();
3970 };
3971
3972 // On GFX12+ if both OpX and OpY are V_MOV_B32 then OPY uses SRC2
3973 // source-cache.
3974 bool SkipSrc =
3975 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx12 ||
3976 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx1250 ||
3977 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_gfx13 ||
3978 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx1250 ||
3979 Opcode == AMDGPU::V_DUAL_MOV_B32_e32_X_MOV_B32_e32_e96_gfx13;
3980 bool AllowSameVGPR = isGFX1250Plus();
3981
3982 if (AsVOPD3) { // Literal constants are not allowed with VOPD3.
3983 for (auto OpName : {OpName::src0X, OpName::src0Y}) {
3984 int I = getNamedOperandIdx(Opcode, OpName);
3985 const MCOperand &Op = Inst.getOperand(I);
3986 if (!Op.isImm())
3987 continue;
3988 int64_t Imm = Op.getImm();
3989 if (!AMDGPU::isInlinableLiteral32(Imm, hasInv2PiInlineImm()) &&
3990 !AMDGPU::isInlinableLiteral64(Imm, hasInv2PiInlineImm()))
3991 return (unsigned)I;
3992 }
3993
3994 for (auto OpName : {OpName::vsrc1X, OpName::vsrc1Y, OpName::vsrc2X,
3995 OpName::vsrc2Y, OpName::imm}) {
3996 int I = getNamedOperandIdx(Opcode, OpName);
3997 if (I == -1)
3998 continue;
3999 const MCOperand &Op = Inst.getOperand(I);
4000 if (Op.isImm())
4001 return (unsigned)I;
4002 }
4003 }
4004
4005 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4006 auto InvalidCompOprIdx = InstInfo.getInvalidCompOperandIndex(
4007 getVRegIdx, *TRI, SkipSrc, AllowSameVGPR, AsVOPD3);
4008
4009 return InvalidCompOprIdx;
4010}
4011
4012bool AMDGPUAsmParser::validateVOPD(const MCInst &Inst,
4013 const OperandVector &Operands) {
4014
4015 unsigned Opcode = Inst.getOpcode();
4016 bool AsVOPD3 = MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3;
4017
4018 if (AsVOPD3) {
4019 for (const std::unique_ptr<MCParsedAsmOperand> &Operand : Operands) {
4020 AMDGPUOperand &Op = (AMDGPUOperand &)*Operand;
4021 if ((Op.isRegKind() || Op.isImmTy(AMDGPUOperand::ImmTyNone)) &&
4022 (Op.getModifiers().getFPModifiersOperand() & SISrcMods::ABS))
4023 Error(Op.getStartLoc(), "ABS not allowed in VOPD3 instructions");
4024 }
4025 }
4026
4027 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, AsVOPD3);
4028 if (!InvalidCompOprIdx.has_value())
4029 return true;
4030
4031 auto CompOprIdx = *InvalidCompOprIdx;
4032 const auto &InstInfo = getVOPDInstInfo(Opcode, &MII);
4033 auto ParsedIdx =
4034 std::max(InstInfo[VOPD::X].getIndexInParsedOperands(CompOprIdx),
4035 InstInfo[VOPD::Y].getIndexInParsedOperands(CompOprIdx));
4036 assert(ParsedIdx > 0 && ParsedIdx < Operands.size());
4037
4038 auto Loc = ((AMDGPUOperand &)*Operands[ParsedIdx]).getStartLoc();
4039 if (CompOprIdx == VOPD::Component::DST) {
4040 if (AsVOPD3)
4041 Error(Loc, "dst registers must be distinct");
4042 else
4043 Error(Loc, "one dst register must be even and the other odd");
4044 } else {
4045 auto CompSrcIdx = CompOprIdx - VOPD::Component::DST_NUM;
4046 Error(Loc, Twine("src") + Twine(CompSrcIdx) +
4047 " operands must use different VGPR banks");
4048 }
4049
4050 return false;
4051}
4052
4053// \returns true if \p Inst does not satisfy VOPD constraints, but can be
4054// potentially used as VOPD3 with the same operands.
4055bool AMDGPUAsmParser::tryVOPD3(const MCInst &Inst) {
4056 // First check if it fits VOPD
4057 auto InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, false);
4058 if (!InvalidCompOprIdx.has_value())
4059 return false;
4060
4061 // Then if it fits VOPD3
4062 InvalidCompOprIdx = checkVOPDRegBankConstraints(Inst, true);
4063 if (InvalidCompOprIdx.has_value()) {
4064 // If failed operand is dst it is better to show error about VOPD3
4065 // instruction as it has more capabilities and error message will be
4066 // more informative. If the dst is not legal for VOPD3, then it is not
4067 // legal for VOPD either.
4068 if (*InvalidCompOprIdx == VOPD::Component::DST)
4069 return true;
4070
4071 // Otherwise prefer VOPD as we may find ourselves in an awkward situation
4072 // with a conflict in tied implicit src2 of fmac and no asm operand to
4073 // to point to.
4074 return false;
4075 }
4076 return true;
4077}
4078
4079// \returns true is a VOPD3 instruction can be also represented as a shorter
4080// VOPD encoding.
4081bool AMDGPUAsmParser::tryVOPD(const MCInst &Inst) {
4082 const unsigned Opcode = Inst.getOpcode();
4083 const auto &II = getVOPDInstInfo(Opcode, &MII);
4084 unsigned EncodingFamily = AMDGPU::getVOPDEncodingFamily(getSTI());
4085 if (!getCanBeVOPD(II[VOPD::X].getOpcode(), EncodingFamily, false).X ||
4086 !getCanBeVOPD(II[VOPD::Y].getOpcode(), EncodingFamily, false).Y)
4087 return false;
4088
4089 // This is an awkward exception, VOPD3 variant of V_DUAL_CNDMASK_B32 has
4090 // explicit src2 even if it is vcc_lo. If it was parsed as VOPD3 it cannot
4091 // be parsed as VOPD which does not accept src2.
4092 if (II[VOPD::X].getOpcode() == AMDGPU::V_CNDMASK_B32_e32 ||
4093 II[VOPD::Y].getOpcode() == AMDGPU::V_CNDMASK_B32_e32)
4094 return false;
4095
4096 // If any modifiers are set this cannot be VOPD.
4097 for (auto OpName : {OpName::src0X_modifiers, OpName::src0Y_modifiers,
4098 OpName::vsrc1X_modifiers, OpName::vsrc1Y_modifiers,
4099 OpName::vsrc2X_modifiers, OpName::vsrc2Y_modifiers}) {
4100 int I = getNamedOperandIdx(Opcode, OpName);
4101 if (I == -1)
4102 continue;
4103 if (Inst.getOperand(I).getImm())
4104 return false;
4105 }
4106
4107 return !tryVOPD3(Inst);
4108}
4109
4110// VOPD3 has more relaxed register constraints than VOPD. We prefer shorter VOPD
4111// form but switch to VOPD3 otherwise.
4112bool AMDGPUAsmParser::tryAnotherVOPDEncoding(const MCInst &Inst) {
4113 const unsigned Opcode = Inst.getOpcode();
4114 if (!isGFX1250Plus() || !isVOPD(Opcode))
4115 return false;
4116
4117 if (MII.get(Opcode).TSFlags & SIInstrFlags::VOPD3)
4118 return tryVOPD(Inst);
4119 return tryVOPD3(Inst);
4120}
4121
4122bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
4123
4124 const unsigned Opc = Inst.getOpcode();
4125 const MCInstrDesc &Desc = MII.get(Opc);
4126
4127 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
4128 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
4129 assert(ClampIdx != -1);
4130 return Inst.getOperand(ClampIdx).getImm() == 0;
4131 }
4132
4133 return true;
4134}
4135
4138
// Verify that the MIMG vdata register tuple matches the number of 32-bit
// components implied by dmask (or gather4), packed d16, and tfe.
// Emits a diagnostic at \p IDLoc and returns false on mismatch.
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst, SMLoc IDLoc) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);

  if (VDataIdx == -1 && isGFX10Plus()) // no return image_sample
    return true;

  if ((DMaskIdx == -1 || TFEIdx == -1) && isGFX10_AEncoding()) // intersect_ray
    return true;

  // Size of the vdata tuple in bytes; tfe consumes one extra 32-bit register.
  unsigned VDataSize = getRegOperandSize(Desc, VDataIdx);
  unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
  // A dmask of 0 is treated as a single component.
  if (DMask == 0)
    DMask = 1;

  bool IsPackedD16 = false;
  // gather4 always returns 4 components regardless of dmask.
  unsigned DataSize =
      (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : llvm::popcount(DMask);
  if (hasPackedD16()) {
    int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
    IsPackedD16 = D16Idx >= 0;
    // With packed d16 enabled, two components share one 32-bit register.
    if (IsPackedD16 && Inst.getOperand(D16Idx).getImm())
      DataSize = (DataSize + 1) / 2;
  }

  if ((VDataSize / 4) == DataSize + TFESize)
    return true;

  // Mention only the modifiers relevant on this subtarget in the diagnostic.
  StringRef Modifiers;
  if (isGFX90A())
    Modifiers = IsPackedD16 ? "dmask and d16" : "dmask";
  else
    Modifiers = IsPackedD16 ? "dmask, d16 and tfe" : "dmask and tfe";

  Error(IDLoc, Twine("image data size does not match ") + Modifiers);
  return false;
}
4185
// Verify that the number of address VGPRs matches what the dim and a16
// modifiers require, accounting for NSA encodings and partial-NSA padding.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst, SMLoc IDLoc) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  // NOTE(review): the initializer of BaseOpcode is not visible in this
  // excerpt; presumably AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode) —
  // confirm against the full source.
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  AMDGPU::OpName RSrcOpName = (Desc.TSFlags & SIInstrFlags::MIMG)
                                  ? AMDGPU::OpName::srsrc
                                  : AMDGPU::OpName::rsrc;
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
  // BVH opcodes only require the a16 flag to agree with the base opcode.
  if (BaseOpcode->BVH) {
    if (IsA16 == BaseOpcode->A16)
      return true;
    Error(IDLoc, "image address size does not match a16");
    return false;
  }

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // NSA encoding: each address component is a separate operand between
  // vaddr0 and srsrc.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned ActualAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx : getRegOperandSize(Desc, VAddr0Idx) / 4;

  unsigned ExpectedAddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  if (IsNSA) {
    // NOTE(review): the partial-NSA threshold comparison is truncated in this
    // excerpt; the condition body recomputes the actual size with the last
    // operand holding the remaining components.
    if (hasPartialNSAEncoding() &&
        ExpectedAddrSize >
      int VAddrLastIdx = SrsrcIdx - 1;
      unsigned VAddrLastSize = getRegOperandSize(Desc, VAddrLastIdx) / 4;

      ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
    }
  } else {
    // Non-NSA address tuples are padded up to 16 registers beyond 12.
    if (ExpectedAddrSize > 12)
      ExpectedAddrSize = 16;

    // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required.
    // This provides backward compatibility for assembly created
    // before 160b/192b/224b types were directly supported.
    if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7))
      return true;
  }

  if (ActualAddrSize == ExpectedAddrSize)
    return true;

  Error(IDLoc, "image address size does not match dim and a16");
  return false;
}
4252
4253bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
4254
4255 const unsigned Opc = Inst.getOpcode();
4256 const MCInstrDesc &Desc = MII.get(Opc);
4257
4258 if ((Desc.TSFlags & MIMGFlags) == 0)
4259 return true;
4260 if (!Desc.mayLoad() || !Desc.mayStore())
4261 return true; // Not atomic
4262
4263 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4264 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4265
4266 // This is an incomplete check because image_atomic_cmpswap
4267 // may only use 0x3 and 0xf while other atomic operations
4268 // may use 0x1 and 0x3. However these limitations are
4269 // verified when we check that dmask matches dst size.
4270 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
4271}
4272
4273bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {
4274
4275 const unsigned Opc = Inst.getOpcode();
4276 const MCInstrDesc &Desc = MII.get(Opc);
4277
4278 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
4279 return true;
4280
4281 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
4282 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
4283
4284 // GATHER4 instructions use dmask in a different fashion compared to
4285 // other MIMG instructions. The only useful DMASK values are
4286 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4287 // (red,red,red,red) etc.) The ISA document doesn't mention
4288 // this.
4289 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
4290}
4291
// On GFX10+, MIMG instructions must carry an explicit dim modifier; return
// true if one was parsed (or the check does not apply).
bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst,
                                      const OperandVector &Operands) {
  if (!isGFX10Plus())
    return true;

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  // image_bvh_intersect_ray instructions do not have dim
  // NOTE(review): the guard condition for the BVH case is missing from this
  // excerpt — confirm against the full source.
    return true;

  // Scan the parsed operands for an explicit dim modifier.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (Op.isDim())
      return true;
  }
  return false;
}
4314
// MSAA image opcodes require an MSAA dimension; check the dim operand
// against the base opcode's MSAA requirement.
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & MIMGFlags) == 0)
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  // NOTE(review): the initializer of BaseOpcode is missing from this excerpt;
  // presumably AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode) — confirm.
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =

  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  assert(DimIdx != -1);

  // The selected dimension itself must be an MSAA dimension.
  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);

  return DimInfo->MSAA;
}
4337
4338static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
4339{
4340 switch (Opcode) {
4341 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
4342 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
4343 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
4344 return true;
4345 default:
4346 return false;
4347 }
4348}
4349
4350// movrels* opcodes should only allow VGPRS as src0.
4351// This is specified in .td description for vop1/vop3,
4352// but sdwa is handled differently. See isSDWAOperand.
4353bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
4354 const OperandVector &Operands) {
4355
4356 const unsigned Opc = Inst.getOpcode();
4357 const MCInstrDesc &Desc = MII.get(Opc);
4358
4359 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
4360 return true;
4361
4362 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4363 assert(Src0Idx != -1);
4364
4365 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4366 if (Src0.isReg()) {
4367 auto Reg = mc2PseudoReg(Src0.getReg());
4368 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4369 if (!isSGPR(Reg, TRI))
4370 return true;
4371 }
4372
4373 Error(getOperandLoc(Operands, Src0Idx), "source operand must be a VGPR");
4374 return false;
4375}
4376
4377bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
4378 const OperandVector &Operands) {
4379
4380 const unsigned Opc = Inst.getOpcode();
4381
4382 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
4383 return true;
4384
4385 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
4386 assert(Src0Idx != -1);
4387
4388 const MCOperand &Src0 = Inst.getOperand(Src0Idx);
4389 if (!Src0.isReg())
4390 return true;
4391
4392 auto Reg = mc2PseudoReg(Src0.getReg());
4393 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
4394 if (!isGFX90A() && isSGPR(Reg, TRI)) {
4395 Error(getOperandLoc(Operands, Src0Idx),
4396 "source operand must be either a VGPR or an inline constant");
4397 return false;
4398 }
4399
4400 return true;
4401}
4402
4403bool AMDGPUAsmParser::validateMAISrc2(const MCInst &Inst,
4404 const OperandVector &Operands) {
4405 unsigned Opcode = Inst.getOpcode();
4406 const MCInstrDesc &Desc = MII.get(Opcode);
4407
4408 if (!(Desc.TSFlags & SIInstrFlags::IsMAI) ||
4409 !getFeatureBits()[FeatureMFMAInlineLiteralBug])
4410 return true;
4411
4412 const int Src2Idx = getNamedOperandIdx(Opcode, OpName::src2);
4413 if (Src2Idx == -1)
4414 return true;
4415
4416 if (Inst.getOperand(Src2Idx).isImm() && isInlineConstant(Inst, Src2Idx)) {
4417 Error(getOperandLoc(Operands, Src2Idx),
4418 "inline constants are not allowed for this operand");
4419 return false;
4420 }
4421
4422 return true;
4423}
4424
// Validate MAI/MFMA-specific constraints:
//  - for f8f6f4 variants, the src0/src1 register tuple sizes implied by the
//    cbsz/blgp format operands;
//  - for wide (>128-bit) destinations, that src2 does not partially overlap
//    the dst register tuple.
bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
    return true;

  int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
  if (BlgpIdx != -1) {
    if (const MFMA_F8F6F4_Info *Info = AMDGPU::isMFMA_F8F6F4(Opc)) {
      int CbszIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);

      unsigned CBSZ = Inst.getOperand(CbszIdx).getImm();
      unsigned BLGP = Inst.getOperand(BlgpIdx).getImm();

      // Validate the correct register size was used for the floating point
      // format operands

      // Report both mismatches (src0/src1) rather than stopping at the first.
      bool Success = true;
      if (Info->NumRegsSrcA != mfmaScaleF8F6F4FormatToNumRegs(CBSZ)) {
        int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
        Error(getOperandLoc(Operands, Src0Idx),
              "wrong register tuple size for cbsz value " + Twine(CBSZ));
        Success = false;
      }

      if (Info->NumRegsSrcB != mfmaScaleF8F6F4FormatToNumRegs(BLGP)) {
        int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
        Error(getOperandLoc(Operands, Src1Idx),
              "wrong register tuple size for blgp value " + Twine(BLGP));
        Success = false;
      }

      return Success;
    }
  }

  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
  if (Src2Idx == -1)
    return true;

  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
  if (!Src2.isReg())
    return true;

  MCRegister Src2Reg = Src2.getReg();
  MCRegister DstReg = Inst.getOperand(0).getReg();
  // src2 identical to dst is the accumulate-in-place form and is fine.
  if (Src2Reg == DstReg)
    return true;

  // Only destinations wider than 128 bits can partially overlap src2.
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[0], HwMode))
          .getSizeInBits() <= 128)
    return true;

  if (TRI->regsOverlap(Src2Reg, DstReg)) {
    Error(getOperandLoc(Operands, Src2Idx),
          "source 2 operand must not partially overlap with dst");
    return false;
  }

  return true;
}
4489
4490bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
4491 switch (Inst.getOpcode()) {
4492 default:
4493 return true;
4494 case V_DIV_SCALE_F32_gfx6_gfx7:
4495 case V_DIV_SCALE_F32_vi:
4496 case V_DIV_SCALE_F32_gfx10:
4497 case V_DIV_SCALE_F64_gfx6_gfx7:
4498 case V_DIV_SCALE_F64_vi:
4499 case V_DIV_SCALE_F64_gfx10:
4500 break;
4501 }
4502
4503 // TODO: Check that src0 = src1 or src2.
4504
4505 for (auto Name : {AMDGPU::OpName::src0_modifiers,
4506 AMDGPU::OpName::src2_modifiers,
4507 AMDGPU::OpName::src2_modifiers}) {
4508 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
4509 .getImm() &
4511 return false;
4512 }
4513 }
4514
4515 return true;
4516}
4517
4518bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
4519
4520 const unsigned Opc = Inst.getOpcode();
4521 const MCInstrDesc &Desc = MII.get(Opc);
4522
4523 if ((Desc.TSFlags & MIMGFlags) == 0)
4524 return true;
4525
4526 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
4527 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
4528 if (isCI() || isSI())
4529 return false;
4530 }
4531
4532 return true;
4533}
4534
4535bool AMDGPUAsmParser::validateTensorR128(const MCInst &Inst) {
4536 const unsigned Opc = Inst.getOpcode();
4537 const MCInstrDesc &Desc = MII.get(Opc);
4538
4539 if ((Desc.TSFlags & SIInstrFlags::TENSOR_CNT) == 0)
4540 return true;
4541
4542 int R128Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::r128);
4543
4544 return R128Idx < 0 || !Inst.getOperand(R128Idx).getImm();
4545}
4546
// \returns true for "reversed" VALU opcodes (subrev/lshlrev/lshrrev/ashrrev
// families and their packed variants) across all encodings. Used by
// validateLdsDirect to reject lds_direct with these instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
4675
4676bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst,
4677 const OperandVector &Operands) {
4678 using namespace SIInstrFlags;
4679 const unsigned Opcode = Inst.getOpcode();
4680 const MCInstrDesc &Desc = MII.get(Opcode);
4681
4682 // lds_direct register is defined so that it can be used
4683 // with 9-bit operands only. Ignore encodings which do not accept these.
4684 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
4685 if ((Desc.TSFlags & Enc) == 0)
4686 return true;
4687
4688 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
4689 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
4690 if (SrcIdx == -1)
4691 break;
4692 const auto &Src = Inst.getOperand(SrcIdx);
4693 if (Src.isReg() && Src.getReg() == LDS_DIRECT) {
4694
4695 if (isGFX90A() || isGFX11Plus()) {
4696 Error(getOperandLoc(Operands, SrcIdx),
4697 "lds_direct is not supported on this GPU");
4698 return false;
4699 }
4700
4701 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) {
4702 Error(getOperandLoc(Operands, SrcIdx),
4703 "lds_direct cannot be used with this instruction");
4704 return false;
4705 }
4706
4707 if (SrcName != OpName::src0) {
4708 Error(getOperandLoc(Operands, SrcIdx),
4709 "lds_direct may be used as src0 only");
4710 return false;
4711 }
4712 }
4713 }
4714
4715 return true;
4716}
4717
4718SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
4719 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
4720 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4721 if (Op.isFlatOffset())
4722 return Op.getStartLoc();
4723 }
4724 return getLoc();
4725}
4726
4727bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
4728 const OperandVector &Operands) {
4729 auto Opcode = Inst.getOpcode();
4730 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
4731 if (OpNum == -1)
4732 return true;
4733
4734 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
4735 if ((TSFlags & SIInstrFlags::FLAT))
4736 return validateFlatOffset(Inst, Operands);
4737
4738 if ((TSFlags & SIInstrFlags::SMRD))
4739 return validateSMEMOffset(Inst, Operands);
4740
4741 const auto &Op = Inst.getOperand(OpNum);
4742 // GFX12+ buffer ops: InstOffset is signed 24, but must not be a negative.
4743 if (isGFX12Plus() &&
4744 (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
4745 const unsigned OffsetSize = 24;
4746 if (!isUIntN(OffsetSize - 1, Op.getImm())) {
4747 Error(getFlatOffsetLoc(Operands),
4748 Twine("expected a ") + Twine(OffsetSize - 1) +
4749 "-bit unsigned offset for buffer ops");
4750 return false;
4751 }
4752 } else {
4753 const unsigned OffsetSize = 16;
4754 if (!isUIntN(OffsetSize, Op.getImm())) {
4755 Error(getFlatOffsetLoc(Operands),
4756 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
4757 return false;
4758 }
4759 }
4760 return true;
4761}
4762
// Validate the immediate offset of a FLAT instruction against the
// subtarget's offset width and signedness rules.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For pre-GFX12 FLAT instructions the offset must be positive;
  // MSB is ignored and forced to zero.
  unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI());
  // NOTE(review): part of the AllowNegative initializer is missing from this
  // excerpt (presumably a TSFlags test for global/scratch segment flags
  // OR'ed with the GFX12 check) — confirm against the full source.
  bool AllowNegative =
      isGFX12Plus();
  if (!isIntN(OffsetSize, Op.getImm()) || (!AllowNegative && Op.getImm() < 0)) {
    Error(getFlatOffsetLoc(Operands),
          Twine("expected a ") +
              (AllowNegative ? Twine(OffsetSize) + "-bit signed offset"
                             : Twine(OffsetSize - 1) + "-bit unsigned offset"));
    return false;
  }

  return true;
}
4796
4797SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
4798 // Start with second operand because SMEM Offset cannot be dst or src0.
4799 for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
4800 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
4801 if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
4802 return Op.getStartLoc();
4803 }
4804 return getLoc();
4805}
4806
// Validate the immediate offset of an SMEM instruction against the
// subtarget- and buffer-specific encodable ranges.
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // SI/CI have different SMRD offset rules; not checked here.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  const auto &Op = Inst.getOperand(OpNum);
  // Symbolic (expression) offsets are resolved later.
  if (!Op.isImm())
    return true;

  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  // NOTE(review): the legality test on Offset (per-subtarget encodable-range
  // helpers) is missing from this excerpt — confirm against the full source.
    return true;

  // The diagnostics below enumerate the ranges implied by subtarget/buffer.
  Error(getSMEMOffsetLoc(Operands),
        isGFX12Plus() && IsBuffer
            ? "expected a 23-bit unsigned offset for buffer ops"
        : isGFX12Plus()          ? "expected a 24-bit signed offset"
        : (isVI() || IsBuffer)   ? "expected a 20-bit unsigned offset"
                                 : "expected a 21-bit signed offset");

  return false;
}
4840
// SOP2/SOPC instructions may encode at most one unique 32-bit literal across
// src0/src1; count literals and unresolved expressions and diagnose excess.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst,
                                         const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  int64_t LiteralValue;

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    // NOTE(review): the guard opening this scope is missing from this
    // excerpt (hence the extra closing brace below) — confirm against the
    // full source.
      bool IsLit = false;
      std::optional<int64_t> Imm;
      if (MO.isImm()) {
        Imm = MO.getImm();
      } else if (MO.isExpr()) {
        // lit() wrapped expressions carry a known value.
        if (isLitExpr(MO.getExpr())) {
          IsLit = true;
          Imm = getLitValue(MO.getExpr());
        }
      } else {
        continue;
      }

      if (!Imm.has_value()) {
        // Unresolved expression: counts against the literal budget.
        ++NumExprs;
      } else if (!isInlineConstant(Inst, OpIdx)) {
        auto OpType = static_cast<AMDGPU::OperandType>(
            Desc.operands()[OpIdx].OperandType);
        int64_t Value = encode32BitLiteral(*Imm, OpType, IsLit);
        // Identical literal values share one encoding slot.
        if (NumLiterals == 0 || LiteralValue != Value) {
          // NOTE(review): the assignment LiteralValue = Value; appears to be
          // missing from this excerpt — confirm against the full source.
          ++NumLiterals;
        }
      }
    }
  }

  if (NumLiterals + NumExprs <= 1)
    return true;

  Error(getOperandLoc(Operands, Src1Idx),
        "only one unique literal operand is allowed");
  return false;
}
4897
// Validate op_sel / op_sel_hi constraints that vary by opcode family and
// subtarget. \returns false if the encoded op_sel bits are illegal.
bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  // permlane16/permlanex16: only the two low op_sel bits are meaningful.
  if (isPermlane16(Opc)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

    if (OpSel & ~3)
      return false;
  }

  uint64_t TSFlags = MII.get(Opc).TSFlags;

  // GFX940 DOT instructions require op_sel == 0 and op_sel_hi left at its
  // default (-1, all ones).
  if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    if (OpSelIdx != -1) {
      if (Inst.getOperand(OpSelIdx).getImm() != 0)
        return false;
    }
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
    if (OpSelHiIdx != -1) {
      if (Inst.getOperand(OpSelHiIdx).getImm() != -1)
        return false;
    }
  }

  // op_sel[0:1] must be 0 for v_dot2_bf16_bf16 and v_dot2_f16_f16 (VOP3 Dot).
  if (isGFX11Plus() && (TSFlags & SIInstrFlags::IsDOT) &&
      (TSFlags & SIInstrFlags::VOP3) && !(TSFlags & SIInstrFlags::VOP3P)) {
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    if (OpSel & 3)
      return false;
  }

  // Packed math FP32 instructions typically accept SGPRs or VGPRs as source
  // operands. On gfx12+, if a source operand uses SGPRs, the HW can only read
  // the first SGPR and use it for both the low and high operations.
  if (isPackedFP32Inst(Opc) && isGFX12Plus()) {
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
    int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
    int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);

    const MCOperand &Src0 = Inst.getOperand(Src0Idx);
    const MCOperand &Src1 = Inst.getOperand(Src1Idx);
    unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
    unsigned OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

    const MCRegisterInfo *TRI = getContext().getRegisterInfo();

    // For an SGPR source, both the op_sel and op_sel_hi bits of that source
    // must be clear.
    auto VerifyOneSGPR = [OpSel, OpSelHi](unsigned Index) -> bool {
      unsigned Mask = 1U << Index;
      return ((OpSel & Mask) == 0) && ((OpSelHi & Mask) == 0);
    };

    if (Src0.isReg() && isSGPR(Src0.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/0))
      return false;
    if (Src1.isReg() && isSGPR(Src1.getReg(), TRI) &&
        !VerifyOneSGPR(/*Index=*/1))
      return false;

    int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
    if (Src2Idx != -1) {
      const MCOperand &Src2 = Inst.getOperand(Src2Idx);
      if (Src2.isReg() && isSGPR(Src2.getReg(), TRI) &&
          !VerifyOneSGPR(/*Index=*/2))
        return false;
    }
  }

  return true;
}
4971
4972bool AMDGPUAsmParser::validateTrue16OpSel(const MCInst &Inst) {
4973 if (!hasTrue16Insts())
4974 return true;
4975 const MCRegisterInfo *MRI = getMRI();
4976 const unsigned Opc = Inst.getOpcode();
4977 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
4978 if (OpSelIdx == -1)
4979 return true;
4980 unsigned OpSelOpValue = Inst.getOperand(OpSelIdx).getImm();
4981 // If the value is 0 we could have a default OpSel Operand, so conservatively
4982 // allow it.
4983 if (OpSelOpValue == 0)
4984 return true;
4985 unsigned OpCount = 0;
4986 for (AMDGPU::OpName OpName : {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
4987 AMDGPU::OpName::src2, AMDGPU::OpName::vdst}) {
4988 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), OpName);
4989 if (OpIdx == -1)
4990 continue;
4991 const MCOperand &Op = Inst.getOperand(OpIdx);
4992 if (Op.isReg() &&
4993 MRI->getRegClass(AMDGPU::VGPR_16RegClassID).contains(Op.getReg())) {
4994 bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(Op.getReg(), *MRI);
4995 bool OpSelOpIsHi = ((OpSelOpValue & (1 << OpCount)) != 0);
4996 if (OpSelOpIsHi != VGPRSuffixIsHi)
4997 return false;
4998 }
4999 ++OpCount;
5000 }
5001
5002 return true;
5003}
5004
5005bool AMDGPUAsmParser::validateNeg(const MCInst &Inst, AMDGPU::OpName OpName) {
5006 assert(OpName == AMDGPU::OpName::neg_lo || OpName == AMDGPU::OpName::neg_hi);
5007
5008 const unsigned Opc = Inst.getOpcode();
5009 uint64_t TSFlags = MII.get(Opc).TSFlags;
5010
5011 // v_dot4 fp8/bf8 neg_lo/neg_hi not allowed on src0 and src1 (allowed on src2)
5012 // v_wmma iu4/iu8 neg_lo not allowed on src2 (allowed on src0, src1)
5013 // v_swmmac f16/bf16 neg_lo/neg_hi not allowed on src2 (allowed on src0, src1)
5014 // other wmma/swmmac instructions don't have neg_lo/neg_hi operand.
5015 if (!(TSFlags & SIInstrFlags::IsDOT) && !(TSFlags & SIInstrFlags::IsWMMA) &&
5016 !(TSFlags & SIInstrFlags::IsSWMMAC))
5017 return true;
5018
5019 int NegIdx = AMDGPU::getNamedOperandIdx(Opc, OpName);
5020 if (NegIdx == -1)
5021 return true;
5022
5023 unsigned Neg = Inst.getOperand(NegIdx).getImm();
5024
5025 // Instructions that have neg_lo or neg_hi operand but neg modifier is allowed
5026 // on some src operands but not allowed on other.
5027 // It is convenient that such instructions don't have src_modifiers operand
5028 // for src operands that don't allow neg because they also don't allow opsel.
5029
5030 const AMDGPU::OpName SrcMods[3] = {AMDGPU::OpName::src0_modifiers,
5031 AMDGPU::OpName::src1_modifiers,
5032 AMDGPU::OpName::src2_modifiers};
5033
5034 for (unsigned i = 0; i < 3; ++i) {
5035 if (!AMDGPU::hasNamedOperand(Opc, SrcMods[i])) {
5036 if (Neg & (1 << i))
5037 return false;
5038 }
5039 }
5040
5041 return true;
5042}
5043
5044bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
5045 const OperandVector &Operands) {
5046 const unsigned Opc = Inst.getOpcode();
5047 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
5048 if (DppCtrlIdx >= 0) {
5049 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
5050
5051 if (!AMDGPU::isLegalDPALU_DPPControl(getSTI(), DppCtrl) &&
5052 AMDGPU::isDPALU_DPP(MII.get(Opc), MII, getSTI())) {
5053 // DP ALU DPP is supported for row_newbcast only on GFX9* and row_share
5054 // only on GFX12.
5055 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
5056 Error(S, isGFX12() ? "DP ALU dpp only supports row_share"
5057 : "DP ALU dpp only supports row_newbcast");
5058 return false;
5059 }
5060 }
5061
5062 int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
5063 bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
5064
5065 if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
5066 int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
5067 if (Src1Idx >= 0) {
5068 const MCOperand &Src1 = Inst.getOperand(Src1Idx);
5069 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
5070 if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) {
5071 Error(getOperandLoc(Operands, Src1Idx),
5072 "invalid operand for instruction");
5073 return false;
5074 }
5075 if (Src1.isImm()) {
5076 Error(getInstLoc(Operands),
5077 "src1 immediate operand invalid for instruction");
5078 return false;
5079 }
5080 }
5081 }
5082
5083 return true;
5084}
5085
5086// Check if VCC register matches wavefront size
5087bool AMDGPUAsmParser::validateVccOperand(MCRegister Reg) const {
5088 return (Reg == AMDGPU::VCC && isWave64()) ||
5089 (Reg == AMDGPU::VCC_LO && isWave32());
5090}
5091
5092// One unique literal can be used. VOP3 literal is only allowed in GFX10+
5093bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst,
5094 const OperandVector &Operands) {
5095 unsigned Opcode = Inst.getOpcode();
5096 const MCInstrDesc &Desc = MII.get(Opcode);
5097 bool HasMandatoryLiteral = getNamedOperandIdx(Opcode, OpName::imm) != -1;
5098 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) &&
5099 !HasMandatoryLiteral && !isVOPD(Opcode))
5100 return true;
5101
5102 OperandIndices OpIndices = getSrcOperandIndices(Opcode, HasMandatoryLiteral);
5103
5104 std::optional<unsigned> LiteralOpIdx;
5105 std::optional<uint64_t> LiteralValue;
5106
5107 for (int OpIdx : OpIndices) {
5108 if (OpIdx == -1)
5109 continue;
5110
5111 const MCOperand &MO = Inst.getOperand(OpIdx);
5112 if (!MO.isImm() && !MO.isExpr())
5113 continue;
5114 if (!isSISrcOperand(Desc, OpIdx))
5115 continue;
5116
5117 std::optional<int64_t> Imm;
5118 if (MO.isImm())
5119 Imm = MO.getImm();
5120 else if (MO.isExpr() && isLitExpr(MO.getExpr()))
5121 Imm = getLitValue(MO.getExpr());
5122
5123 bool IsAnotherLiteral = false;
5124 if (!Imm.has_value()) {
5125 // Literal value not known, so we conservately assume it's different.
5126 IsAnotherLiteral = true;
5127 } else if (!isInlineConstant(Inst, OpIdx)) {
5128 uint64_t Value = *Imm;
5129 bool IsForcedFP64 =
5130 Desc.operands()[OpIdx].OperandType == AMDGPU::OPERAND_KIMM64 ||
5132 HasMandatoryLiteral);
5133 bool IsFP64 = (IsForcedFP64 || AMDGPU::isSISrcFPOperand(Desc, OpIdx)) &&
5134 AMDGPU::getOperandSize(Desc.operands()[OpIdx]) == 8;
5135 bool IsValid32Op = AMDGPU::isValid32BitLiteral(Value, IsFP64);
5136
5137 if (!IsValid32Op && !isInt<32>(Value) && !isUInt<32>(Value) &&
5138 !IsForcedFP64 && (!has64BitLiterals() || Desc.getSize() != 4)) {
5139 Error(getOperandLoc(Operands, OpIdx),
5140 "invalid operand for instruction");
5141 return false;
5142 }
5143
5144 if (IsFP64 && IsValid32Op && !IsForcedFP64)
5145 Value = Hi_32(Value);
5146
5147 IsAnotherLiteral = !LiteralValue || *LiteralValue != Value;
5149 }
5150
5151 if (IsAnotherLiteral && !HasMandatoryLiteral &&
5152 !getFeatureBits()[FeatureVOP3Literal]) {
5153 Error(getOperandLoc(Operands, OpIdx),
5154 "literal operands are not supported");
5155 return false;
5156 }
5157
5158 if (LiteralOpIdx && IsAnotherLiteral) {
5159 Error(getLaterLoc(getOperandLoc(Operands, OpIdx),
5160 getOperandLoc(Operands, *LiteralOpIdx)),
5161 "only one unique literal operand is allowed");
5162 return false;
5163 }
5164
5165 if (IsAnotherLiteral)
5166 LiteralOpIdx = OpIdx;
5167 }
5168
5169 return true;
5170}
5171
5172// Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
5173static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name,
5174 const MCRegisterInfo *MRI) {
5175 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name);
5176 if (OpIdx < 0)
5177 return -1;
5178
5179 const MCOperand &Op = Inst.getOperand(OpIdx);
5180 if (!Op.isReg())
5181 return -1;
5182
5183 MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
5184 auto Reg = Sub ? Sub : Op.getReg();
5185 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
5186 return AGPR32.contains(Reg) ? 1 : 0;
5187}
5188
5189bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
5190 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5191 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
5193 SIInstrFlags::DS)) == 0)
5194 return true;
5195
5196 AMDGPU::OpName DataName = (TSFlags & SIInstrFlags::DS)
5197 ? AMDGPU::OpName::data0
5198 : AMDGPU::OpName::vdata;
5199
5200 const MCRegisterInfo *MRI = getMRI();
5201 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
5202 int DataAreg = IsAGPROperand(Inst, DataName, MRI);
5203
5204 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
5205 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
5206 if (Data2Areg >= 0 && Data2Areg != DataAreg)
5207 return false;
5208 }
5209
5210 auto FB = getFeatureBits();
5211 if (FB[AMDGPU::FeatureGFX90AInsts]) {
5212 if (DataAreg < 0 || DstAreg < 0)
5213 return true;
5214 return DstAreg == DataAreg;
5215 }
5216
5217 return DstAreg < 1 && DataAreg < 1;
5218}
5219
// Enforce even alignment of VGPR/AGPR tuples on targets with
// FeatureRequiresAlignedVGPRs, with opcode-specific exemptions for the
// transposed-load instructions noted below.
bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
  auto FB = getFeatureBits();
  if (!FB[AMDGPU::FeatureRequiresAlignedVGPRs])
    return true;

  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *MRI = getMRI();
  // DS_READ_B96_TR_B6 is the only DS instruction in GFX950, that allows
  // unaligned VGPR. All others only allow even aligned VGPRs.
  if (FB[AMDGPU::FeatureGFX90AInsts] && Opc == AMDGPU::DS_READ_B96_TR_B6_vi)
    return true;

  if (FB[AMDGPU::FeatureGFX1250Insts]) {
    switch (Opc) {
    default:
      break;
    case AMDGPU::DS_LOAD_TR6_B96:
    case AMDGPU::DS_LOAD_TR6_B96_gfx12:
      // DS_LOAD_TR6_B96 is the only DS instruction in GFX1250, that
      // allows unaligned VGPR. All others only allow even aligned VGPRs.
      return true;
    case AMDGPU::GLOBAL_LOAD_TR6_B96:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_gfx1250: {
      // GLOBAL_LOAD_TR6_B96 is the only GLOBAL instruction in GFX1250, that
      // allows unaligned VGPR for vdst, but other operands still only allow
      // even aligned VGPRs.
      int VAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
      if (VAddrIdx != -1) {
        const MCOperand &Op = Inst.getOperand(VAddrIdx);
        MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
        // Odd first subregister means the vaddr tuple is misaligned.
        if ((Sub - AMDGPU::VGPR0) & 1)
          return false;
      }
      return true;
    }
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR:
    case AMDGPU::GLOBAL_LOAD_TR6_B96_SADDR_gfx1250:
      return true;
    }
  }

  // General case: every register-tuple operand (one with a sub0 subregister)
  // must start at an even register index within its file.
  const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
  const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
  for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
    const MCOperand &Op = Inst.getOperand(I);
    if (!Op.isReg())
      continue;

    MCRegister Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
    if (!Sub)
      continue;

    if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
      return false;
    if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
      return false;
  }

  return true;
}
5280
5281SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const {
5282 for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
5283 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
5284 if (Op.isBLGP())
5285 return Op.getStartLoc();
5286 }
5287 return SMLoc();
5288}
5289
5290bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
5291 const OperandVector &Operands) {
5292 unsigned Opc = Inst.getOpcode();
5293 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
5294 if (BlgpIdx == -1)
5295 return true;
5296 SMLoc BLGPLoc = getBLGPLoc(Operands);
5297 if (!BLGPLoc.isValid())
5298 return true;
5299 bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
5300 auto FB = getFeatureBits();
5301 bool UsesNeg = false;
5302 if (FB[AMDGPU::FeatureGFX940Insts]) {
5303 switch (Opc) {
5304 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
5305 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
5306 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
5307 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
5308 UsesNeg = true;
5309 }
5310 }
5311
5312 if (IsNeg == UsesNeg)
5313 return true;
5314
5315 Error(BLGPLoc,
5316 UsesNeg ? "invalid modifier: blgp is not supported"
5317 : "invalid modifier: neg is not supported");
5318
5319 return false;
5320}
5321
5322bool AMDGPUAsmParser::validateWaitCnt(const MCInst &Inst,
5323 const OperandVector &Operands) {
5324 if (!isGFX11Plus())
5325 return true;
5326
5327 unsigned Opc = Inst.getOpcode();
5328 if (Opc != AMDGPU::S_WAITCNT_EXPCNT_gfx11 &&
5329 Opc != AMDGPU::S_WAITCNT_LGKMCNT_gfx11 &&
5330 Opc != AMDGPU::S_WAITCNT_VMCNT_gfx11 &&
5331 Opc != AMDGPU::S_WAITCNT_VSCNT_gfx11)
5332 return true;
5333
5334 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::sdst);
5335 assert(Src0Idx >= 0 && Inst.getOperand(Src0Idx).isReg());
5336 auto Reg = mc2PseudoReg(Inst.getOperand(Src0Idx).getReg());
5337 if (Reg == AMDGPU::SGPR_NULL)
5338 return true;
5339
5340 Error(getOperandLoc(Operands, Src0Idx), "src0 must be null");
5341 return false;
5342}
5343
5344bool AMDGPUAsmParser::validateDS(const MCInst &Inst,
5345 const OperandVector &Operands) {
5346 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
5347 if ((TSFlags & SIInstrFlags::DS) == 0)
5348 return true;
5349 if (TSFlags & SIInstrFlags::GWS)
5350 return validateGWS(Inst, Operands);
5351 // Only validate GDS for non-GWS instructions.
5352 if (hasGDS())
5353 return true;
5354 int GDSIdx =
5355 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::gds);
5356 if (GDSIdx < 0)
5357 return true;
5358 unsigned GDS = Inst.getOperand(GDSIdx).getImm();
5359 if (GDS) {
5360 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyGDS, Operands);
5361 Error(S, "gds modifier is not supported on this GPU");
5362 return false;
5363 }
5364 return true;
5365}
5366
5367// gfx90a has an undocumented limitation:
5368// DS_GWS opcodes must use even aligned registers.
5369bool AMDGPUAsmParser::validateGWS(const MCInst &Inst,
5370 const OperandVector &Operands) {
5371 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts])
5372 return true;
5373
5374 int Opc = Inst.getOpcode();
5375 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi &&
5376 Opc != AMDGPU::DS_GWS_SEMA_BR_vi)
5377 return true;
5378
5379 const MCRegisterInfo *MRI = getMRI();
5380 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
5381 int Data0Pos =
5382 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0);
5383 assert(Data0Pos != -1);
5384 auto Reg = Inst.getOperand(Data0Pos).getReg();
5385 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0);
5386 if (RegIdx & 1) {
5387 Error(getOperandLoc(Operands, Data0Pos), "vgpr must be even aligned");
5388 return false;
5389 }
5390
5391 return true;
5392}
5393
// Validate the cache-policy (cpol) bits of a memory instruction against the
// current target and the instruction's flags.
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            SMLoc IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  if (!isGFX1250Plus()) {
    // scale_offset and nv only exist on GFX1250+; point the diagnostic at
    // the modifier text inside the cpol operand.
    if (CPol & CPol::SCAL) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
      Error(S, "scale_offset is not supported on this GPU");
    }
    if (CPol & CPol::NV) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("nv")]);
      Error(S, "nv is not supported on this GPU");
    }
  }

  if ((CPol & CPol::SCAL) && !supportsScaleOffset(MII, Inst.getOpcode())) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scale_offset")]);
    Error(S, "scale_offset is not supported for this instruction");
  }

  // GFX12+ replaced glc/slc/scc with th/scope; validate those separately.
  if (isGFX12Plus())
    return validateTHAndScopeBits(Inst, Operands, CPol);

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if (TSFlags & SIInstrFlags::SMRD) {
    if (CPol && (isSI() || isCI())) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      Error(S, "cache policy is not supported for SMRD instructions");
      return false;
    }
    if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) {
      Error(IDLoc, "invalid cache policy for SMEM instruction");
      return false;
    }
  }

  if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
    const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
    // NOTE(review): the continuation of this initializer (the remaining
    // SIInstrFlags terms) appears to have been dropped during extraction —
    // confirm against upstream before relying on this code.
    if (!(TSFlags & AllowSCCModifier)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
      Error(S,
            "scc modifier is not supported for this instruction on this GPU");
      return false;
    }
  }

  // NOTE(review): a guard condition (presumably testing the atomic TSFlags)
  // appears to be missing before this return; as written, everything below
  // is unreachable — confirm against upstream.
  return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, isGFX940() ? "instruction must use sc0"
                              : "instruction must use glc");
      return false;
    }
  } else {
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      // NOTE(review): an "S = SMLoc::getFromPointer(" line appears to be
      // missing directly above the next line — confirm against upstream.
          &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
      Error(S, isGFX940() ? "instruction must not use sc0"
                          : "instruction must not use glc");
      return false;
    }
  }

  return true;
}
5479
// Validate the GFX12+ temporal-hint (th) and scope fields of the cpol
// operand against the instruction's flags.
bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
                                             const OperandVector &Operands,
                                             const unsigned CPol) {
  const unsigned TH = CPol & AMDGPU::CPol::TH;
  const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;

  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &TID = MII.get(Opcode);

  // All diagnostics point at the cpol modifier.
  auto PrintError = [&](StringRef Msg) {
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    Error(S, Msg);
    return false;
  };

  if ((TH & AMDGPU::CPol::TH_ATOMIC_RETURN) &&
      // NOTE(review): the rest of this condition appears to have been dropped
      // during extraction — confirm against upstream.
    return PrintError("th:TH_ATOMIC_RETURN requires a destination operand");

  if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
      // NOTE(review): additional condition lines appear to be missing here —
      // confirm against upstream.
    return PrintError("instruction must use th:TH_ATOMIC_RETURN");

  // th of zero is always legal once the atomic checks pass.
  if (TH == 0)
    return true;

  if ((TID.TSFlags & SIInstrFlags::SMRD) &&
      ((TH == AMDGPU::CPol::TH_NT_RT) || (TH == AMDGPU::CPol::TH_RT_NT) ||
       (TH == AMDGPU::CPol::TH_NT_HT)))
    return PrintError("invalid th value for SMEM instruction");

  if (TH == AMDGPU::CPol::TH_BYPASS) {
    if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
        // NOTE(review): parts of this scope/th cross-check appear to be
        // missing — confirm against upstream.
        (Scope == AMDGPU::CPol::SCOPE_SYS &&
      return PrintError("scope and th combination is not valid");
  }

  // The th value family (atomic/store/load) must match the instruction's
  // temporal-hint type.
  unsigned THType = AMDGPU::getTemporalHintType(TID);
  if (THType == AMDGPU::CPol::TH_TYPE_ATOMIC) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
      return PrintError("invalid th value for atomic instructions");
  } else if (THType == AMDGPU::CPol::TH_TYPE_STORE) {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
      return PrintError("invalid th value for store instructions");
  } else {
    if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
      return PrintError("invalid th value for load instructions");
  }

  return true;
}
5534
5535bool AMDGPUAsmParser::validateTFE(const MCInst &Inst,
5536 const OperandVector &Operands) {
5537 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
5538 if (Desc.mayStore() &&
5540 SMLoc Loc = getImmLoc(AMDGPUOperand::ImmTyTFE, Operands);
5541 if (Loc != getInstLoc(Operands)) {
5542 Error(Loc, "TFE modifier has no meaning for store instructions");
5543 return false;
5544 }
5545 }
5546
5547 return true;
5548}
5549
// Validate that a WMMA matrix_a_fmt/matrix_b_fmt modifier is consistent with
// the size of the corresponding source register tuple.
bool AMDGPUAsmParser::validateWMMA(const MCInst &Inst,
                                   const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCInstrDesc &Desc = MII.get(Opc);

  auto validateFmt = [&](AMDGPU::OpName FmtOp, AMDGPU::OpName SrcOp) -> bool {
    int FmtIdx = AMDGPU::getNamedOperandIdx(Opc, FmtOp);
    if (FmtIdx == -1)
      return true;
    unsigned Fmt = Inst.getOperand(FmtIdx).getImm();
    int SrcIdx = AMDGPU::getNamedOperandIdx(Opc, SrcOp);
    // Size in bits of the register class the source operand uses.
    unsigned RegSize =
        TRI->getRegClass(MII.getOpRegClassID(Desc.operands()[SrcIdx], HwMode))
            .getSizeInBits();

    // NOTE(review): the condition comparing RegSize against the tuple size
    // implied by Fmt appears to have been dropped during extraction —
    // confirm against upstream.
      return true;

    Error(getOperandLoc(Operands, SrcIdx),
          "wrong register tuple size for " +
              Twine(WMMAMods::ModMatrixFmt[Fmt]));
    return false;
  };

  return validateFmt(AMDGPU::OpName::matrix_a_fmt, AMDGPU::OpName::src0) &&
         validateFmt(AMDGPU::OpName::matrix_b_fmt, AMDGPU::OpName::src1);
}
5578
// Run every target-specific semantic check on a successfully matched
// instruction. The ordering is significant: the first failing check decides
// which diagnostic the user sees. Checks that emit their own diagnostic
// simply return false here; the rest get an Error() call at this level.
bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, SMLoc IDLoc,
                                          const OperandVector &Operands) {
  if (!validateLdsDirect(Inst, Operands))
    return false;
  if (!validateTrue16OpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "op_sel operand conflicts with 16-bit operand suffix");
    return false;
  }
  if (!validateSOPLiteral(Inst, Operands))
    return false;
  if (!validateVOPLiteral(Inst, Operands)) {
    return false;
  }
  if (!validateConstantBusLimitations(Inst, Operands)) {
    return false;
  }
  if (!validateVOPD(Inst, Operands)) {
    return false;
  }
  if (!validateIntClampSupported(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyClamp, Operands),
          "integer clamping is not supported on this GPU");
    return false;
  }
  if (!validateOpSel(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
          "invalid op_sel operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_lo)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegLo, Operands),
          "invalid neg_lo operand");
    return false;
  }
  if (!validateNeg(Inst, AMDGPU::OpName::neg_hi)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyNegHi, Operands),
          "invalid neg_hi operand");
    return false;
  }
  if (!validateDPP(Inst, Operands)) {
    return false;
  }
  // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
  if (!validateMIMGD16(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "d16 modifier is not supported on this GPU");
    return false;
  }
  if (!validateMIMGDim(Inst, Operands)) {
    Error(IDLoc, "missing dim operand");
    return false;
  }
  if (!validateTensorR128(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
          "instruction must set modifier r128=0");
    return false;
  }
  if (!validateMIMGMSAA(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
          "invalid dim; must be MSAA type");
    return false;
  }
  if (!validateMIMGDataSize(Inst, IDLoc)) {
    return false;
  }
  if (!validateMIMGAddrSize(Inst, IDLoc))
    return false;
  if (!validateMIMGAtomicDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid atomic image dmask");
    return false;
  }
  if (!validateMIMGGatherDMask(Inst)) {
    Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
          "invalid image_gather dmask: only one bit must be set");
    return false;
  }
  if (!validateMovrels(Inst, Operands)) {
    return false;
  }
  if (!validateOffset(Inst, Operands)) {
    return false;
  }
  if (!validateMAIAccWrite(Inst, Operands)) {
    return false;
  }
  if (!validateMAISrc2(Inst, Operands)) {
    return false;
  }
  if (!validateMFMA(Inst, Operands)) {
    return false;
  }
  if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
    return false;
  }

  if (!validateAGPRLdSt(Inst)) {
    Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
    ? "invalid register class: data and dst should be all VGPR or AGPR"
    : "invalid register class: agpr loads and stores not supported on this GPU"
    );
    return false;
  }
  if (!validateVGPRAlign(Inst)) {
    Error(IDLoc,
      "invalid register class: vgpr tuples must be 64 bit aligned");
    return false;
  }
  if (!validateDS(Inst, Operands)) {
    return false;
  }

  if (!validateBLGP(Inst, Operands)) {
    return false;
  }

  if (!validateDivScale(Inst)) {
    Error(IDLoc, "ABS not allowed in VOP3B instructions");
    return false;
  }
  if (!validateWaitCnt(Inst, Operands)) {
    return false;
  }
  if (!validateTFE(Inst, Operands)) {
    return false;
  }
  if (!validateWMMA(Inst, Operands)) {
    return false;
  }

  return true;
}
5712
5714 const FeatureBitset &FBS,
5715 unsigned VariantID = 0);
5716
5717static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
5718 const FeatureBitset &AvailableFeatures,
5719 unsigned VariantID);
5720
// Convenience overload: check the mnemonic against every assembler variant.
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
5725
5726bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
5727 const FeatureBitset &FBS,
5728 ArrayRef<unsigned> Variants) {
5729 for (auto Variant : Variants) {
5730 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
5731 return true;
5732 }
5733
5734 return false;
5735}
5736
// Produce the best diagnostic for a mnemonic that failed to match: wrong
// variant, wrong wavesize, wrong GPU, or a plain typo — tried in that order.
// Returns false if the mnemonic is actually supported (no error emitted).
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  SMLoc IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Check if this instruction may be used with a different wavesize.
  if (isGFX10Plus() && getFeatureBits()[AMDGPU::FeatureWavefrontSize64] &&
      !getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) {
    // FIXME: Use getAvailableFeatures, and do not manually recompute
    FeatureBitset FeaturesWS32 = getFeatureBits();
    FeaturesWS32.flip(AMDGPU::FeatureWavefrontSize64)
        .flip(AMDGPU::FeatureWavefrontSize32);
    FeatureBitset AvailableFeaturesWS32 =
        ComputeAvailableFeatures(FeaturesWS32);

    if (isSupportedMnemo(Mnemo, AvailableFeaturesWS32, getMatchedVariants()))
      return Error(IDLoc, "instruction requires wavesize=32");
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
5781
5782static bool isInvalidVOPDY(const OperandVector &Operands,
5783 uint64_t InvalidOprIdx) {
5784 assert(InvalidOprIdx < Operands.size());
5785 const auto &Op = ((AMDGPUOperand &)*Operands[InvalidOprIdx]);
5786 if (Op.isToken() && InvalidOprIdx > 1) {
5787 const auto &PrevOp = ((AMDGPUOperand &)*Operands[InvalidOprIdx - 1]);
5788 return PrevOp.isToken() && PrevOp.getToken() == "::";
5789 }
5790 return false;
5791}
5792
// Try to match the parsed operands against every assembler variant, keep the
// most specific failure status, then either emit the instruction (after
// validateInstruction) or report the best available diagnostic.
bool AMDGPUAsmParser::matchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  Inst.setLoc(IDLoc);
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    if (R == Match_Success || R == Match_MissingFeature ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand &&
         Result != Match_MissingFeature)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // A matched instruction may still be semantically invalid;
    // validateInstruction emits its own diagnostic on failure.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;

      if (isInvalidVOPDY(Operands, ErrorInfo))
        return Error(ErrorLoc, "invalid VOPDY instruction");
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
5861
5862bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
5863 int64_t Tmp = -1;
5864 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
5865 return true;
5866 }
5867 if (getParser().parseAbsoluteExpression(Tmp)) {
5868 return true;
5869 }
5870 Ret = static_cast<uint32_t>(Tmp);
5871 return false;
5872}
5873
5874bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
5875 if (!getSTI().getTargetTriple().isAMDGCN())
5876 return TokError("directive only supported for amdgcn architecture");
5877
5878 std::string TargetIDDirective;
5879 SMLoc TargetStart = getTok().getLoc();
5880 if (getParser().parseEscapedString(TargetIDDirective))
5881 return true;
5882
5883 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
5884 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
5885 return getParser().Error(TargetRange.Start,
5886 (Twine(".amdgcn_target directive's target id ") +
5887 Twine(TargetIDDirective) +
5888 Twine(" does not match the specified target id ") +
5889 Twine(getTargetStreamer().getTargetID()->toString())).str());
5890
5891 return false;
5892}
5893
// Report a "value out of range" diagnostic spanning the given source range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
5897
// Build MCExpr trees for the VGPR and SGPR block counts used in the kernel
// descriptor, range-checking the SGPR count where it can be evaluated now.
// Returns true on error (after reporting it).
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, const MCExpr *VCCUsed,
    const MCExpr *FlatScrUsed, bool XNACKUsed,
    std::optional<bool> EnableWavefrontSize32, const MCExpr *NextFreeVGPR,
    SMRange VGPRRange, const MCExpr *NextFreeSGPR, SMRange SGPRRange,
    const MCExpr *&VGPRBlocks, const MCExpr *&SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());
  MCContext &Ctx = getContext();

  const MCExpr *NumSGPRs = NextFreeSGPR;
  int64_t EvaluatedSGPRs;

  if (Version.Major >= 10)
  // NOTE(review): the then-branch body appears to have been dropped during
  // extraction — confirm against upstream.
  else {
    unsigned MaxAddressableNumSGPRs =
    // NOTE(review): this initializer's continuation appears to be missing —
    // confirm against upstream.

    // First bound: the raw declared SGPR count (pre-extra-SGPRs) on targets
    // where that limit applies.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) && Version.Major >= 8 &&
        !Features.test(FeatureSGPRInitBug) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // Account for VCC/flat_scratch/XNACK reserved SGPRs.
    const MCExpr *ExtraSGPRs =
        AMDGPUMCExpr::createExtraSGPRs(VCCUsed, FlatScrUsed, XNACKUsed, Ctx);
    NumSGPRs = MCBinaryExpr::createAdd(NumSGPRs, ExtraSGPRs, Ctx);

    // Second bound: total including extras, for targets with the init bug or
    // pre-GFX8.
    if (NumSGPRs->evaluateAsAbsolute(EvaluatedSGPRs) &&
        (Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        static_cast<uint64_t>(EvaluatedSGPRs) > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs =
    // NOTE(review): the right-hand side of this assignment appears to be
    // missing — confirm against upstream.
  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&Ctx](const MCExpr *NumGPR,
                                unsigned Granule) -> const MCExpr * {
    const MCExpr *OneConst = MCConstantExpr::create(1ul, Ctx);
    const MCExpr *GranuleConst = MCConstantExpr::create(Granule, Ctx);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };

  VGPRBlocks = GetNumGPRBlocks(
      NextFreeVGPR,
      IsaInfo::getVGPREncodingGranule(&getSTI(), EnableWavefrontSize32));
  SGPRBlocks =
      GetNumGPRBlocks(NumSGPRs, IsaInfo::getSGPREncodingGranule(&getSTI()));

  return false;
}
5960
5961bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
5962 if (!getSTI().getTargetTriple().isAMDGCN())
5963 return TokError("directive only supported for amdgcn architecture");
5964
5965 if (!isHsaAbi(getSTI()))
5966 return TokError("directive only supported for amdhsa OS");
5967
5968 StringRef KernelName;
5969 if (getParser().parseIdentifier(KernelName))
5970 return true;
5971
5972 AMDGPU::MCKernelDescriptor KD =
5974 &getSTI(), getContext());
5975
5976 StringSet<> Seen;
5977
5978 IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
5979
5980 const MCExpr *ZeroExpr = MCConstantExpr::create(0, getContext());
5981 const MCExpr *OneExpr = MCConstantExpr::create(1, getContext());
5982
5983 SMRange VGPRRange;
5984 const MCExpr *NextFreeVGPR = ZeroExpr;
5985 const MCExpr *AccumOffset = MCConstantExpr::create(0, getContext());
5986 const MCExpr *NamedBarCnt = ZeroExpr;
5987 uint64_t SharedVGPRCount = 0;
5988 uint64_t PreloadLength = 0;
5989 uint64_t PreloadOffset = 0;
5990 SMRange SGPRRange;
5991 const MCExpr *NextFreeSGPR = ZeroExpr;
5992
5993 // Count the number of user SGPRs implied from the enabled feature bits.
5994 unsigned ImpliedUserSGPRCount = 0;
5995
5996 // Track if the asm explicitly contains the directive for the user SGPR
5997 // count.
5998 std::optional<unsigned> ExplicitUserSGPRCount;
5999 const MCExpr *ReserveVCC = OneExpr;
6000 const MCExpr *ReserveFlatScr = OneExpr;
6001 std::optional<bool> EnableWavefrontSize32;
6002
6003 while (true) {
6004 while (trySkipToken(AsmToken::EndOfStatement));
6005
6006 StringRef ID;
6007 SMRange IDRange = getTok().getLocRange();
6008 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
6009 return true;
6010
6011 if (ID == ".end_amdhsa_kernel")
6012 break;
6013
6014 if (!Seen.insert(ID).second)
6015 return TokError(".amdhsa_ directives cannot be repeated");
6016
6017 SMLoc ValStart = getLoc();
6018 const MCExpr *ExprVal;
6019 if (getParser().parseExpression(ExprVal))
6020 return true;
6021 SMLoc ValEnd = getLoc();
6022 SMRange ValRange = SMRange(ValStart, ValEnd);
6023
6024 int64_t IVal = 0;
6025 uint64_t Val = IVal;
6026 bool EvaluatableExpr;
6027 if ((EvaluatableExpr = ExprVal->evaluateAsAbsolute(IVal))) {
6028 if (IVal < 0)
6029 return OutOfRangeError(ValRange);
6030 Val = IVal;
6031 }
6032
6033#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \
6034 if (!isUInt<ENTRY##_WIDTH>(Val)) \
6035 return OutOfRangeError(RANGE); \
6036 AMDGPU::MCKernelDescriptor::bits_set(FIELD, VALUE, ENTRY##_SHIFT, ENTRY, \
6037 getContext());
6038
6039// Some fields use the parsed value immediately which requires the expression to
6040// be solvable.
6041#define EXPR_RESOLVE_OR_ERROR(RESOLVED) \
6042 if (!(RESOLVED)) \
6043 return Error(IDRange.Start, "directive should have resolvable expression", \
6044 IDRange);
6045
6046 if (ID == ".amdhsa_group_segment_fixed_size") {
6048 CHAR_BIT>(Val))
6049 return OutOfRangeError(ValRange);
6050 KD.group_segment_fixed_size = ExprVal;
6051 } else if (ID == ".amdhsa_private_segment_fixed_size") {
6053 CHAR_BIT>(Val))
6054 return OutOfRangeError(ValRange);
6055 KD.private_segment_fixed_size = ExprVal;
6056 } else if (ID == ".amdhsa_kernarg_size") {
6057 if (!isUInt<sizeof(kernel_descriptor_t::kernarg_size) * CHAR_BIT>(Val))
6058 return OutOfRangeError(ValRange);
6059 KD.kernarg_size = ExprVal;
6060 } else if (ID == ".amdhsa_user_sgpr_count") {
6061 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6062 ExplicitUserSGPRCount = Val;
6063 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
6064 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6066 return Error(IDRange.Start,
6067 "directive is not supported with architected flat scratch",
6068 IDRange);
6070 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
6071 ExprVal, ValRange);
6072 if (Val)
6073 ImpliedUserSGPRCount += 4;
6074 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_length") {
6075 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6076 if (!hasKernargPreload())
6077 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6078
6079 if (Val > getMaxNumUserSGPRs())
6080 return OutOfRangeError(ValRange);
6081 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_LENGTH, ExprVal,
6082 ValRange);
6083 if (Val) {
6084 ImpliedUserSGPRCount += Val;
6085 PreloadLength = Val;
6086 }
6087 } else if (ID == ".amdhsa_user_sgpr_kernarg_preload_offset") {
6088 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6089 if (!hasKernargPreload())
6090 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6091
6092 if (Val >= 1024)
6093 return OutOfRangeError(ValRange);
6094 PARSE_BITS_ENTRY(KD.kernarg_preload, KERNARG_PRELOAD_SPEC_OFFSET, ExprVal,
6095 ValRange);
6096 if (Val)
6097 PreloadOffset = Val;
6098 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
6099 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6101 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, ExprVal,
6102 ValRange);
6103 if (Val)
6104 ImpliedUserSGPRCount += 2;
6105 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
6106 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6108 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, ExprVal,
6109 ValRange);
6110 if (Val)
6111 ImpliedUserSGPRCount += 2;
6112 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
6113 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6115 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
6116 ExprVal, ValRange);
6117 if (Val)
6118 ImpliedUserSGPRCount += 2;
6119 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
6120 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6122 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, ExprVal,
6123 ValRange);
6124 if (Val)
6125 ImpliedUserSGPRCount += 2;
6126 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
6128 return Error(IDRange.Start,
6129 "directive is not supported with architected flat scratch",
6130 IDRange);
6131 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6133 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT,
6134 ExprVal, ValRange);
6135 if (Val)
6136 ImpliedUserSGPRCount += 2;
6137 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
6138 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6140 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
6141 ExprVal, ValRange);
6142 if (Val)
6143 ImpliedUserSGPRCount += 1;
6144 } else if (ID == ".amdhsa_wavefront_size32") {
6145 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6146 if (IVersion.Major < 10)
6147 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6148 EnableWavefrontSize32 = Val;
6150 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, ExprVal,
6151 ValRange);
6152 } else if (ID == ".amdhsa_uses_dynamic_stack") {
6154 KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK, ExprVal,
6155 ValRange);
6156 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
6158 return Error(IDRange.Start,
6159 "directive is not supported with architected flat scratch",
6160 IDRange);
6162 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6163 ValRange);
6164 } else if (ID == ".amdhsa_enable_private_segment") {
6166 return Error(
6167 IDRange.Start,
6168 "directive is not supported without architected flat scratch",
6169 IDRange);
6171 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, ExprVal,
6172 ValRange);
6173 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
6175 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, ExprVal,
6176 ValRange);
6177 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
6179 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, ExprVal,
6180 ValRange);
6181 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
6183 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, ExprVal,
6184 ValRange);
6185 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
6187 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, ExprVal,
6188 ValRange);
6189 } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
6191 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, ExprVal,
6192 ValRange);
6193 } else if (ID == ".amdhsa_next_free_vgpr") {
6194 VGPRRange = ValRange;
6195 NextFreeVGPR = ExprVal;
6196 } else if (ID == ".amdhsa_next_free_sgpr") {
6197 SGPRRange = ValRange;
6198 NextFreeSGPR = ExprVal;
6199 } else if (ID == ".amdhsa_accum_offset") {
6200 if (!isGFX90A())
6201 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6202 AccumOffset = ExprVal;
6203 } else if (ID == ".amdhsa_named_barrier_count") {
6204 if (!isGFX1250Plus())
6205 return Error(IDRange.Start, "directive requires gfx1250+", IDRange);
6206 NamedBarCnt = ExprVal;
6207 } else if (ID == ".amdhsa_reserve_vcc") {
6208 if (EvaluatableExpr && !isUInt<1>(Val))
6209 return OutOfRangeError(ValRange);
6210 ReserveVCC = ExprVal;
6211 } else if (ID == ".amdhsa_reserve_flat_scratch") {
6212 if (IVersion.Major < 7)
6213 return Error(IDRange.Start, "directive requires gfx7+", IDRange);
6215 return Error(IDRange.Start,
6216 "directive is not supported with architected flat scratch",
6217 IDRange);
6218 if (EvaluatableExpr && !isUInt<1>(Val))
6219 return OutOfRangeError(ValRange);
6220 ReserveFlatScr = ExprVal;
6221 } else if (ID == ".amdhsa_reserve_xnack_mask") {
6222 if (IVersion.Major < 8)
6223 return Error(IDRange.Start, "directive requires gfx8+", IDRange);
6224 if (!isUInt<1>(Val))
6225 return OutOfRangeError(ValRange);
6226 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
6227 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
6228 IDRange);
6229 } else if (ID == ".amdhsa_float_round_mode_32") {
6231 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, ExprVal,
6232 ValRange);
6233 } else if (ID == ".amdhsa_float_round_mode_16_64") {
6235 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, ExprVal,
6236 ValRange);
6237 } else if (ID == ".amdhsa_float_denorm_mode_32") {
6239 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, ExprVal,
6240 ValRange);
6241 } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
6243 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, ExprVal,
6244 ValRange);
6245 } else if (ID == ".amdhsa_dx10_clamp") {
6246 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6247 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6248 IDRange);
6250 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, ExprVal,
6251 ValRange);
6252 } else if (ID == ".amdhsa_ieee_mode") {
6253 if (!getSTI().hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
6254 return Error(IDRange.Start, "directive unsupported on gfx1170+",
6255 IDRange);
6257 COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, ExprVal,
6258 ValRange);
6259 } else if (ID == ".amdhsa_fp16_overflow") {
6260 if (IVersion.Major < 9)
6261 return Error(IDRange.Start, "directive requires gfx9+", IDRange);
6263 COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, ExprVal,
6264 ValRange);
6265 } else if (ID == ".amdhsa_tg_split") {
6266 if (!isGFX90A())
6267 return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
6268 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
6269 ExprVal, ValRange);
6270 } else if (ID == ".amdhsa_workgroup_processor_mode") {
6271 if (!supportsWGP(getSTI()))
6272 return Error(IDRange.Start,
6273 "directive unsupported on " + getSTI().getCPU(), IDRange);
6275 COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, ExprVal,
6276 ValRange);
6277 } else if (ID == ".amdhsa_memory_ordered") {
6278 if (IVersion.Major < 10)
6279 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6281 COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, ExprVal,
6282 ValRange);
6283 } else if (ID == ".amdhsa_forward_progress") {
6284 if (IVersion.Major < 10)
6285 return Error(IDRange.Start, "directive requires gfx10+", IDRange);
6287 COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, ExprVal,
6288 ValRange);
6289 } else if (ID == ".amdhsa_shared_vgpr_count") {
6290 EXPR_RESOLVE_OR_ERROR(EvaluatableExpr);
6291 if (IVersion.Major < 10 || IVersion.Major >= 12)
6292 return Error(IDRange.Start, "directive requires gfx10 or gfx11",
6293 IDRange);
6294 SharedVGPRCount = Val;
6296 COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
6297 ValRange);
6298 } else if (ID == ".amdhsa_inst_pref_size") {
6299 if (IVersion.Major < 11)
6300 return Error(IDRange.Start, "directive requires gfx11+", IDRange);
6301 if (IVersion.Major == 11) {
6303 COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
6304 ValRange);
6305 } else {
6307 COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
6308 ValRange);
6309 }
6310 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
6313 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION,
6314 ExprVal, ValRange);
6315 } else if (ID == ".amdhsa_exception_fp_denorm_src") {
6317 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
6318 ExprVal, ValRange);
6319 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
6322 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO,
6323 ExprVal, ValRange);
6324 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
6326 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
6327 ExprVal, ValRange);
6328 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
6330 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
6331 ExprVal, ValRange);
6332 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
6334 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
6335 ExprVal, ValRange);
6336 } else if (ID == ".amdhsa_exception_int_div_zero") {
6338 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
6339 ExprVal, ValRange);
6340 } else if (ID == ".amdhsa_round_robin_scheduling") {
6341 if (IVersion.Major < 12)
6342 return Error(IDRange.Start, "directive requires gfx12+", IDRange);
6344 COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN, ExprVal,
6345 ValRange);
6346 } else {
6347 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
6348 }
6349
6350#undef PARSE_BITS_ENTRY
6351 }
6352
6353 if (!Seen.contains(".amdhsa_next_free_vgpr"))
6354 return TokError(".amdhsa_next_free_vgpr directive is required");
6355
6356 if (!Seen.contains(".amdhsa_next_free_sgpr"))
6357 return TokError(".amdhsa_next_free_sgpr directive is required");
6358
6359 unsigned UserSGPRCount = ExplicitUserSGPRCount.value_or(ImpliedUserSGPRCount);
6360
6361 // Consider the case where the total number of UserSGPRs with trailing
6362 // allocated preload SGPRs, is greater than the number of explicitly
6363 // referenced SGPRs.
6364 if (PreloadLength) {
6365 MCContext &Ctx = getContext();
6366 NextFreeSGPR = AMDGPUMCExpr::createMax(
6367 {NextFreeSGPR, MCConstantExpr::create(UserSGPRCount, Ctx)}, Ctx);
6368 }
6369
6370 const MCExpr *VGPRBlocks;
6371 const MCExpr *SGPRBlocks;
6372 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
6373 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
6374 EnableWavefrontSize32, NextFreeVGPR,
6375 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
6376 SGPRBlocks))
6377 return true;
6378
6379 int64_t EvaluatedVGPRBlocks;
6380 bool VGPRBlocksEvaluatable =
6381 VGPRBlocks->evaluateAsAbsolute(EvaluatedVGPRBlocks);
6382 if (VGPRBlocksEvaluatable &&
6384 static_cast<uint64_t>(EvaluatedVGPRBlocks))) {
6385 return OutOfRangeError(VGPRRange);
6386 }
6388 KD.compute_pgm_rsrc1, VGPRBlocks,
6389 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT,
6390 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, getContext());
6391
6392 int64_t EvaluatedSGPRBlocks;
6393 if (SGPRBlocks->evaluateAsAbsolute(EvaluatedSGPRBlocks) &&
6395 static_cast<uint64_t>(EvaluatedSGPRBlocks)))
6396 return OutOfRangeError(SGPRRange);
6398 KD.compute_pgm_rsrc1, SGPRBlocks,
6399 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT,
6400 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, getContext());
6401
6402 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
6403 return TokError("amdgpu_user_sgpr_count smaller than than implied by "
6404 "enabled user SGPRs");
6405
6406 if (isGFX1250Plus()) {
6408 return TokError("too many user SGPRs enabled");
6411 MCConstantExpr::create(UserSGPRCount, getContext()),
6412 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT_SHIFT,
6413 COMPUTE_PGM_RSRC2_GFX125_USER_SGPR_COUNT, getContext());
6414 } else {
6416 UserSGPRCount))
6417 return TokError("too many user SGPRs enabled");
6420 MCConstantExpr::create(UserSGPRCount, getContext()),
6421 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT_SHIFT,
6422 COMPUTE_PGM_RSRC2_GFX6_GFX120_USER_SGPR_COUNT, getContext());
6423 }
6424
6425 int64_t IVal = 0;
6426 if (!KD.kernarg_size->evaluateAsAbsolute(IVal))
6427 return TokError("Kernarg size should be resolvable");
6428 uint64_t kernarg_size = IVal;
6429 if (PreloadLength && kernarg_size &&
6430 (PreloadLength * 4 + PreloadOffset * 4 > kernarg_size))
6431 return TokError("Kernarg preload length + offset is larger than the "
6432 "kernarg segment size");
6433
6434 if (isGFX90A()) {
6435 if (!Seen.contains(".amdhsa_accum_offset"))
6436 return TokError(".amdhsa_accum_offset directive is required");
6437 int64_t EvaluatedAccum;
6438 bool AccumEvaluatable = AccumOffset->evaluateAsAbsolute(EvaluatedAccum);
6439 uint64_t UEvaluatedAccum = EvaluatedAccum;
6440 if (AccumEvaluatable &&
6441 (UEvaluatedAccum < 4 || UEvaluatedAccum > 256 || (UEvaluatedAccum & 3)))
6442 return TokError("accum_offset should be in range [4..256] in "
6443 "increments of 4");
6444
6445 int64_t EvaluatedNumVGPR;
6446 if (NextFreeVGPR->evaluateAsAbsolute(EvaluatedNumVGPR) &&
6447 AccumEvaluatable &&
6448 UEvaluatedAccum >
6449 alignTo(std::max((uint64_t)1, (uint64_t)EvaluatedNumVGPR), 4))
6450 return TokError("accum_offset exceeds total VGPR allocation");
6451 const MCExpr *AdjustedAccum = MCBinaryExpr::createSub(
6453 AccumOffset, MCConstantExpr::create(4, getContext()), getContext()),
6456 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
6457 COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
6458 getContext());
6459 }
6460
6461 if (isGFX1250Plus())
6463 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT,
6464 COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
6465 getContext());
6466
6467 if (IVersion.Major >= 10 && IVersion.Major < 12) {
6468 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
6469 if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
6470 return TokError("shared_vgpr_count directive not valid on "
6471 "wavefront size 32");
6472 }
6473
6474 if (VGPRBlocksEvaluatable &&
6475 (SharedVGPRCount * 2 + static_cast<uint64_t>(EvaluatedVGPRBlocks) >
6476 63)) {
6477 return TokError("shared_vgpr_count*2 + "
6478 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
6479 "exceed 63\n");
6480 }
6481 }
6482
6483 getTargetStreamer().EmitAmdhsaKernelDescriptor(getSTI(), KernelName, KD,
6484 NextFreeVGPR, NextFreeSGPR,
6485 ReserveVCC, ReserveFlatScr);
6486 return false;
6487}
6488
6489bool AMDGPUAsmParser::ParseDirectiveAMDHSACodeObjectVersion() {
6490 uint32_t Version;
6491 if (ParseAsAbsoluteExpression(Version))
6492 return true;
6493
6494 getTargetStreamer().EmitDirectiveAMDHSACodeObjectVersion(Version);
6495 return false;
6496}
6497
6498bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
6499 AMDGPUMCKernelCodeT &C) {
6500 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
6501 // assembly for backwards compatibility.
6502 if (ID == "max_scratch_backing_memory_byte_size") {
6503 Parser.eatToEndOfStatement();
6504 return false;
6505 }
6506
6507 SmallString<40> ErrStr;
6508 raw_svector_ostream Err(ErrStr);
6509 if (!C.ParseKernelCodeT(ID, getParser(), Err)) {
6510 return TokError(Err.str());
6511 }
6512 Lex();
6513
6514 if (ID == "enable_wavefront_size32") {
6515 if (C.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
6516 if (!isGFX10Plus())
6517 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
6518 if (!isWave32())
6519 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
6520 } else {
6521 if (!isWave64())
6522 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
6523 }
6524 }
6525
6526 if (ID == "wavefront_size") {
6527 if (C.wavefront_size == 5) {
6528 if (!isGFX10Plus())
6529 return TokError("wavefront_size=5 is only allowed on GFX10+");
6530 if (!isWave32())
6531 return TokError("wavefront_size=5 requires +WavefrontSize32");
6532 } else if (C.wavefront_size == 6) {
6533 if (!isWave64())
6534 return TokError("wavefront_size=6 requires +WavefrontSize64");
6535 }
6536 }
6537
6538 return false;
6539}
6540
6541bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
6542 AMDGPUMCKernelCodeT KernelCode;
6543 KernelCode.initDefault(&getSTI(), getContext());
6544
6545 while (true) {
6546 // Lex EndOfStatement. This is in a while loop, because lexing a comment
6547 // will set the current token to EndOfStatement.
6548 while(trySkipToken(AsmToken::EndOfStatement));
6549
6550 StringRef ID;
6551 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
6552 return true;
6553
6554 if (ID == ".end_amd_kernel_code_t")
6555 break;
6556
6557 if (ParseAMDKernelCodeTValue(ID, KernelCode))
6558 return true;
6559 }
6560
6561 KernelCode.validate(&getSTI(), getContext());
6562 getTargetStreamer().EmitAMDKernelCodeT(KernelCode);
6563
6564 return false;
6565}
6566
6567bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
6568 StringRef KernelName;
6569 if (!parseId(KernelName, "expected symbol name"))
6570 return true;
6571
6572 getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
6574
6575 KernelScope.initialize(getContext());
6576 return false;
6577}
6578
6579bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
6580 if (!getSTI().getTargetTriple().isAMDGCN()) {
6581 return Error(getLoc(),
6582 ".amd_amdgpu_isa directive is not available on non-amdgcn "
6583 "architectures");
6584 }
6585
6586 auto TargetIDDirective = getLexer().getTok().getStringContents();
6587 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
6588 return Error(getParser().getTok().getLoc(), "target id must match options");
6589
6590 getTargetStreamer().EmitISAVersion();
6591 Lex();
6592
6593 return false;
6594}
6595
6596bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
6597 assert(isHsaAbi(getSTI()));
6598
6599 std::string HSAMetadataString;
6600 if (ParseToEndDirective(HSAMD::V3::AssemblerDirectiveBegin,
6601 HSAMD::V3::AssemblerDirectiveEnd, HSAMetadataString))
6602 return true;
6603
6604 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
6605 return Error(getLoc(), "invalid HSA metadata");
6606
6607 return false;
6608}
6609
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// \param AssemblerDirectiveBegin  opening directive name (unused here; kept
///        for interface symmetry with the end directive).
/// \param AssemblerDirectiveEnd    directive that terminates the block.
/// \param CollectString            receives the collected raw text.
/// \returns true if EOF was reached before the end directive.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant inside the collected block (e.g. YAML), so
  // disable the lexer's default space-skipping while copying tokens.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Copy leading whitespace verbatim.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    // Stop when the closing directive is reached.
    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Copy the rest of the statement, terminated by the target's statement
    // separator so line structure is preserved in the collected string.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing behavior.
  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  return false;
}
6647
6648/// Parse the assembler directive for new MsgPack-format PAL metadata.
6649bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
6650 std::string String;
6651 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
6653 return true;
6654
6655 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6656 if (!PALMetadata->setFromString(String))
6657 return Error(getLoc(), "invalid PAL metadata");
6658 return false;
6659}
6660
6661/// Parse the assembler directive for old linear-format PAL metadata.
6662bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
6663 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
6664 return Error(getLoc(),
6665 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
6666 "not available on non-amdpal OSes")).str());
6667 }
6668
6669 auto *PALMetadata = getTargetStreamer().getPALMetadata();
6670 PALMetadata->setLegacy();
6671 for (;;) {
6672 uint32_t Key, Value;
6673 if (ParseAsAbsoluteExpression(Key)) {
6674 return TokError(Twine("invalid value in ") +
6676 }
6677 if (!trySkipToken(AsmToken::Comma)) {
6678 return TokError(Twine("expected an even number of values in ") +
6680 }
6681 if (ParseAsAbsoluteExpression(Value)) {
6682 return TokError(Twine("invalid value in ") +
6684 }
6685 PALMetadata->setRegister(Key, Value);
6686 if (!trySkipToken(AsmToken::Comma))
6687 break;
6688 }
6689 return false;
6690}
6691
6692/// ParseDirectiveAMDGPULDS
6693/// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
6694bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
6695 if (getParser().checkForValidSection())
6696 return true;
6697
6698 StringRef Name;
6699 SMLoc NameLoc = getLoc();
6700 if (getParser().parseIdentifier(Name))
6701 return TokError("expected identifier in directive");
6702
6703 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
6704 if (getParser().parseComma())
6705 return true;
6706
6707 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
6708
6709 int64_t Size;
6710 SMLoc SizeLoc = getLoc();
6711 if (getParser().parseAbsoluteExpression(Size))
6712 return true;
6713 if (Size < 0)
6714 return Error(SizeLoc, "size must be non-negative");
6715 if (Size > LocalMemorySize)
6716 return Error(SizeLoc, "size is too large");
6717
6718 int64_t Alignment = 4;
6719 if (trySkipToken(AsmToken::Comma)) {
6720 SMLoc AlignLoc = getLoc();
6721 if (getParser().parseAbsoluteExpression(Alignment))
6722 return true;
6723 if (Alignment < 0 || !isPowerOf2_64(Alignment))
6724 return Error(AlignLoc, "alignment must be a power of two");
6725
6726 // Alignment larger than the size of LDS is possible in theory, as long
6727 // as the linker manages to place to symbol at address 0, but we do want
6728 // to make sure the alignment fits nicely into a 32-bit integer.
6729 if (Alignment >= 1u << 31)
6730 return Error(AlignLoc, "alignment is too large");
6731 }
6732
6733 if (parseEOL())
6734 return true;
6735
6736 Symbol->redefineIfPossible();
6737 if (!Symbol->isUndefined())
6738 return Error(NameLoc, "invalid symbol redefinition");
6739
6740 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
6741 return false;
6742}
6743
6744bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
6745 StringRef IDVal = DirectiveID.getString();
6746
6747 if (isHsaAbi(getSTI())) {
6748 if (IDVal == ".amdhsa_kernel")
6749 return ParseDirectiveAMDHSAKernel();
6750
6751 if (IDVal == ".amdhsa_code_object_version")
6752 return ParseDirectiveAMDHSACodeObjectVersion();
6753
6754 // TODO: Restructure/combine with PAL metadata directive.
6756 return ParseDirectiveHSAMetadata();
6757 } else {
6758 if (IDVal == ".amd_kernel_code_t")
6759 return ParseDirectiveAMDKernelCodeT();
6760
6761 if (IDVal == ".amdgpu_hsa_kernel")
6762 return ParseDirectiveAMDGPUHsaKernel();
6763
6764 if (IDVal == ".amd_amdgpu_isa")
6765 return ParseDirectiveISAVersion();
6766
6768 return Error(getLoc(), (Twine(HSAMD::AssemblerDirectiveBegin) +
6769 Twine(" directive is "
6770 "not available on non-amdhsa OSes"))
6771 .str());
6772 }
6773 }
6774
6775 if (IDVal == ".amdgcn_target")
6776 return ParseDirectiveAMDGCNTarget();
6777
6778 if (IDVal == ".amdgpu_lds")
6779 return ParseDirectiveAMDGPULDS();
6780
6781 if (IDVal == PALMD::AssemblerDirectiveBegin)
6782 return ParseDirectivePALMetadataBegin();
6783
6784 if (IDVal == PALMD::AssemblerDirective)
6785 return ParseDirectivePALMetadata();
6786
6787 return true;
6788}
6789
/// Return true if \p Reg exists (and is a valid instruction operand) on the
/// current subtarget. Availability depends on the ISA generation and, for a
/// few registers, on target features.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           MCRegister Reg) {
  // TTMP12..TTMP15 were added with GFX9.
  if (MRI.regsOverlap(TTMP12_TTMP13_TTMP14_TTMP15, Reg))
    return isGFX9Plus();

  // GFX10+ has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(SGPR104_SGPR105, Reg))
    return hasSGPR104_SGPR105();

  switch (Reg.id()) {
  // Aperture registers: GFX9 and later.
  case SRC_SHARED_BASE_LO:
  case SRC_SHARED_BASE:
  case SRC_SHARED_LIMIT_LO:
  case SRC_SHARED_LIMIT:
  case SRC_PRIVATE_BASE_LO:
  case SRC_PRIVATE_BASE:
  case SRC_PRIVATE_LIMIT_LO:
  case SRC_PRIVATE_LIMIT:
    return isGFX9Plus();
  // Only present with the globally-addressable-scratch feature.
  case SRC_FLAT_SCRATCH_BASE_LO:
  case SRC_FLAT_SCRATCH_BASE_HI:
    return hasGloballyAddressableScratch();
  // Added in GFX9, removed again in GFX11.
  case SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus() && !isGFX11Plus();
  // Trap handler base/memory registers were removed in GFX9.
  case TBA:
  case TBA_LO:
  case TBA_HI:
  case TMA:
  case TMA_LO:
  case TMA_HI:
    return !isGFX9Plus();
  // XNACK_MASK is only addressable on VI/GFX9 subtargets that support XNACK.
  case XNACK_MASK:
  case XNACK_MASK_LO:
  case XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case SGPR_NULL:
    return isGFX10Plus();
  // execz/vccz pseudo-sources were removed in GFX11.
  case SRC_EXECZ:
  case SRC_VCCZ:
    return !isGFX11Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10Plus flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (Reg.id()) {
    case FLAT_SCR:
    case FLAT_SCR_LO:
    case FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(SGPR102_SGPR103, Reg))
    return hasSGPR102_SGPR103();

  return true;
}
6858
6859ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
6860 StringRef Mnemonic,
6861 OperandMode Mode) {
6862 ParseStatus Res = parseVOPD(Operands);
6863 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6864 return Res;
6865
6866 // Try to parse with a custom parser
6867 Res = MatchOperandParserImpl(Operands, Mnemonic);
6868
6869 // If we successfully parsed the operand or if there as an error parsing,
6870 // we are done.
6871 //
6872 // If we are parsing after we reach EndOfStatement then this means we
6873 // are appending default values to the Operands list. This is only done
6874 // by custom parser, so we shouldn't continue on to the generic parsing.
6875 if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
6876 return Res;
6877
6878 SMLoc RBraceLoc;
6879 SMLoc LBraceLoc = getLoc();
6880 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
6881 unsigned Prefix = Operands.size();
6882
6883 for (;;) {
6884 auto Loc = getLoc();
6885 Res = parseReg(Operands);
6886 if (Res.isNoMatch())
6887 Error(Loc, "expected a register");
6888 if (!Res.isSuccess())
6889 return ParseStatus::Failure;
6890
6891 RBraceLoc = getLoc();
6892 if (trySkipToken(AsmToken::RBrac))
6893 break;
6894
6895 if (!skipToken(AsmToken::Comma,
6896 "expected a comma or a closing square bracket"))
6897 return ParseStatus::Failure;
6898 }
6899
6900 if (Operands.size() - Prefix > 1) {
6901 Operands.insert(Operands.begin() + Prefix,
6902 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
6903 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
6904 }
6905
6906 return ParseStatus::Success;
6907 }
6908
6909 return parseRegOrImm(Operands);
6910}
6911
6912StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
6913 // Clear any forced encodings from the previous instruction.
6914 setForcedEncodingSize(0);
6915 setForcedDPP(false);
6916 setForcedSDWA(false);
6917
6918 if (Name.consume_back("_e64_dpp")) {
6919 setForcedDPP(true);
6920 setForcedEncodingSize(64);
6921 return Name;
6922 }
6923 if (Name.consume_back("_e64")) {
6924 setForcedEncodingSize(64);
6925 return Name;
6926 }
6927 if (Name.consume_back("_e32")) {
6928 setForcedEncodingSize(32);
6929 return Name;
6930 }
6931 if (Name.consume_back("_dpp")) {
6932 setForcedDPP(true);
6933 return Name;
6934 }
6935 if (Name.consume_back("_sdwa")) {
6936 setForcedSDWA(true);
6937 return Name;
6938 }
6939 return Name;
6940}
6941
6942static void applyMnemonicAliases(StringRef &Mnemonic,
6943 const FeatureBitset &Features,
6944 unsigned VariantID);
6945
6946bool AMDGPUAsmParser::parseInstruction(ParseInstructionInfo &Info,
6947 StringRef Name, SMLoc NameLoc,
6948 OperandVector &Operands) {
6949 // Add the instruction mnemonic
6950 Name = parseMnemonicSuffix(Name);
6951
6952 // If the target architecture uses MnemonicAlias, call it here to parse
6953 // operands correctly.
6954 applyMnemonicAliases(Name, getAvailableFeatures(), 0);
6955
6956 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
6957
6958 bool IsMIMG = Name.starts_with("image_");
6959
6960 while (!trySkipToken(AsmToken::EndOfStatement)) {
6961 OperandMode Mode = OperandMode_Default;
6962 if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
6963 Mode = OperandMode_NSA;
6964 ParseStatus Res = parseOperand(Operands, Name, Mode);
6965
6966 if (!Res.isSuccess()) {
6967 checkUnsupportedInstruction(Name, NameLoc);
6968 if (!Parser.hasPendingError()) {
6969 // FIXME: use real operand location rather than the current location.
6970 StringRef Msg = Res.isFailure() ? "failed parsing operand."
6971 : "not a valid operand.";
6972 Error(getLoc(), Msg);
6973 }
6974 while (!trySkipToken(AsmToken::EndOfStatement)) {
6975 lex();
6976 }
6977 return true;
6978 }
6979
6980 // Eat the comma or space if there is one.
6981 trySkipToken(AsmToken::Comma);
6982 }
6983
6984 return false;
6985}
6986
6987//===----------------------------------------------------------------------===//
6988// Utility functions
6989//===----------------------------------------------------------------------===//
6990
6991ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
6992 OperandVector &Operands) {
6993 SMLoc S = getLoc();
6994 if (!trySkipId(Name))
6995 return ParseStatus::NoMatch;
6996
6997 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
6998 return ParseStatus::Success;
6999}
7000
ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
                                                int64_t &IntVal) {

  // Match "<Prefix>:"; on success the integer expression that follows is
  // parsed into IntVal.
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

}
7009
7010ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
7011 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7012 std::function<bool(int64_t &)> ConvertResult) {
7013 SMLoc S = getLoc();
7014 int64_t Value = 0;
7015
7016 ParseStatus Res = parseIntWithPrefix(Prefix, Value);
7017 if (!Res.isSuccess())
7018 return Res;
7019
7020 if (ConvertResult && !ConvertResult(Value)) {
7021 Error(S, "invalid " + StringRef(Prefix) + " value.");
7022 }
7023
7024 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
7025 return ParseStatus::Success;
7026}
7027
7028ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
7029 const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
7030 bool (*ConvertResult)(int64_t &)) {
7031 SMLoc S = getLoc();
7032 if (!trySkipId(Prefix, AsmToken::Colon))
7033 return ParseStatus::NoMatch;
7034
7035 if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
7036 return ParseStatus::Failure;
7037
7038 unsigned Val = 0;
7039 const unsigned MaxSize = 4;
7040
7041 // FIXME: How to verify the number of elements matches the number of src
7042 // operands?
7043 for (int I = 0; ; ++I) {
7044 int64_t Op;
7045 SMLoc Loc = getLoc();
7046 if (!parseExpr(Op))
7047 return ParseStatus::Failure;
7048
7049 if (Op != 0 && Op != 1)
7050 return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
7051
7052 Val |= (Op << I);
7053
7054 if (trySkipToken(AsmToken::RBrac))
7055 break;
7056
7057 if (I + 1 == MaxSize)
7058 return Error(getLoc(), "expected a closing square bracket");
7059
7060 if (!skipToken(AsmToken::Comma, "expected a comma"))
7061 return ParseStatus::Failure;
7062 }
7063
7064 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
7065 return ParseStatus::Success;
7066}
7067
7068ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
7069 OperandVector &Operands,
7070 AMDGPUOperand::ImmTy ImmTy,
7071 bool IgnoreNegative) {
7072 int64_t Bit;
7073 SMLoc S = getLoc();
7074
7075 if (trySkipId(Name)) {
7076 Bit = 1;
7077 } else if (trySkipId("no", Name)) {
7078 if (IgnoreNegative)
7079 return ParseStatus::Success;
7080 Bit = 0;
7081 } else {
7082 return ParseStatus::NoMatch;
7083 }
7084
7085 if (Name == "r128" && !hasMIMG_R128())
7086 return Error(S, "r128 modifier is not supported on this GPU");
7087 if (Name == "a16" && !hasA16())
7088 return Error(S, "a16 modifier is not supported on this GPU");
7089
7090 if (Bit == 0 && Name == "gds") {
7091 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
7092 if (Mnemo.starts_with("ds_gws"))
7093 return Error(S, "nogds is not allowed");
7094 }
7095
7096 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
7097 ImmTy = AMDGPUOperand::ImmTyR128A16;
7098
7099 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
7100 return ParseStatus::Success;
7101}
7102
7103unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
7104 bool &Disabling) const {
7105 Disabling = Id.consume_front("no");
7106
7107 if (isGFX940() && !Mnemo.starts_with("s_")) {
7108 return StringSwitch<unsigned>(Id)
7109 .Case("nt", AMDGPU::CPol::NT)
7110 .Case("sc0", AMDGPU::CPol::SC0)
7111 .Case("sc1", AMDGPU::CPol::SC1)
7112 .Default(0);
7113 }
7114
7115 return StringSwitch<unsigned>(Id)
7116 .Case("dlc", AMDGPU::CPol::DLC)
7117 .Case("glc", AMDGPU::CPol::GLC)
7118 .Case("scc", AMDGPU::CPol::SCC)
7119 .Case("slc", AMDGPU::CPol::SLC)
7120 .Default(0);
7121}
7122
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  // Parse cache-policy modifiers. GFX12+ uses the th:/scope:/nv/scale_offset
  // syntax handled first; earlier targets use the keyword style ([no]glc,
  // [no]slc, ...) handled in the second half.
  if (isGFX12Plus()) {
    SMLoc StringLoc = getLoc();

    int64_t CPolVal = 0;
    // Each sub-modifier may appear at most once, in any order. NoMatch
    // means "not seen yet".
    ParseStatus ResTH = ParseStatus::NoMatch;
    ParseStatus ResScope = ParseStatus::NoMatch;
    ParseStatus ResNV = ParseStatus::NoMatch;
    ParseStatus ResScal = ParseStatus::NoMatch;

    for (;;) {
      if (ResTH.isNoMatch()) {
        int64_t TH;
        ResTH = parseTH(Operands, TH);
        if (ResTH.isFailure())
          return ResTH;
        if (ResTH.isSuccess()) {
          CPolVal |= TH;
          continue;
        }
      }

      if (ResScope.isNoMatch()) {
        int64_t Scope;
        ResScope = parseScope(Operands, Scope);
        if (ResScope.isFailure())
          return ResScope;
        if (ResScope.isSuccess()) {
          CPolVal |= Scope;
          continue;
        }
      }

      // NV bit exists on GFX12+, but does something starting from GFX1250.
      // Allow parsing on all GFX12 and fail on validation for better
      // diagnostics.
      if (ResNV.isNoMatch()) {
        if (trySkipId("nv")) {
          ResNV = ParseStatus::Success;
          CPolVal |= CPol::NV;
          continue;
        } else if (trySkipId("no", "nv")) {
          // "nonv" is accepted but contributes no bits.
          ResNV = ParseStatus::Success;
          continue;
        }
      }

      if (ResScal.isNoMatch()) {
        if (trySkipId("scale_offset")) {
          ResScal = ParseStatus::Success;
          CPolVal |= CPol::SCAL;
          continue;
        } else if (trySkipId("no", "scale_offset")) {
          // "noscale_offset" is accepted but contributes no bits.
          ResScal = ParseStatus::Success;
          continue;
        }
      }

      // Nothing more recognized this iteration.
      break;
    }

    // No sub-modifier at all: this is not a cache-policy operand.
    if (ResTH.isNoMatch() && ResScope.isNoMatch() && ResNV.isNoMatch() &&
        ResScal.isNoMatch())
      return ParseStatus::NoMatch;

    Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
                                                AMDGPUOperand::ImmTyCPol));
    return ParseStatus::Success;
  }

  // Pre-GFX12 keyword style: a sequence of optionally "no"-prefixed
  // modifier tokens, each consumed by getCPolKind.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  SMLoc OpLoc = getLoc();
  unsigned Enabled = 0, Seen = 0;
  for (;;) {
    SMLoc S = getLoc();
    bool Disabling;
    unsigned CPol = getCPolKind(getId(), Mnemo, Disabling);
    if (!CPol)
      break;

    lex();

    if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
      return Error(S, "dlc modifier is not supported on this GPU");

    if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
      return Error(S, "scc modifier is not supported on this GPU");

    // Reject both "glc glc" and "glc noglc".
    if (Seen & CPol)
      return Error(S, "duplicate cache policy modifier");

    if (!Disabling)
      Enabled |= CPol;

    Seen |= CPol;
  }

  if (!Seen)
    return ParseStatus::NoMatch;

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
  return ParseStatus::Success;
}
7227
ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
                                        int64_t &Scope) {
  // Parse "scope:<name|index>"; the index returned by the string parser is
  // translated into the CPol scope encoding via the table below.
  static const unsigned Scopes[] = {CPol::SCOPE_CU, CPol::SCOPE_SE,

  ParseStatus Res = parseStringOrIntWithPrefix(
      Operands, "scope", {"SCOPE_CU", "SCOPE_SE", "SCOPE_DEV", "SCOPE_SYS"},
      Scope);

  if (Res.isSuccess())
    Scope = Scopes[Scope];

  return Res;
}
7242
ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
  // Parse the GFX12+ "th:<value>" temporal-hint cache-policy modifier into
  // its CPol encoding.
  TH = AMDGPU::CPol::TH_RT; // default

  StringRef Value;
  SMLoc StringLoc;
  ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
  if (!Res.isSuccess())
    return Res;

  // Classify by prefix; consume_front strips the class prefix so only the
  // variant suffix remains for the switches below.
  if (Value == "TH_DEFAULT")
  else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_WB" ||
           Value == "TH_LOAD_NT_WB") {
    return Error(StringLoc, "invalid th value");
  } else if (Value.consume_front("TH_ATOMIC_")) {
  } else if (Value.consume_front("TH_LOAD_")) {
  } else if (Value.consume_front("TH_STORE_")) {
  } else {
    return Error(StringLoc, "invalid th value");
  }

  if (Value == "BYPASS")

  if (TH != 0) {
    // Atomic variants; 0xffffffff marks an unrecognized suffix.
    TH |= StringSwitch<int64_t>(Value)
              .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
              .Case("RT", AMDGPU::CPol::TH_RT)
              .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
              .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
              .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
              .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
              .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
              .Default(0xffffffff);
    else
      // Load/store variants.
      TH |= StringSwitch<int64_t>(Value)
                .Case("RT", AMDGPU::CPol::TH_RT)
                .Case("NT", AMDGPU::CPol::TH_NT)
                .Case("HT", AMDGPU::CPol::TH_HT)
                .Case("LU", AMDGPU::CPol::TH_LU)
                .Case("WB", AMDGPU::CPol::TH_WB)
                .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
                .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
                .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
                .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
                .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
                .Default(0xffffffff);
  }

  if (TH == 0xffffffff)
    return Error(StringLoc, "invalid th value");

  return ParseStatus::Success;
}
7303
// Append an optional immediate operand to Inst: the parsed value when the
// operand was present in OptionalIdx, otherwise Default. InsertAt places
// the operand at a specific position instead of appending at the end.
static void
                     AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx,
                     AMDGPUOperand::ImmTy ImmT, int64_t Default = 0,
                     std::optional<unsigned> InsertAt = std::nullopt) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    // The operand was spelled out by the user; take its parsed value.
    unsigned Idx = i->second;
    const AMDGPUOperand &Op =
        static_cast<const AMDGPUOperand &>(*Operands[Idx]);
    if (InsertAt)
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Op.getImm()));
    else
      Op.addImmOperands(Inst, 1);
  } else {
    // The operand was omitted; supply the default value.
    if (InsertAt.has_value())
      Inst.insert(Inst.begin() + *InsertAt, MCOperand::createImm(Default));
    else
  }
}
7325
ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                                   StringRef &Value,
                                                   SMLoc &StringLoc) {
  // Parse "<Prefix>:<identifier>", returning the identifier text in Value
  // and its location in StringLoc.
  if (!trySkipId(Prefix, AsmToken::Colon))
    return ParseStatus::NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? ParseStatus::Success
}
7336
7337ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7338 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7339 int64_t &IntVal) {
7340 if (!trySkipId(Name, AsmToken::Colon))
7341 return ParseStatus::NoMatch;
7342
7343 SMLoc StringLoc = getLoc();
7344
7345 StringRef Value;
7346 if (isToken(AsmToken::Identifier)) {
7347 Value = getTokenStr();
7348 lex();
7349
7350 for (IntVal = 0; IntVal < (int64_t)Ids.size(); ++IntVal)
7351 if (Value == Ids[IntVal])
7352 break;
7353 } else if (!parseExpr(IntVal))
7354 return ParseStatus::Failure;
7355
7356 if (IntVal < 0 || IntVal >= (int64_t)Ids.size())
7357 return Error(StringLoc, "invalid " + Twine(Name) + " value");
7358
7359 return ParseStatus::Success;
7360}
7361
7362ParseStatus AMDGPUAsmParser::parseStringOrIntWithPrefix(
7363 OperandVector &Operands, StringRef Name, ArrayRef<const char *> Ids,
7364 AMDGPUOperand::ImmTy Type) {
7365 SMLoc S = getLoc();
7366 int64_t IntVal;
7367
7368 ParseStatus Res = parseStringOrIntWithPrefix(Operands, Name, Ids, IntVal);
7369 if (Res.isSuccess())
7370 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S, Type));
7371
7372 return Res;
7373}
7374
7375//===----------------------------------------------------------------------===//
7376// MTBUF format
7377//===----------------------------------------------------------------------===//
7378
7379bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
7380 int64_t MaxVal,
7381 int64_t &Fmt) {
7382 int64_t Val;
7383 SMLoc Loc = getLoc();
7384
7385 auto Res = parseIntWithPrefix(Pref, Val);
7386 if (Res.isFailure())
7387 return false;
7388 if (Res.isNoMatch())
7389 return true;
7390
7391 if (Val < 0 || Val > MaxVal) {
7392 Error(Loc, Twine("out of range ", StringRef(Pref)));
7393 return false;
7394 }
7395
7396 Fmt = Val;
7397 return true;
7398}
7399
7400ParseStatus AMDGPUAsmParser::tryParseIndexKey(OperandVector &Operands,
7401 AMDGPUOperand::ImmTy ImmTy) {
7402 const char *Pref = "index_key";
7403 int64_t ImmVal = 0;
7404 SMLoc Loc = getLoc();
7405 auto Res = parseIntWithPrefix(Pref, ImmVal);
7406 if (!Res.isSuccess())
7407 return Res;
7408
7409 if ((ImmTy == AMDGPUOperand::ImmTyIndexKey16bit ||
7410 ImmTy == AMDGPUOperand::ImmTyIndexKey32bit) &&
7411 (ImmVal < 0 || ImmVal > 1))
7412 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7413
7414 if (ImmTy == AMDGPUOperand::ImmTyIndexKey8bit && (ImmVal < 0 || ImmVal > 3))
7415 return Error(Loc, Twine("out of range ", StringRef(Pref)));
7416
7417 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, ImmTy));
7418 return ParseStatus::Success;
7419}
7420
// index_key modifier for 8-bit element keys (valid values 0..3).
ParseStatus AMDGPUAsmParser::parseIndexKey8bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey8bit);
}

// index_key modifier for 16-bit element keys (valid values 0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey16bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey16bit);
}

// index_key modifier for 32-bit element keys (valid values 0..1).
ParseStatus AMDGPUAsmParser::parseIndexKey32bit(OperandVector &Operands) {
  return tryParseIndexKey(Operands, AMDGPUOperand::ImmTyIndexKey32bit);
}
7432
// Shared helper for the WMMA matrix_{a,b}_fmt modifiers: accepts either a
// symbolic name from ModMatrixFmt or a numeric value.
ParseStatus AMDGPUAsmParser::tryParseMatrixFMT(OperandVector &Operands,
                                               StringRef Name,
                                               AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixFmt,
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_a_fmt",
                           AMDGPUOperand::ImmTyMatrixAFMT);
}

ParseStatus AMDGPUAsmParser::parseMatrixBFMT(OperandVector &Operands) {
  return tryParseMatrixFMT(Operands, "matrix_b_fmt",
                           AMDGPUOperand::ImmTyMatrixBFMT);
}
7449
// Shared helper for the WMMA matrix_{a,b}_scale modifiers: accepts either a
// symbolic name from ModMatrixScale or a numeric value.
ParseStatus AMDGPUAsmParser::tryParseMatrixScale(OperandVector &Operands,
                                                 StringRef Name,
                                                 AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScale,
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_a_scale",
                             AMDGPUOperand::ImmTyMatrixAScale);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScale(OperandVector &Operands) {
  return tryParseMatrixScale(Operands, "matrix_b_scale",
                             AMDGPUOperand::ImmTyMatrixBScale);
}
7466
// Shared helper for the WMMA matrix_{a,b}_scale_fmt modifiers: accepts
// either a symbolic name from ModMatrixScaleFmt or a numeric value.
ParseStatus AMDGPUAsmParser::tryParseMatrixScaleFmt(OperandVector &Operands,
                                                    StringRef Name,
                                                    AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(Operands, Name, WMMAMods::ModMatrixScaleFmt,
                                    Type);
}

ParseStatus AMDGPUAsmParser::parseMatrixAScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_a_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixAScaleFmt);
}

ParseStatus AMDGPUAsmParser::parseMatrixBScaleFmt(OperandVector &Operands) {
  return tryParseMatrixScaleFmt(Operands, "matrix_b_scale_fmt",
                                AMDGPUOperand::ImmTyMatrixBScaleFmt);
}
7483
7484// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
7485// values to live in a joint format operand in the MCInst encoding.
7486ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
7487 using namespace llvm::AMDGPU::MTBUFFormat;
7488
7489 int64_t Dfmt = DFMT_UNDEF;
7490 int64_t Nfmt = NFMT_UNDEF;
7491
7492 // dfmt and nfmt can appear in either order, and each is optional.
7493 for (int I = 0; I < 2; ++I) {
7494 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
7495 return ParseStatus::Failure;
7496
7497 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
7498 return ParseStatus::Failure;
7499
7500 // Skip optional comma between dfmt/nfmt
7501 // but guard against 2 commas following each other.
7502 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
7503 !peekToken().is(AsmToken::Comma)) {
7504 trySkipToken(AsmToken::Comma);
7505 }
7506 }
7507
7508 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
7509 return ParseStatus::NoMatch;
7510
7511 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7512 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7513
7514 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7515 return ParseStatus::Success;
7516}
7517
7518ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
7519 using namespace llvm::AMDGPU::MTBUFFormat;
7520
7521 int64_t Fmt = UFMT_UNDEF;
7522
7523 if (!tryParseFmt("format", UFMT_MAX, Fmt))
7524 return ParseStatus::Failure;
7525
7526 if (Fmt == UFMT_UNDEF)
7527 return ParseStatus::NoMatch;
7528
7529 Format = Fmt;
7530 return ParseStatus::Success;
7531}
7532
7533bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
7534 int64_t &Nfmt,
7535 StringRef FormatStr,
7536 SMLoc Loc) {
7537 using namespace llvm::AMDGPU::MTBUFFormat;
7538 int64_t Format;
7539
7540 Format = getDfmt(FormatStr);
7541 if (Format != DFMT_UNDEF) {
7542 Dfmt = Format;
7543 return true;
7544 }
7545
7546 Format = getNfmt(FormatStr, getSTI());
7547 if (Format != NFMT_UNDEF) {
7548 Nfmt = Format;
7549 return true;
7550 }
7551
7552 Error(Loc, "unsupported format");
7553 return false;
7554}
7555
7556ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
7557 SMLoc FormatLoc,
7558 int64_t &Format) {
7559 using namespace llvm::AMDGPU::MTBUFFormat;
7560
7561 int64_t Dfmt = DFMT_UNDEF;
7562 int64_t Nfmt = NFMT_UNDEF;
7563 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
7564 return ParseStatus::Failure;
7565
7566 if (trySkipToken(AsmToken::Comma)) {
7567 StringRef Str;
7568 SMLoc Loc = getLoc();
7569 if (!parseId(Str, "expected a format string") ||
7570 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
7571 return ParseStatus::Failure;
7572 if (Dfmt == DFMT_UNDEF)
7573 return Error(Loc, "duplicate numeric format");
7574 if (Nfmt == NFMT_UNDEF)
7575 return Error(Loc, "duplicate data format");
7576 }
7577
7578 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
7579 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
7580
7581 if (isGFX10Plus()) {
7582 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
7583 if (Ufmt == UFMT_UNDEF)
7584 return Error(FormatLoc, "unsupported format");
7585 Format = Ufmt;
7586 } else {
7587 Format = encodeDfmtNfmt(Dfmt, Nfmt);
7588 }
7589
7590 return ParseStatus::Success;
7591}
7592
7593ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
7594 SMLoc Loc,
7595 int64_t &Format) {
7596 using namespace llvm::AMDGPU::MTBUFFormat;
7597
7598 auto Id = getUnifiedFormat(FormatStr, getSTI());
7599 if (Id == UFMT_UNDEF)
7600 return ParseStatus::NoMatch;
7601
7602 if (!isGFX10Plus())
7603 return Error(Loc, "unified format is not supported on this GPU");
7604
7605 Format = Id;
7606 return ParseStatus::Success;
7607}
7608
7609ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
7610 using namespace llvm::AMDGPU::MTBUFFormat;
7611 SMLoc Loc = getLoc();
7612
7613 if (!parseExpr(Format))
7614 return ParseStatus::Failure;
7615 if (!isValidFormatEncoding(Format, getSTI()))
7616 return Error(Loc, "out of range format");
7617
7618 return ParseStatus::Success;
7619}
7620
7621ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
7622 using namespace llvm::AMDGPU::MTBUFFormat;
7623
7624 if (!trySkipId("format", AsmToken::Colon))
7625 return ParseStatus::NoMatch;
7626
7627 if (trySkipToken(AsmToken::LBrac)) {
7628 StringRef FormatStr;
7629 SMLoc Loc = getLoc();
7630 if (!parseId(FormatStr, "expected a format string"))
7631 return ParseStatus::Failure;
7632
7633 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
7634 if (Res.isNoMatch())
7635 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
7636 if (!Res.isSuccess())
7637 return Res;
7638
7639 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
7640 return ParseStatus::Failure;
7641
7642 return ParseStatus::Success;
7643 }
7644
7645 return parseNumericFormat(Format);
7646}
7647
ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  // Parse the MTBUF format modifier together with the soffset operand. The
  // format may be spelled before soffset (legacy syntax) or after it
  // ("format:" syntax), so both orders are handled here.
  int64_t Format = getDefaultFormatEncoding(getSTI());
  ParseStatus Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res.isFailure())
    return Res;

  bool FormatFound = Res.isSuccess();

  // Always push a format operand, using the default encoding when no format
  // was seen; it may be patched below if the format follows soffset.
  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return ParseStatus::Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (!Res.isSuccess())
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res.isFailure())
      return Res;
    if (Res.isSuccess()) {
      // Patch the placeholder format operand pushed above; it sits just
      // before the soffset operand.
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return ParseStatus::Success;
  }

  // A second format specification is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon))
    return Error(getLoc(), "duplicate format");
  return ParseStatus::Success;
}
7698
7699ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
7700 ParseStatus Res =
7701 parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
7702 if (Res.isNoMatch()) {
7703 Res = parseIntWithPrefix("inst_offset", Operands,
7704 AMDGPUOperand::ImmTyInstOffset);
7705 }
7706 return Res;
7707}
7708
7709ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
7710 ParseStatus Res =
7711 parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
7712 if (Res.isNoMatch())
7713 Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
7714 return Res;
7715}
7716
7717ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
7718 ParseStatus Res =
7719 parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
7720 if (Res.isNoMatch()) {
7721 Res =
7722 parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
7723 }
7724 return Res;
7725}
7726
7727//===----------------------------------------------------------------------===//
7728// Exp
7729//===----------------------------------------------------------------------===//
7730
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  // Convert parsed EXP instruction operands into an MCInst, computing the
  // channel-enable mask from which of the four sources are live.
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4]; // MCInst operand index of each of the 4 sources.
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An "off" placeholder becomes a null register operand.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(MCRegister()));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // "done"/"row_en" tokens contribute no MCInst operand of their own.
    if (Op.isToken() && (Op.getToken() == "done" || Op.getToken() == "row_en"))
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed form: source slot 1 takes the value parsed into slot 2,
    // and slots 2 and 3 are cleared.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(MCRegister());
    Inst.getOperand(OperandIdx[3]).setReg(MCRegister());
  }

  for (auto i = 0; i < SrcIdx; ++i) {
    // Each live source enables one channel bit, or two in compressed form.
    if (Inst.getOperand(OperandIdx[i]).getReg()) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
7791
7792//===----------------------------------------------------------------------===//
7793// s_waitcnt
7794//===----------------------------------------------------------------------===//
7795
// Fold CntVal into the IntVal waitcnt bitfield using the given encode/decode
// pair. Returns true on failure, i.e. when the value does not round-trip
// (too large for its field) and saturation was not requested.
static bool
                 const AMDGPU::IsaVersion ISA,
                 int64_t &IntVal,
                 int64_t CntVal,
                 bool Saturate,
                 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
                 unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  // If decoding does not reproduce CntVal, the value overflowed its field.
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      // Clamp to the field maximum instead of failing.
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}
7817
7818bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
7819
7820 SMLoc CntLoc = getLoc();
7821 StringRef CntName = getTokenStr();
7822
7823 if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
7824 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7825 return false;
7826
7827 int64_t CntVal;
7828 SMLoc ValLoc = getLoc();
7829 if (!parseExpr(CntVal))
7830 return false;
7831
7832 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7833
7834 bool Failed = true;
7835 bool Sat = CntName.ends_with("_sat");
7836
7837 if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
7838 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
7839 } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
7840 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
7841 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
7842 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
7843 } else {
7844 Error(CntLoc, "invalid counter name " + CntName);
7845 return false;
7846 }
7847
7848 if (Failed) {
7849 Error(ValLoc, "too large value for " + CntName);
7850 return false;
7851 }
7852
7853 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
7854 return false;
7855
7856 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
7857 if (isToken(AsmToken::EndOfStatement)) {
7858 Error(getLoc(), "expected a counter name");
7859 return false;
7860 }
7861 }
7862
7863 return true;
7864}
7865
7866ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
7867 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
7868 int64_t Waitcnt = getWaitcntBitMask(ISA);
7869 SMLoc S = getLoc();
7870
7871 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7872 while (!isToken(AsmToken::EndOfStatement)) {
7873 if (!parseCnt(Waitcnt))
7874 return ParseStatus::Failure;
7875 }
7876 } else {
7877 if (!parseExpr(Waitcnt))
7878 return ParseStatus::Failure;
7879 }
7880
7881 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
7882 return ParseStatus::Success;
7883}
7884
7885bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
7886 SMLoc FieldLoc = getLoc();
7887 StringRef FieldName = getTokenStr();
7888 if (!skipToken(AsmToken::Identifier, "expected a field name") ||
7889 !skipToken(AsmToken::LParen, "expected a left parenthesis"))
7890 return false;
7891
7892 SMLoc ValueLoc = getLoc();
7893 StringRef ValueName = getTokenStr();
7894 if (!skipToken(AsmToken::Identifier, "expected a value name") ||
7895 !skipToken(AsmToken::RParen, "expected a right parenthesis"))
7896 return false;
7897
7898 unsigned Shift;
7899 if (FieldName == "instid0") {
7900 Shift = 0;
7901 } else if (FieldName == "instskip") {
7902 Shift = 4;
7903 } else if (FieldName == "instid1") {
7904 Shift = 7;
7905 } else {
7906 Error(FieldLoc, "invalid field name " + FieldName);
7907 return false;
7908 }
7909
7910 int Value;
7911 if (Shift == 4) {
7912 // Parse values for instskip.
7913 Value = StringSwitch<int>(ValueName)
7914 .Case("SAME", 0)
7915 .Case("NEXT", 1)
7916 .Case("SKIP_1", 2)
7917 .Case("SKIP_2", 3)
7918 .Case("SKIP_3", 4)
7919 .Case("SKIP_4", 5)
7920 .Default(-1);
7921 } else {
7922 // Parse values for instid0 and instid1.
7923 Value = StringSwitch<int>(ValueName)
7924 .Case("NO_DEP", 0)
7925 .Case("VALU_DEP_1", 1)
7926 .Case("VALU_DEP_2", 2)
7927 .Case("VALU_DEP_3", 3)
7928 .Case("VALU_DEP_4", 4)
7929 .Case("TRANS32_DEP_1", 5)
7930 .Case("TRANS32_DEP_2", 6)
7931 .Case("TRANS32_DEP_3", 7)
7932 .Case("FMA_ACCUM_CYCLE_1", 8)
7933 .Case("SALU_CYCLE_1", 9)
7934 .Case("SALU_CYCLE_2", 10)
7935 .Case("SALU_CYCLE_3", 11)
7936 .Default(-1);
7937 }
7938 if (Value < 0) {
7939 Error(ValueLoc, "invalid value name " + ValueName);
7940 return false;
7941 }
7942
7943 Delay |= Value << Shift;
7944 return true;
7945}
7946
7947ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
7948 int64_t Delay = 0;
7949 SMLoc S = getLoc();
7950
7951 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
7952 do {
7953 if (!parseDelay(Delay))
7954 return ParseStatus::Failure;
7955 } while (trySkipToken(AsmToken::Pipe));
7956 } else {
7957 if (!parseExpr(Delay))
7958 return ParseStatus::Failure;
7959 }
7960
7961 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
7962 return ParseStatus::Success;
7963}
7964
// Predicate: any immediate operand is acceptable as an s_waitcnt operand.
bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

// Predicate: any immediate operand is acceptable as an s_delay_alu operand.
bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
7971
7972//===----------------------------------------------------------------------===//
7973// DepCtr
7974//===----------------------------------------------------------------------===//
7975
7976void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId,
7977 StringRef DepCtrName) {
7978 switch (ErrorId) {
7979 case OPR_ID_UNKNOWN:
7980 Error(Loc, Twine("invalid counter name ", DepCtrName));
7981 return;
7982 case OPR_ID_UNSUPPORTED:
7983 Error(Loc, Twine(DepCtrName, " is not supported on this GPU"));
7984 return;
7985 case OPR_ID_DUPLICATE:
7986 Error(Loc, Twine("duplicate counter name ", DepCtrName));
7987 return;
7988 case OPR_VAL_INVALID:
7989 Error(Loc, Twine("invalid value for ", DepCtrName));
7990 return;
7991 default:
7992 assert(false);
7993 }
7994}
7995
// Parse a single "<name>(<expr>)" dependency counter specification and
// splice its encoded value into DepCtr. UsedOprMask accumulates the bit
// ranges consumed so far, which lets encodeDepCtr diagnose duplicates.
bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {

  using namespace llvm::AMDGPU::DepCtr;

  // Remember where the counter name started for diagnostics.
  SMLoc DepCtrLoc = getLoc();
  StringRef DepCtrName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t ExprVal;
  if (!parseExpr(ExprVal))
    return false;

  unsigned PrevOprMask = UsedOprMask;
  // encodeDepCtr updates UsedOprMask with this counter's bits and returns a
  // negative OPR_* code (handled by depCtrError) on failure.
  int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI());

  if (CntVal < 0) {
    depCtrError(DepCtrLoc, CntVal, DepCtrName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counters may be separated by '&' or ','; a trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  // The bits consumed by this counter are exactly those newly set in
  // UsedOprMask; replace them in the accumulated encoding.
  unsigned CntValMask = PrevOprMask ^ UsedOprMask;
  DepCtr = (DepCtr & ~CntValMask) | CntVal;
  return true;
}
8033
8034ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
8035 using namespace llvm::AMDGPU::DepCtr;
8036
8037 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
8038 SMLoc Loc = getLoc();
8039
8040 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
8041 unsigned UsedOprMask = 0;
8042 while (!isToken(AsmToken::EndOfStatement)) {
8043 if (!parseDepCtr(DepCtr, UsedOprMask))
8044 return ParseStatus::Failure;
8045 }
8046 } else {
8047 if (!parseExpr(DepCtr))
8048 return ParseStatus::Failure;
8049 }
8050
8051 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
8052 return ParseStatus::Success;
8053}
8054
8055bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
8056
8057//===----------------------------------------------------------------------===//
8058// hwreg
8059//===----------------------------------------------------------------------===//
8060
// Parse the "hwreg(<reg>[, <offset>, <width>])" form of a hwreg operand.
// Returns NoMatch if the input does not start with "hwreg(". The register
// may be given by name or as an absolute expression; offset and width are
// optional but must appear together.
ParseStatus AMDGPUAsmParser::parseHwregFunc(OperandInfoTy &HwReg,
                                            OperandInfoTy &Offset,
                                            OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (!trySkipId("hwreg", AsmToken::LParen))
    return ParseStatus::NoMatch;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Val = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Val, "a register name")) {
    return ParseStatus::Failure;
  }

  // "hwreg(<reg>)" with no optional parameters.
  if (trySkipToken(AsmToken::RParen))
    return ParseStatus::Success;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return ParseStatus::Failure;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Val))
    return ParseStatus::Failure;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return ParseStatus::Failure;

  Width.Loc = getLoc();
  if (!parseExpr(Width.Val) ||
      !skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return ParseStatus::Failure;

  return ParseStatus::Success;
}
8100
8101ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
8102 using namespace llvm::AMDGPU::Hwreg;
8103
8104 int64_t ImmVal = 0;
8105 SMLoc Loc = getLoc();
8106
8107 StructuredOpField HwReg("id", "hardware register", HwregId::Width,
8108 HwregId::Default);
8109 StructuredOpField Offset("offset", "bit offset", HwregOffset::Width,
8110 HwregOffset::Default);
8111 struct : StructuredOpField {
8112 using StructuredOpField::StructuredOpField;
8113 bool validate(AMDGPUAsmParser &Parser) const override {
8114 if (!isUIntN(Width, Val - 1))
8115 return Error(Parser, "only values from 1 to 32 are legal");
8116 return true;
8117 }
8118 } Width("size", "bitfield width", HwregSize::Width, HwregSize::Default);
8119 ParseStatus Res = parseStructuredOpFields({&HwReg, &Offset, &Width});
8120
8121 if (Res.isNoMatch())
8122 Res = parseHwregFunc(HwReg, Offset, Width);
8123
8124 if (Res.isSuccess()) {
8125 if (!validateStructuredOpFields({&HwReg, &Offset, &Width}))
8126 return ParseStatus::Failure;
8127 ImmVal = HwregEncoding::encode(HwReg.Val, Offset.Val, Width.Val);
8128 }
8129
8130 if (Res.isNoMatch() &&
8131 parseExpr(ImmVal, "a hwreg macro, structured immediate"))
8133
8134 if (!Res.isSuccess())
8135 return ParseStatus::Failure;
8136
8137 if (!isUInt<16>(ImmVal))
8138 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8139 Operands.push_back(
8140 AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
8141 return ParseStatus::Success;
8142}
8143
8144bool AMDGPUOperand::isHwreg() const {
8145 return isImmTy(ImmTyHwreg);
8146}
8147
8148//===----------------------------------------------------------------------===//
8149// sendmsg
8150//===----------------------------------------------------------------------===//
8151
8152bool
8153AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
8154 OperandInfoTy &Op,
8155 OperandInfoTy &Stream) {
8156 using namespace llvm::AMDGPU::SendMsg;
8157
8158 Msg.Loc = getLoc();
8159 if (isToken(AsmToken::Identifier) &&
8160 (Msg.Val = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) {
8161 Msg.IsSymbolic = true;
8162 lex(); // skip message name
8163 } else if (!parseExpr(Msg.Val, "a message name")) {
8164 return false;
8165 }
8166
8167 if (trySkipToken(AsmToken::Comma)) {
8168 Op.IsDefined = true;
8169 Op.Loc = getLoc();
8170 if (isToken(AsmToken::Identifier) &&
8171 (Op.Val = getMsgOpId(Msg.Val, getTokenStr(), getSTI())) !=
8173 lex(); // skip operation name
8174 } else if (!parseExpr(Op.Val, "an operation name")) {
8175 return false;
8176 }
8177
8178 if (trySkipToken(AsmToken::Comma)) {
8179 Stream.IsDefined = true;
8180 Stream.Loc = getLoc();
8181 if (!parseExpr(Stream.Val))
8182 return false;
8183 }
8184 }
8185
8186 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
8187}
8188
// Check a parsed sendmsg(msg, op, stream) triple for consistency and emit a
// diagnostic at the offending component's location on failure.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (Strict) {
    if (Msg.Val == OPR_ID_UNSUPPORTED) {
      Error(Msg.Loc, "specified message id is not supported on this GPU");
      return false;
    }
  } else {
    if (!isValidMsgId(Msg.Val, getSTI())) {
      Error(Msg.Loc, "invalid message id");
      return false;
    }
  }
  // In strict mode an operation must be present iff the message requires one.
  if (Strict && (msgRequiresOp(Msg.Val, getSTI()) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Val, Op.Val, getSTI(), Strict)) {
    if (Op.Val == OPR_ID_UNSUPPORTED)
      Error(Op.Loc, "specified operation id is not supported on this GPU");
    else
      Error(Op.Loc, "invalid operation id");
    return false;
  }
  // A stream id may only be given when the message/operation pair allows it.
  if (Strict && !msgSupportsStream(Msg.Val, Op.Val, getSTI()) &&
      Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Val, Op.Val, Stream.Val, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}
8237
8238ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
8239 using namespace llvm::AMDGPU::SendMsg;
8240
8241 int64_t ImmVal = 0;
8242 SMLoc Loc = getLoc();
8243
8244 if (trySkipId("sendmsg", AsmToken::LParen)) {
8245 OperandInfoTy Msg(OPR_ID_UNKNOWN);
8246 OperandInfoTy Op(OP_NONE_);
8247 OperandInfoTy Stream(STREAM_ID_NONE_);
8248 if (parseSendMsgBody(Msg, Op, Stream) &&
8249 validateSendMsg(Msg, Op, Stream)) {
8250 ImmVal = encodeMsg(Msg.Val, Op.Val, Stream.Val);
8251 } else {
8252 return ParseStatus::Failure;
8253 }
8254 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
8255 if (ImmVal < 0 || !isUInt<16>(ImmVal))
8256 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8257 } else {
8258 return ParseStatus::Failure;
8259 }
8260
8261 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
8262 return ParseStatus::Success;
8263}
8264
8265bool AMDGPUOperand::isSendMsg() const {
8266 return isImmTy(ImmTySendMsg);
8267}
8268
8269ParseStatus AMDGPUAsmParser::parseWaitEvent(OperandVector &Operands) {
8270 using namespace llvm::AMDGPU::WaitEvent;
8271
8272 SMLoc Loc = getLoc();
8273 int64_t ImmVal = 0;
8274
8275 StructuredOpField DontWaitExportReady("dont_wait_export_ready", "bit value",
8276 1, 0);
8277 StructuredOpField ExportReady("export_ready", "bit value", 1, 0);
8278
8279 StructuredOpField *TargetBitfield =
8280 isGFX11() ? &DontWaitExportReady : &ExportReady;
8281
8282 ParseStatus Res = parseStructuredOpFields({TargetBitfield});
8283 if (Res.isNoMatch() && parseExpr(ImmVal, "structured immediate"))
8285 else if (Res.isSuccess()) {
8286 if (!validateStructuredOpFields({TargetBitfield}))
8287 return ParseStatus::Failure;
8288 ImmVal = TargetBitfield->Val;
8289 }
8290
8291 if (!Res.isSuccess())
8292 return ParseStatus::Failure;
8293
8294 if (!isUInt<16>(ImmVal))
8295 return Error(Loc, "invalid immediate: only 16-bit values are legal");
8296
8297 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc,
8298 AMDGPUOperand::ImmTyWaitEvent));
8299 return ParseStatus::Success;
8300}
8301
8302bool AMDGPUOperand::isWaitEvent() const { return isImmTy(ImmTyWaitEvent); }
8303
8304//===----------------------------------------------------------------------===//
8305// v_interp
8306//===----------------------------------------------------------------------===//
8307
8308ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
8309 StringRef Str;
8310 SMLoc S = getLoc();
8311
8312 if (!parseId(Str))
8313 return ParseStatus::NoMatch;
8314
8315 int Slot = StringSwitch<int>(Str)
8316 .Case("p10", 0)
8317 .Case("p20", 1)
8318 .Case("p0", 2)
8319 .Default(-1);
8320
8321 if (Slot == -1)
8322 return Error(S, "invalid interpolation slot");
8323
8324 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
8325 AMDGPUOperand::ImmTyInterpSlot));
8326 return ParseStatus::Success;
8327}
8328
8329ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
8330 StringRef Str;
8331 SMLoc S = getLoc();
8332
8333 if (!parseId(Str))
8334 return ParseStatus::NoMatch;
8335
8336 if (!Str.starts_with("attr"))
8337 return Error(S, "invalid interpolation attribute");
8338
8339 StringRef Chan = Str.take_back(2);
8340 int AttrChan = StringSwitch<int>(Chan)
8341 .Case(".x", 0)
8342 .Case(".y", 1)
8343 .Case(".z", 2)
8344 .Case(".w", 3)
8345 .Default(-1);
8346 if (AttrChan == -1)
8347 return Error(S, "invalid or missing interpolation attribute channel");
8348
8349 Str = Str.drop_back(2).drop_front(4);
8350
8351 uint8_t Attr;
8352 if (Str.getAsInteger(10, Attr))
8353 return Error(S, "invalid or missing interpolation attribute number");
8354
8355 if (Attr > 32)
8356 return Error(S, "out of bounds interpolation attribute number");
8357
8358 SMLoc SChan = SMLoc::getFromPointer(Chan.data());
8359
8360 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
8361 AMDGPUOperand::ImmTyInterpAttr));
8362 Operands.push_back(AMDGPUOperand::CreateImm(
8363 this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
8364 return ParseStatus::Success;
8365}
8366
8367//===----------------------------------------------------------------------===//
8368// exp
8369//===----------------------------------------------------------------------===//
8370
8371ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
8372 using namespace llvm::AMDGPU::Exp;
8373
8374 StringRef Str;
8375 SMLoc S = getLoc();
8376
8377 if (!parseId(Str))
8378 return ParseStatus::NoMatch;
8379
8380 unsigned Id = getTgtId(Str);
8381 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
8382 return Error(S, (Id == ET_INVALID)
8383 ? "invalid exp target"
8384 : "exp target is not supported on this GPU");
8385
8386 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
8387 AMDGPUOperand::ImmTyExpTgt));
8388 return ParseStatus::Success;
8389}
8390
8391//===----------------------------------------------------------------------===//
8392// parser helpers
8393//===----------------------------------------------------------------------===//
8394
8395bool
8396AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
8397 return Token.is(AsmToken::Identifier) && Token.getString() == Id;
8398}
8399
8400bool
8401AMDGPUAsmParser::isId(const StringRef Id) const {
8402 return isId(getToken(), Id);
8403}
8404
8405bool
8406AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
8407 return getTokenKind() == Kind;
8408}
8409
8410StringRef AMDGPUAsmParser::getId() const {
8411 return isToken(AsmToken::Identifier) ? getTokenStr() : StringRef();
8412}
8413
8414bool
8415AMDGPUAsmParser::trySkipId(const StringRef Id) {
8416 if (isId(Id)) {
8417 lex();
8418 return true;
8419 }
8420 return false;
8421}
8422
8423bool
8424AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
8425 if (isToken(AsmToken::Identifier)) {
8426 StringRef Tok = getTokenStr();
8427 if (Tok.starts_with(Pref) && Tok.drop_front(Pref.size()) == Id) {
8428 lex();
8429 return true;
8430 }
8431 }
8432 return false;
8433}
8434
8435bool
8436AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
8437 if (isId(Id) && peekToken().is(Kind)) {
8438 lex();
8439 lex();
8440 return true;
8441 }
8442 return false;
8443}
8444
8445bool
8446AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
8447 if (isToken(Kind)) {
8448 lex();
8449 return true;
8450 }
8451 return false;
8452}
8453
8454bool
8455AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
8456 const StringRef ErrMsg) {
8457 if (!trySkipToken(Kind)) {
8458 Error(getLoc(), ErrMsg);
8459 return false;
8460 }
8461 return true;
8462}
8463
8464bool
8465AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
8466 SMLoc S = getLoc();
8467
8468 const MCExpr *Expr;
8469 if (Parser.parseExpression(Expr))
8470 return false;
8471
8472 if (Expr->evaluateAsAbsolute(Imm))
8473 return true;
8474
8475 if (Expected.empty()) {
8476 Error(S, "expected absolute expression");
8477 } else {
8478 Error(S, Twine("expected ", Expected) +
8479 Twine(" or an absolute expression"));
8480 }
8481 return false;
8482}
8483
8484bool
8485AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
8486 SMLoc S = getLoc();
8487
8488 const MCExpr *Expr;
8489 if (Parser.parseExpression(Expr))
8490 return false;
8491
8492 int64_t IntVal;
8493 if (Expr->evaluateAsAbsolute(IntVal)) {
8494 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
8495 } else {
8496 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
8497 }
8498 return true;
8499}
8500
8501bool
8502AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
8503 if (isToken(AsmToken::String)) {
8504 Val = getToken().getStringContents();
8505 lex();
8506 return true;
8507 }
8508 Error(getLoc(), ErrMsg);
8509 return false;
8510}
8511
8512bool
8513AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
8514 if (isToken(AsmToken::Identifier)) {
8515 Val = getTokenStr();
8516 lex();
8517 return true;
8518 }
8519 if (!ErrMsg.empty())
8520 Error(getLoc(), ErrMsg);
8521 return false;
8522}
8523
8524AsmToken
8525AMDGPUAsmParser::getToken() const {
8526 return Parser.getTok();
8527}
8528
8529AsmToken AMDGPUAsmParser::peekToken(bool ShouldSkipSpace) {
8530 return isToken(AsmToken::EndOfStatement)
8531 ? getToken()
8532 : getLexer().peekTok(ShouldSkipSpace);
8533}
8534
8535void
8536AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
8537 auto TokCount = getLexer().peekTokens(Tokens);
8538
8539 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
8540 Tokens[Idx] = AsmToken(AsmToken::Error, "");
8541}
8542
8544AMDGPUAsmParser::getTokenKind() const {
8545 return getLexer().getKind();
8546}
8547
8548SMLoc
8549AMDGPUAsmParser::getLoc() const {
8550 return getToken().getLoc();
8551}
8552
8553StringRef
8554AMDGPUAsmParser::getTokenStr() const {
8555 return getToken().getString();
8556}
8557
8558void
8559AMDGPUAsmParser::lex() {
8560 Parser.Lex();
8561}
8562
8563SMLoc AMDGPUAsmParser::getInstLoc(const OperandVector &Operands) const {
8564 return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
8565}
8566
8567// Returns one of the given locations that comes later in the source.
8568SMLoc AMDGPUAsmParser::getLaterLoc(SMLoc a, SMLoc b) {
8569 return a.getPointer() < b.getPointer() ? b : a;
8570}
8571
8572SMLoc AMDGPUAsmParser::getOperandLoc(const OperandVector &Operands,
8573 int MCOpIdx) const {
8574 for (const auto &Op : Operands) {
8575 const auto TargetOp = static_cast<AMDGPUOperand &>(*Op);
8576 if (TargetOp.getMCOpIdx() == MCOpIdx)
8577 return TargetOp.getStartLoc();
8578 }
8579 llvm_unreachable("No such MC operand!");
8580}
8581
8582SMLoc
8583AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
8584 const OperandVector &Operands) const {
8585 for (unsigned i = Operands.size() - 1; i > 0; --i) {
8586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
8587 if (Test(Op))
8588 return Op.getStartLoc();
8589 }
8590 return getInstLoc(Operands);
8591}
8592
8593SMLoc
8594AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
8595 const OperandVector &Operands) const {
8596 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
8597 return getOperandLoc(Test, Operands);
8598}
8599
// Parse a "{field: value, ...}" structured operand. Returns NoMatch if the
// input does not start with '{'. Each parsed field name is looked up in
// Fields; unknown and repeated names are diagnosed. Values must be absolute
// expressions.
ParseStatus
AMDGPUAsmParser::parseStructuredOpFields(ArrayRef<StructuredOpField *> Fields) {
  if (!trySkipToken(AsmToken::LCurly))
    return ParseStatus::NoMatch;

  bool First = true;
  while (!trySkipToken(AsmToken::RCurly)) {
    // Fields after the first must be comma-separated.
    if (!First &&
        !skipToken(AsmToken::Comma, "comma or closing brace expected"))
      return ParseStatus::Failure;

    StringRef Id = getTokenStr();
    SMLoc IdLoc = getLoc();
    if (!skipToken(AsmToken::Identifier, "field name expected") ||
        !skipToken(AsmToken::Colon, "colon expected"))
      return ParseStatus::Failure;

    const auto *I =
        find_if(Fields, [Id](StructuredOpField *F) { return F->Id == Id; });
    if (I == Fields.end())
      return Error(IdLoc, "unknown field");
    if ((*I)->IsDefined)
      return Error(IdLoc, "duplicate field");

    // TODO: Support symbolic values.
    (*I)->Loc = getLoc();
    if (!parseExpr((*I)->Val))
      return ParseStatus::Failure;
    (*I)->IsDefined = true;

    First = false;
  }
  return ParseStatus::Success;
}
8634
8635bool AMDGPUAsmParser::validateStructuredOpFields(
8637 return all_of(Fields, [this](const StructuredOpField *F) {
8638 return F->validate(*this);
8639 });
8640}
8641
8642//===----------------------------------------------------------------------===//
8643// swizzle
8644//===----------------------------------------------------------------------===//
8645
8647static unsigned
8648encodeBitmaskPerm(const unsigned AndMask,
8649 const unsigned OrMask,
8650 const unsigned XorMask) {
8651 using namespace llvm::AMDGPU::Swizzle;
8652
8653 return BITMASK_PERM_ENC |
8654 (AndMask << BITMASK_AND_SHIFT) |
8655 (OrMask << BITMASK_OR_SHIFT) |
8656 (XorMask << BITMASK_XOR_SHIFT);
8657}
8658
8659bool AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, const unsigned MinVal,
8660 const unsigned MaxVal,
8661 const Twine &ErrMsg, SMLoc &Loc) {
8662 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8663 return false;
8664 }
8665 Loc = getLoc();
8666 if (!parseExpr(Op)) {
8667 return false;
8668 }
8669 if (Op < MinVal || Op > MaxVal) {
8670 Error(Loc, ErrMsg);
8671 return false;
8672 }
8673
8674 return true;
8675}
8676
8677bool
8678AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
8679 const unsigned MinVal,
8680 const unsigned MaxVal,
8681 const StringRef ErrMsg) {
8682 SMLoc Loc;
8683 for (unsigned i = 0; i < OpNum; ++i) {
8684 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
8685 return false;
8686 }
8687
8688 return true;
8689}
8690
8691bool
8692AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
8693 using namespace llvm::AMDGPU::Swizzle;
8694
8695 int64_t Lane[LANE_NUM];
8696 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
8697 "expected a 2-bit lane id")) {
8699 for (unsigned I = 0; I < LANE_NUM; ++I) {
8700 Imm |= Lane[I] << (LANE_SHIFT * I);
8701 }
8702 return true;
8703 }
8704 return false;
8705}
8706
8707bool
8708AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
8709 using namespace llvm::AMDGPU::Swizzle;
8710
8711 SMLoc Loc;
8712 int64_t GroupSize;
8713 int64_t LaneIdx;
8714
8715 if (!parseSwizzleOperand(GroupSize,
8716 2, 32,
8717 "group size must be in the interval [2,32]",
8718 Loc)) {
8719 return false;
8720 }
8721 if (!isPowerOf2_64(GroupSize)) {
8722 Error(Loc, "group size must be a power of two");
8723 return false;
8724 }
8725 if (parseSwizzleOperand(LaneIdx,
8726 0, GroupSize - 1,
8727 "lane id must be in the interval [0,group size - 1]",
8728 Loc)) {
8729 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
8730 return true;
8731 }
8732 return false;
8733}
8734
8735bool
8736AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
8737 using namespace llvm::AMDGPU::Swizzle;
8738
8739 SMLoc Loc;
8740 int64_t GroupSize;
8741
8742 if (!parseSwizzleOperand(GroupSize,
8743 2, 32,
8744 "group size must be in the interval [2,32]",
8745 Loc)) {
8746 return false;
8747 }
8748 if (!isPowerOf2_64(GroupSize)) {
8749 Error(Loc, "group size must be a power of two");
8750 return false;
8751 }
8752
8753 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
8754 return true;
8755}
8756
8757bool
8758AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
8759 using namespace llvm::AMDGPU::Swizzle;
8760
8761 SMLoc Loc;
8762 int64_t GroupSize;
8763
8764 if (!parseSwizzleOperand(GroupSize,
8765 1, 16,
8766 "group size must be in the interval [1,16]",
8767 Loc)) {
8768 return false;
8769 }
8770 if (!isPowerOf2_64(GroupSize)) {
8771 Error(Loc, "group size must be a power of two");
8772 return false;
8773 }
8774
8775 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
8776 return true;
8777}
8778
8779bool
8780AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
8781 using namespace llvm::AMDGPU::Swizzle;
8782
8783 if (!skipToken(AsmToken::Comma, "expected a comma")) {
8784 return false;
8785 }
8786
8787 StringRef Ctl;
8788 SMLoc StrLoc = getLoc();
8789 if (!parseString(Ctl)) {
8790 return false;
8791 }
8792 if (Ctl.size() != BITMASK_WIDTH) {
8793 Error(StrLoc, "expected a 5-character mask");
8794 return false;
8795 }
8796
8797 unsigned AndMask = 0;
8798 unsigned OrMask = 0;
8799 unsigned XorMask = 0;
8800
8801 for (size_t i = 0; i < Ctl.size(); ++i) {
8802 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
8803 switch(Ctl[i]) {
8804 default:
8805 Error(StrLoc, "invalid mask");
8806 return false;
8807 case '0':
8808 break;
8809 case '1':
8810 OrMask |= Mask;
8811 break;
8812 case 'p':
8813 AndMask |= Mask;
8814 break;
8815 case 'i':
8816 AndMask |= Mask;
8817 XorMask |= Mask;
8818 break;
8819 }
8820 }
8821
8822 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
8823 return true;
8824}
8825
8826bool AMDGPUAsmParser::parseSwizzleFFT(int64_t &Imm) {
8827 using namespace llvm::AMDGPU::Swizzle;
8828
8829 if (!AMDGPU::isGFX9Plus(getSTI())) {
8830 Error(getLoc(), "FFT mode swizzle not supported on this GPU");
8831 return false;
8832 }
8833
8834 int64_t Swizzle;
8835 SMLoc Loc;
8836 if (!parseSwizzleOperand(Swizzle, 0, FFT_SWIZZLE_MAX,
8837 "FFT swizzle must be in the interval [0," +
8838 Twine(FFT_SWIZZLE_MAX) + Twine(']'),
8839 Loc))
8840 return false;
8841
8842 Imm = FFT_MODE_ENC | Swizzle;
8843 return true;
8844}
8845
8846bool AMDGPUAsmParser::parseSwizzleRotate(int64_t &Imm) {
8847 using namespace llvm::AMDGPU::Swizzle;
8848
8849 if (!AMDGPU::isGFX9Plus(getSTI())) {
8850 Error(getLoc(), "Rotate mode swizzle not supported on this GPU");
8851 return false;
8852 }
8853
8854 SMLoc Loc;
8855 int64_t Direction;
8856
8857 if (!parseSwizzleOperand(Direction, 0, 1,
8858 "direction must be 0 (left) or 1 (right)", Loc))
8859 return false;
8860
8861 int64_t RotateSize;
8862 if (!parseSwizzleOperand(
8863 RotateSize, 0, ROTATE_MAX_SIZE,
8864 "number of threads to rotate must be in the interval [0," +
8865 Twine(ROTATE_MAX_SIZE) + Twine(']'),
8866 Loc))
8867 return false;
8868
8870 (RotateSize << ROTATE_SIZE_SHIFT);
8871 return true;
8872}
8873
8874bool
8875AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
8876
8877 SMLoc OffsetLoc = getLoc();
8878
8879 if (!parseExpr(Imm, "a swizzle macro")) {
8880 return false;
8881 }
8882 if (!isUInt<16>(Imm)) {
8883 Error(OffsetLoc, "expected a 16-bit offset");
8884 return false;
8885 }
8886 return true;
8887}
8888
// Parse "(<mode>, ...)" after the "swizzle" id has been consumed: dispatch
// on the symbolic mode name to the mode-specific parser, each of which
// stores the complete encoded value in Imm.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else if (trySkipId(IdSymbolic[ID_FFT])) {
      Ok = parseSwizzleFFT(Imm);
    } else if (trySkipId(IdSymbolic[ID_ROTATE])) {
      Ok = parseSwizzleRotate(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    // The closing parenthesis is required even after a mode-parse failure.
    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}
8921
8922ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
8923 SMLoc S = getLoc();
8924 int64_t Imm = 0;
8925
8926 if (trySkipId("offset")) {
8927
8928 bool Ok = false;
8929 if (skipToken(AsmToken::Colon, "expected a colon")) {
8930 if (trySkipId("swizzle")) {
8931 Ok = parseSwizzleMacro(Imm);
8932 } else {
8933 Ok = parseSwizzleOffset(Imm);
8934 }
8935 }
8936
8937 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
8938
8940 }
8941 return ParseStatus::NoMatch;
8942}
8943
8944bool
8945AMDGPUOperand::isSwizzle() const {
8946 return isImmTy(ImmTySwizzle);
8947}
8948
8949//===----------------------------------------------------------------------===//
8950// VGPR Index Mode
8951//===----------------------------------------------------------------------===//
8952
// Parse the body of a "gpr_idx(...)" macro after "gpr_idx(" has been
// consumed: an optionally-empty, comma-separated list of index mode names.
// Returns the combined mode mask (OFF for an empty list) or UNDEF after
// emitting a diagnostic.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  // "gpr_idx()" means no modes enabled.
  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each known mode name; ModeId doubles as the bit position.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      // An empty list would already have matched ')' above, so mention the
      // closing parenthesis only before the first mode.
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}
8996
8997ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
8998
8999 using namespace llvm::AMDGPU::VGPRIndexMode;
9000
9001 int64_t Imm = 0;
9002 SMLoc S = getLoc();
9003
9004 if (trySkipId("gpr_idx", AsmToken::LParen)) {
9005 Imm = parseGPRIdxMacro();
9006 if (Imm == UNDEF)
9007 return ParseStatus::Failure;
9008 } else {
9009 if (getParser().parseAbsoluteExpression(Imm))
9010 return ParseStatus::Failure;
9011 if (Imm < 0 || !isUInt<4>(Imm))
9012 return Error(S, "invalid immediate: only 4-bit values are legal");
9013 }
9014
9015 Operands.push_back(
9016 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
9017 return ParseStatus::Success;
9018}
9019
9020bool AMDGPUOperand::isGPRIdxMode() const {
9021 return isImmTy(ImmTyGprIdxMode);
9022}
9023
9024//===----------------------------------------------------------------------===//
9025// sopp branch targets
9026//===----------------------------------------------------------------------===//
9027
9028ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
9029
9030 // Make sure we are not parsing something
9031 // that looks like a label or an expression but is not.
9032 // This will improve error messages.
9033 if (isRegister() || isModifier())
9034 return ParseStatus::NoMatch;
9035
9036 if (!parseExpr(Operands))
9037 return ParseStatus::Failure;
9038
9039 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
9040 assert(Opr.isImm() || Opr.isExpr());
9041 SMLoc Loc = Opr.getStartLoc();
9042
9043 // Currently we do not support arbitrary expressions as branch targets.
9044 // Only labels and absolute expressions are accepted.
9045 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
9046 Error(Loc, "expected an absolute expression or a label");
9047 } else if (Opr.isImm() && !Opr.isS16Imm()) {
9048 Error(Loc, "expected a 16-bit signed jump offset");
9049 }
9050
9051 return ParseStatus::Success;
9052}
9053
9054//===----------------------------------------------------------------------===//
9055// Boolean holding registers
9056//===----------------------------------------------------------------------===//
9057
9058ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
9059 return parseReg(Operands);
9060}
9061
9062//===----------------------------------------------------------------------===//
9063// mubuf
9064//===----------------------------------------------------------------------===//
9065
9066void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
9067 const OperandVector &Operands,
9068 bool IsAtomic) {
9069 OptionalImmIndexMap OptionalIdx;
9070 unsigned FirstOperandIdx = 1;
9071 bool IsAtomicReturn = false;
9072
9073 if (IsAtomic) {
9074 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
9076 }
9077
9078 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
9079 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
9080
9081 // Add the register arguments
9082 if (Op.isReg()) {
9083 Op.addRegOperands(Inst, 1);
9084 // Insert a tied src for atomic return dst.
9085 // This cannot be postponed as subsequent calls to
9086 // addImmOperands rely on correct number of MC operands.
9087 if (IsAtomicReturn && i == FirstOperandIdx)
9088 Op.addRegOperands(Inst, 1);
9089 continue;
9090 }
9091
9092 // Handle the case where soffset is an immediate
9093 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
9094 Op.addImmOperands(Inst, 1);
9095 continue;
9096 }
9097
9098 // Handle tokens like 'offen' which are sometimes hard-coded into the
9099 // asm string. There are no MCInst operands for these.
9100 if (Op.isToken()) {
9101 continue;
9102 }
9103 assert(Op.isImm());
9104
9105 // Handle optional arguments
9106 OptionalIdx[Op.getImmTy()] = i;
9107 }
9108
9109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
9110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
9111 // Parse a dummy operand as a placeholder for the SWZ operand. This enforces
9112 // agreement between MCInstrDesc.getNumOperands and MCInst.getNumOperands.
9114}
9115
9116//===----------------------------------------------------------------------===//
9117// smrd
9118//===----------------------------------------------------------------------===//
9119
9120bool AMDGPUOperand::isSMRDOffset8() const {
9121 return isImmLiteral() && isUInt<8>(getImm());
9122}
9123
9124bool AMDGPUOperand::isSMEMOffset() const {
9125 // Offset range is checked later by validator.
9126 return isImmLiteral();
9127}
9128
9129bool AMDGPUOperand::isSMRDLiteralOffset() const {
9130 // 32-bit literals are only supported on CI and we only want to use them
9131 // when the offset is > 8-bits.
9132 return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
9133}
9134
9135//===----------------------------------------------------------------------===//
9136// vop3
9137//===----------------------------------------------------------------------===//
9138
// Translate an omod "mul:N" operand (N in {1,2,4}) to its 2-bit encoding
// (0, 1, 2 respectively). Returns false for any other multiplier.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1; // 1 -> 0, 2 -> 1, 4 -> 2
    return true;
  default:
    return false;
  }
}
9146
// Translate an omod "div:N" operand to its 2-bit encoding: div:1 -> 0
// (no scaling), div:2 -> 3 (multiply by 0.5). Other divisors are invalid.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
9160
9161// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
9162// This is intentional and ensures compatibility with sp3.
9163// See bug 35397 for details.
9164bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
9165 if (BoundCtrl == 0 || BoundCtrl == 1) {
9166 if (!isGFX11Plus())
9167 BoundCtrl = 1;
9168 return true;
9169 }
9170 return false;
9171}
9172
9173void AMDGPUAsmParser::onBeginOfFile() {
9174 if (!getParser().getStreamer().getTargetStreamer() ||
9175 getSTI().getTargetTriple().getArch() == Triple::r600)
9176 return;
9177
9178 if (!getTargetStreamer().getTargetID())
9179 getTargetStreamer().initializeTargetID(getSTI(),
9180 getSTI().getFeatureString());
9181
9182 if (isHsaAbi(getSTI()))
9183 getTargetStreamer().EmitDirectiveAMDGCNTarget();
9184}
9185
9186/// Parse AMDGPU specific expressions.
9187///
9188/// expr ::= or(expr, ...) |
9189/// max(expr, ...)
9190///
9191bool AMDGPUAsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
9192 using AGVK = AMDGPUMCExpr::VariantKind;
9193
9194 if (isToken(AsmToken::Identifier)) {
9195 StringRef TokenId = getTokenStr();
9196 AGVK VK = StringSwitch<AGVK>(TokenId)
9197 .Case("max", AGVK::AGVK_Max)
9198 .Case("or", AGVK::AGVK_Or)
9199 .Case("extrasgprs", AGVK::AGVK_ExtraSGPRs)
9200 .Case("totalnumvgprs", AGVK::AGVK_TotalNumVGPRs)
9201 .Case("alignto", AGVK::AGVK_AlignTo)
9202 .Case("occupancy", AGVK::AGVK_Occupancy)
9203 .Default(AGVK::AGVK_None);
9204
9205 if (VK != AGVK::AGVK_None && peekToken().is(AsmToken::LParen)) {
9207 uint64_t CommaCount = 0;
9208 lex(); // Eat Arg ('or', 'max', 'occupancy', etc.)
9209 lex(); // Eat '('
9210 while (true) {
9211 if (trySkipToken(AsmToken::RParen)) {
9212 if (Exprs.empty()) {
9213 Error(getToken().getLoc(),
9214 "empty " + Twine(TokenId) + " expression");
9215 return true;
9216 }
9217 if (CommaCount + 1 != Exprs.size()) {
9218 Error(getToken().getLoc(),
9219 "mismatch of commas in " + Twine(TokenId) + " expression");
9220 return true;
9221 }
9222 Res = AMDGPUMCExpr::create(VK, Exprs, getContext());
9223 return false;
9224 }
9225 const MCExpr *Expr;
9226 if (getParser().parseExpression(Expr, EndLoc))
9227 return true;
9228 Exprs.push_back(Expr);
9229 bool LastTokenWasComma = trySkipToken(AsmToken::Comma);
9230 if (LastTokenWasComma)
9231 CommaCount++;
9232 if (!LastTokenWasComma && !isToken(AsmToken::RParen)) {
9233 Error(getToken().getLoc(),
9234 "unexpected token in " + Twine(TokenId) + " expression");
9235 return true;
9236 }
9237 }
9238 }
9239 }
9240 return getParser().parsePrimaryExpr(Res, EndLoc, nullptr);
9241}
9242
9243ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
9244 StringRef Name = getTokenStr();
9245 if (Name == "mul") {
9246 return parseIntWithPrefix("mul", Operands,
9247 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
9248 }
9249
9250 if (Name == "div") {
9251 return parseIntWithPrefix("div", Operands,
9252 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
9253 }
9254
9255 return ParseStatus::NoMatch;
9256}
9257
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
// the number of src operands present, then copies that bit into src0_modifiers.
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI) {
  int Opc = Inst.getOpcode();
  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx == -1)
    return;

  // Count how many of src0..src2 this opcode has; the dst bit in op_sel is
  // the one immediately following the last source bit.
  int SrcNum;
  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  for (SrcNum = 0; SrcNum < 3 && AMDGPU::hasNamedOperand(Opc, Ops[SrcNum]);
       ++SrcNum)
    ;
  assert(SrcNum > 0);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  int DstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
  if (DstIdx == -1)
    return;

  const MCOperand &DstOp = Inst.getOperand(DstIdx);
  int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
  uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
  if (DstOp.isReg() &&
      MRI.getRegClass(AMDGPU::VGPR_16RegClassID).contains(DstOp.getReg())) {
    // True 16-bit destination register: the hi/lo half selected by the
    // register itself decides the DST_OP_SEL bit.
    if (AMDGPU::isHi16Reg(DstOp.getReg(), MRI))
      ModVal |= SISrcMods::DST_OP_SEL;
  } else {
    // Otherwise take the bit following the last source from parsed op_sel.
    if ((OpSel & (1 << SrcNum)) != 0)
      ModVal |= SISrcMods::DST_OP_SEL;
  }
  Inst.getOperand(ModIdx).setImm(ModVal);
}
9293
// Convert a VOP3 instruction with op_sel: do the regular VOP3P conversion,
// then move the dst op_sel bit into src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst,
                                   const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9299
// Same as the two-argument overload, but reuses a caller-provided map of
// already-recorded optional operand indices.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands,
                                   OptionalImmIndexMap &OptionalIdx) {
  cvtVOP3P(Inst, Operands, OptionalIdx);
  cvtVOP3DstOpSelOnly(Inst, *getMRI());
}
9305
9306static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
9307 return
9308 // 1. This operand is input modifiers
9309 Desc.operands()[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
9310 // 2. This is not last operand
9311 && Desc.NumOperands > (OpNum + 1)
9312 // 3. Next operand is register class
9313 && Desc.operands()[OpNum + 1].RegClass != -1
9314 // 4. Next register is not tied to any other operand
9315 && Desc.getOperandConstraint(OpNum + 1,
9317}
9318
9319void AMDGPUAsmParser::cvtOpSelHelper(MCInst &Inst, unsigned OpSel) {
9320 unsigned Opc = Inst.getOpcode();
9321 constexpr AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
9322 AMDGPU::OpName::src2};
9323 constexpr AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9324 AMDGPU::OpName::src1_modifiers,
9325 AMDGPU::OpName::src2_modifiers};
9326 for (int J = 0; J < 3; ++J) {
9327 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
9328 if (OpIdx == -1)
9329 // Some instructions, e.g. v_interp_p2_f16 in GFX9, have src0, src2, but
9330 // no src1. So continue instead of break.
9331 continue;
9332
9333 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9334 uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
9335
9336 if ((OpSel & (1 << J)) != 0)
9337 ModVal |= SISrcMods::OP_SEL_0;
9338 // op_sel[3] is encoded in src0_modifiers.
9339 if (ModOps[J] == AMDGPU::OpName::src0_modifiers && (OpSel & (1 << 3)) != 0)
9340 ModVal |= SISrcMods::DST_OP_SEL;
9341
9342 Inst.getOperand(ModIdx).setImm(ModVal);
9343 }
9344}
9345
9346void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
9347{
9348 OptionalImmIndexMap OptionalIdx;
9349 unsigned Opc = Inst.getOpcode();
9350
9351 unsigned I = 1;
9352 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9353 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9354 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9355 }
9356
9357 for (unsigned E = Operands.size(); I != E; ++I) {
9358 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9360 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9361 } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
9362 Op.isInterpAttrChan()) {
9363 Inst.addOperand(MCOperand::createImm(Op.getImm()));
9364 } else if (Op.isImmModifier()) {
9365 OptionalIdx[Op.getImmTy()] = I;
9366 } else {
9367 llvm_unreachable("unhandled operand type");
9368 }
9369 }
9370
9371 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::high))
9372 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9373 AMDGPUOperand::ImmTyHigh);
9374
9375 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9376 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9377 AMDGPUOperand::ImmTyClamp);
9378
9379 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9380 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9381 AMDGPUOperand::ImmTyOModSI);
9382
9383 // Some v_interp instructions use op_sel[3] for dst.
9384 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
9385 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9386 AMDGPUOperand::ImmTyOpSel);
9387 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9388 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9389
9390 cvtOpSelHelper(Inst, OpSel);
9391 }
9392}
9393
9394void AMDGPUAsmParser::cvtVINTERP(MCInst &Inst, const OperandVector &Operands)
9395{
9396 OptionalImmIndexMap OptionalIdx;
9397 unsigned Opc = Inst.getOpcode();
9398
9399 unsigned I = 1;
9400 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9401 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9402 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9403 }
9404
9405 for (unsigned E = Operands.size(); I != E; ++I) {
9406 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9408 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9409 } else if (Op.isImmModifier()) {
9410 OptionalIdx[Op.getImmTy()] = I;
9411 } else {
9412 llvm_unreachable("unhandled operand type");
9413 }
9414 }
9415
9416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClamp);
9417
9418 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
9419 if (OpSelIdx != -1)
9420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
9421
9422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyWaitEXP);
9423
9424 if (OpSelIdx == -1)
9425 return;
9426
9427 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
9428 cvtOpSelHelper(Inst, OpSel);
9429}
9430
9431void AMDGPUAsmParser::cvtScaledMFMA(MCInst &Inst,
9432 const OperandVector &Operands) {
9433 OptionalImmIndexMap OptionalIdx;
9434 unsigned Opc = Inst.getOpcode();
9435 unsigned I = 1;
9436 int CbszOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::cbsz);
9437
9438 const MCInstrDesc &Desc = MII.get(Opc);
9439
9440 for (unsigned J = 0; J < Desc.getNumDefs(); ++J)
9441 static_cast<AMDGPUOperand &>(*Operands[I++]).addRegOperands(Inst, 1);
9442
9443 for (unsigned E = Operands.size(); I != E; ++I) {
9444 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[I]);
9445 int NumOperands = Inst.getNumOperands();
9446 // The order of operands in MCInst and parsed operands are different.
9447 // Adding dummy cbsz and blgp operands at corresponding MCInst operand
9448 // indices for parsing scale values correctly.
9449 if (NumOperands == CbszOpIdx) {
9452 }
9453 if (isRegOrImmWithInputMods(Desc, NumOperands)) {
9454 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9455 } else if (Op.isImmModifier()) {
9456 OptionalIdx[Op.getImmTy()] = I;
9457 } else {
9458 Op.addRegOrImmOperands(Inst, 1);
9459 }
9460 }
9461
9462 // Insert CBSZ and BLGP operands for F8F6F4 variants
9463 auto CbszIdx = OptionalIdx.find(AMDGPUOperand::ImmTyCBSZ);
9464 if (CbszIdx != OptionalIdx.end()) {
9465 int CbszVal = ((AMDGPUOperand &)*Operands[CbszIdx->second]).getImm();
9466 Inst.getOperand(CbszOpIdx).setImm(CbszVal);
9467 }
9468
9469 int BlgpOpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp);
9470 auto BlgpIdx = OptionalIdx.find(AMDGPUOperand::ImmTyBLGP);
9471 if (BlgpIdx != OptionalIdx.end()) {
9472 int BlgpVal = ((AMDGPUOperand &)*Operands[BlgpIdx->second]).getImm();
9473 Inst.getOperand(BlgpOpIdx).setImm(BlgpVal);
9474 }
9475
9476 // Add dummy src_modifiers
9479
9480 // Handle op_sel fields
9481
9482 unsigned OpSel = 0;
9483 auto OpselIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSel);
9484 if (OpselIdx != OptionalIdx.end()) {
9485 OpSel = static_cast<const AMDGPUOperand &>(*Operands[OpselIdx->second])
9486 .getImm();
9487 }
9488
9489 unsigned OpSelHi = 0;
9490 auto OpselHiIdx = OptionalIdx.find(AMDGPUOperand::ImmTyOpSelHi);
9491 if (OpselHiIdx != OptionalIdx.end()) {
9492 OpSelHi = static_cast<const AMDGPUOperand &>(*Operands[OpselHiIdx->second])
9493 .getImm();
9494 }
9495 const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
9496 AMDGPU::OpName::src1_modifiers};
9497
9498 for (unsigned J = 0; J < 2; ++J) {
9499 unsigned ModVal = 0;
9500 if (OpSel & (1 << J))
9501 ModVal |= SISrcMods::OP_SEL_0;
9502 if (OpSelHi & (1 << J))
9503 ModVal |= SISrcMods::OP_SEL_1;
9504
9505 const int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);
9506 Inst.getOperand(ModIdx).setImm(ModVal);
9507 }
9508}
9509
9510void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
9511 OptionalImmIndexMap &OptionalIdx) {
9512 unsigned Opc = Inst.getOpcode();
9513
9514 unsigned I = 1;
9515 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9516 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
9517 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
9518 }
9519
9520 for (unsigned E = Operands.size(); I != E; ++I) {
9521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
9523 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9524 } else if (Op.isImmModifier()) {
9525 OptionalIdx[Op.getImmTy()] = I;
9526 } else {
9527 Op.addRegOrImmOperands(Inst, 1);
9528 }
9529 }
9530
9531 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::scale_sel))
9532 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9533 AMDGPUOperand::ImmTyScaleSel);
9534
9535 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
9536 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9537 AMDGPUOperand::ImmTyClamp);
9538
9539 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
9540 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in))
9541 Inst.addOperand(Inst.getOperand(0));
9542 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9543 AMDGPUOperand::ImmTyByteSel);
9544 }
9545
9546 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
9547 addOptionalImmOperand(Inst, Operands, OptionalIdx,
9548 AMDGPUOperand::ImmTyOModSI);
9549
9550 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
9551 // it has src2 register operand that is tied to dst operand
9552 // we don't allow modifiers for this operand in assembler so src2_modifiers
9553 // should be 0.
9554 if (isMAC(Opc)) {
9555 auto *it = Inst.begin();
9556 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
9557 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
9558 ++it;
9559 // Copy the operand to ensure it's not invalidated when Inst grows.
9560 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
9561 }
9562}
9563
// Convenience overload: convert with a fresh optional-operand index map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
9568
// Apply VOP3P-specific fix-ups after the generic VOP3 conversion: append the
// packed-math optional operands (op_sel, op_sel_hi, neg_lo/hi, matrix fields,
// bitop3) in encoding order, then fold the op_sel/neg bitmasks into the
// per-source modifier operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  // These opcodes need an explicit zero src2_modifiers plus a tied copy of
  // the destination appended before the optional operands.
  if (Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_F16_vi ||
      Opc == AMDGPU::V_CVT_SCALEF32_PK_FP4_BF16_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_vi ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx11 ||
      Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_gfx12 ||
      Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_gfx12) {
    Inst.addOperand(MCOperand::createImm(0)); // Placeholder for src2_mods
    Inst.addOperand(Inst.getOperand(0));
  }

  // Adding vdst_in operand is already covered for these DPP instructions in
  // cvtVOP3DPP.
  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vdst_in) &&
      !(Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx11 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx11 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_t16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_BF8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_PK_FP8_F32_fake16_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx1250_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_FP8_F16_fake16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_dpp8_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_t16_e64_gfx1250 ||
        Opc == AMDGPU::V_CVT_SR_BF8_F16_fake16_e64_gfx1250)) {
    Inst.addOperand(Inst.getOperand(0));
  }

  int BitOp3Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::bitop3);
  if (BitOp3Idx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed instructions default op_sel_hi to all-ones (-1).
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int MatrixAFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_fmt);
  if (MatrixAFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAFMT, 0);
  }

  int MatrixBFMTIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_fmt);
  if (MatrixBFMTIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBFMT, 0);
  }

  int MatrixAScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale);
  if (MatrixAScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScale, 0);
  }

  int MatrixBScaleIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale);
  if (MatrixBScaleIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScale, 0);
  }

  int MatrixAScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_a_scale_fmt);
  if (MatrixAScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAScaleFmt, 0);
  }

  int MatrixBScaleFmtIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::matrix_b_scale_fmt);
  if (MatrixBScaleFmtIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBScaleFmt, 0);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_a_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixAReuse, 0);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::matrix_b_reuse))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyMatrixBReuse, 0);

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);

  int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
  if (NegHiIdx != -1)
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);

  const AMDGPU::OpName Ops[] = {AMDGPU::OpName::src0, AMDGPU::OpName::src1,
                                AMDGPU::OpName::src2};
  const AMDGPU::OpName ModOps[] = {AMDGPU::OpName::src0_modifiers,
                                   AMDGPU::OpName::src1_modifiers,
                                   AMDGPU::OpName::src2_modifiers};

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1)
    NegLo = Inst.getOperand(NegLoIdx).getImm();

  if (NegHiIdx != -1)
    NegHi = Inst.getOperand(NegHiIdx).getImm();

  // Fold the bit J of each mask into srcJ_modifiers.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    if (ModIdx == -1)
      continue;

    uint32_t ModVal = 0;

    const MCOperand &SrcOp = Inst.getOperand(OpIdx);
    if (SrcOp.isReg() && getMRI()
                             ->getRegClass(AMDGPU::VGPR_16RegClassID)
                             .contains(SrcOp.getReg())) {
      // True 16-bit source register: its hi/lo half decides OP_SEL_0.
      bool VGPRSuffixIsHi = AMDGPU::isHi16Reg(SrcOp.getReg(), *getMRI());
      if (VGPRSuffixIsHi)
        ModVal |= SISrcMods::OP_SEL_0;
    } else {
      if ((OpSel & (1 << J)) != 0)
        ModVal |= SISrcMods::OP_SEL_0;
    }

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
9769
// Convert a VOP3P instruction: lay out operands as a regular VOP3 first,
// then apply the VOP3P-specific op_sel/neg fix-ups with the same index map.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  cvtVOP3(Inst, Operands, OptIdx);
  cvtVOP3P(Inst, Operands, OptIdx);
}
9775
9777 unsigned i, unsigned Opc,
9778 AMDGPU::OpName OpName) {
9779 if (AMDGPU::getNamedOperandIdx(Opc, OpName) != -1)
9780 ((AMDGPUOperand &)*Operands[i]).addRegOrImmWithFPInputModsOperands(Inst, 2);
9781 else
9782 ((AMDGPUOperand &)*Operands[i]).addRegOperands(Inst, 1);
9783}
9784
// Convert parsed operands of an SWMMAC instruction. Parsed operand positions
// are fixed: [1]=dst, [2]=src0, [3]=src1, [4]=src2 (index), with the dst
// register re-added as the tied source definition between src1 and src2.
void AMDGPUAsmParser::cvtSWMMAC(MCInst &Inst, const OperandVector &Operands) {
  unsigned Opc = Inst.getOpcode();

  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1);
  addSrcModifiersAndSrc(Inst, Operands, 2, Opc, AMDGPU::OpName::src0_modifiers);
  addSrcModifiersAndSrc(Inst, Operands, 3, Opc, AMDGPU::OpName::src1_modifiers);
  ((AMDGPUOperand &)*Operands[1]).addRegOperands(Inst, 1); // srcTiedDef
  ((AMDGPUOperand &)*Operands[4]).addRegOperands(Inst, 1); // src2

  // Remaining parsed operands are optional immediates; record their indices
  // so they can be appended in canonical order below.
  OptionalImmIndexMap OptIdx;
  for (unsigned i = 5; i < Operands.size(); ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    OptIdx[Op.getImmTy()] = i;
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_8bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey8bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_16bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey16bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::index_key_32bit))
    addOptionalImmOperand(Inst, Operands, OptIdx,
                          AMDGPUOperand::ImmTyIndexKey32bit);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyClamp);

  // Finish with the common VOP3P op_sel/neg fix-ups.
  cvtVOP3P(Inst, Operands, OptIdx);
}
9817
9818//===----------------------------------------------------------------------===//
9819// VOPD
9820//===----------------------------------------------------------------------===//
9821
9822ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
9823 if (!hasVOPD(getSTI()))
9824 return ParseStatus::NoMatch;
9825
9826 if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
9827 SMLoc S = getLoc();
9828 lex();
9829 lex();
9830 Operands.push_back(AMDGPUOperand::CreateToken(this, "::", S));
9831 SMLoc OpYLoc = getLoc();
9832 StringRef OpYName;
9833 if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
9834 Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
9835 return ParseStatus::Success;
9836 }
9837 return Error(OpYLoc, "expected a VOPDY instruction after ::");
9838 }
9839 return ParseStatus::NoMatch;
9840}
9841
9842// Create VOPD MCInst operands using parsed assembler operands.
9843void AMDGPUAsmParser::cvtVOPD(MCInst &Inst, const OperandVector &Operands) {
9844 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
9845
9846 auto addOp = [&](uint16_t ParsedOprIdx) { // NOLINT:function pointer
9847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[ParsedOprIdx]);
9849 Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
9850 return;
9851 }
9852 if (Op.isReg()) {
9853 Op.addRegOperands(Inst, 1);
9854 return;
9855 }
9856 if (Op.isImm()) {
9857 Op.addImmOperands(Inst, 1);
9858 return;
9859 }
9860 llvm_unreachable("Unhandled operand type in cvtVOPD");
9861 };
9862
9863 const auto &InstInfo = getVOPDInstInfo(Inst.getOpcode(), &MII);
9864
9865 // MCInst operands are ordered as follows:
9866 // dstX, dstY, src0X [, other OpX operands], src0Y [, other OpY operands]
9867
9868 for (auto CompIdx : VOPD::COMPONENTS) {
9869 addOp(InstInfo[CompIdx].getIndexOfDstInParsedOperands());
9870 }
9871
9872 for (auto CompIdx : VOPD::COMPONENTS) {
9873 const auto &CInfo = InstInfo[CompIdx];
9874 auto CompSrcOperandsNum = InstInfo[CompIdx].getCompParsedSrcOperandsNum();
9875 for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOperandsNum; ++CompSrcIdx)
9876 addOp(CInfo.getIndexOfSrcInParsedOperands(CompSrcIdx));
9877 if (CInfo.hasSrc2Acc())
9878 addOp(CInfo.getIndexOfDstInParsedOperands());
9879 }
9880
9881 int BitOp3Idx =
9882 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::bitop3);
9883 if (BitOp3Idx != -1) {
9884 OptionalImmIndexMap OptIdx;
9885 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands.back());
9886 if (Op.isImm())
9887 OptIdx[Op.getImmTy()] = Operands.size() - 1;
9888
9889 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyBitOp3);
9890 }
9891}
9892
9893//===----------------------------------------------------------------------===//
9894// dpp
9895//===----------------------------------------------------------------------===//
9896
// True if this operand is a parsed dpp8:[...] lane-select immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
9900
9901bool AMDGPUOperand::isDPPCtrl() const {
9902 using namespace AMDGPU::DPP;
9903
9904 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
9905 if (result) {
9906 int64_t Imm = getImm();
9907 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
9908 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
9909 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
9910 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
9911 (Imm == DppCtrl::WAVE_SHL1) ||
9912 (Imm == DppCtrl::WAVE_ROL1) ||
9913 (Imm == DppCtrl::WAVE_SHR1) ||
9914 (Imm == DppCtrl::WAVE_ROR1) ||
9915 (Imm == DppCtrl::ROW_MIRROR) ||
9916 (Imm == DppCtrl::ROW_HALF_MIRROR) ||
9917 (Imm == DppCtrl::BCAST15) ||
9918 (Imm == DppCtrl::BCAST31) ||
9919 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
9920 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
9921 }
9922 return false;
9923}
9924
9925//===----------------------------------------------------------------------===//
9926// mAI
9927//===----------------------------------------------------------------------===//
9928
// True if this operand is a BLGP immediate in the valid 3-bit range.
bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}
9932
9933bool AMDGPUOperand::isS16Imm() const {
9934 return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
9935}
9936
// Literal immediate that fits in an unsigned 16-bit field.
bool AMDGPUOperand::isU16Imm() const {
  return isImmLiteral() && isUInt<16>(getImm());
}
9940
9941//===----------------------------------------------------------------------===//
9942// dim
9943//===----------------------------------------------------------------------===//
9944
9945bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
9946 // We want to allow "dim:1D" etc.,
9947 // but the initial 1 is tokenized as an integer.
9948 std::string Token;
9949 if (isToken(AsmToken::Integer)) {
9950 SMLoc Loc = getToken().getEndLoc();
9951 Token = std::string(getTokenStr());
9952 lex();
9953 if (getLoc() != Loc)
9954 return false;
9955 }
9956
9957 StringRef Suffix;
9958 if (!parseId(Suffix))
9959 return false;
9960 Token += Suffix;
9961
9962 StringRef DimId = Token;
9963 DimId.consume_front("SQ_RSRC_IMG_");
9964
9965 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
9966 if (!DimInfo)
9967 return false;
9968
9969 Encoding = DimInfo->Encoding;
9970 return true;
9971}
9972
9973ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
9974 if (!isGFX10Plus())
9975 return ParseStatus::NoMatch;
9976
9977 SMLoc S = getLoc();
9978
9979 if (!trySkipId("dim", AsmToken::Colon))
9980 return ParseStatus::NoMatch;
9981
9982 unsigned Encoding;
9983 SMLoc Loc = getLoc();
9984 if (!parseDimId(Encoding))
9985 return Error(Loc, "invalid dim value");
9986
9987 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
9988 AMDGPUOperand::ImmTyDim));
9989 return ParseStatus::Success;
9990}
9991
9992//===----------------------------------------------------------------------===//
9993// dpp
9994//===----------------------------------------------------------------------===//
9995
9996ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
9997 SMLoc S = getLoc();
9998
9999 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
10000 return ParseStatus::NoMatch;
10001
10002 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
10003
10004 int64_t Sels[8];
10005
10006 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10007 return ParseStatus::Failure;
10008
10009 for (size_t i = 0; i < 8; ++i) {
10010 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10011 return ParseStatus::Failure;
10012
10013 SMLoc Loc = getLoc();
10014 if (getParser().parseAbsoluteExpression(Sels[i]))
10015 return ParseStatus::Failure;
10016 if (0 > Sels[i] || 7 < Sels[i])
10017 return Error(Loc, "expected a 3-bit value");
10018 }
10019
10020 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10021 return ParseStatus::Failure;
10022
10023 unsigned DPP8 = 0;
10024 for (size_t i = 0; i < 8; ++i)
10025 DPP8 |= (Sels[i] << (i * 3));
10026
10027 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
10028 return ParseStatus::Success;
10029}
10030
10031bool
10032AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
10033 const OperandVector &Operands) {
10034 if (Ctrl == "row_newbcast")
10035 return isGFX90A();
10036
10037 if (Ctrl == "row_share" ||
10038 Ctrl == "row_xmask")
10039 return isGFX10Plus();
10040
10041 if (Ctrl == "wave_shl" ||
10042 Ctrl == "wave_shr" ||
10043 Ctrl == "wave_rol" ||
10044 Ctrl == "wave_ror" ||
10045 Ctrl == "row_bcast")
10046 return isVI() || isGFX9();
10047
10048 return Ctrl == "row_mirror" ||
10049 Ctrl == "row_half_mirror" ||
10050 Ctrl == "quad_perm" ||
10051 Ctrl == "row_shl" ||
10052 Ctrl == "row_shr" ||
10053 Ctrl == "row_ror";
10054}
10055
10056int64_t
10057AMDGPUAsmParser::parseDPPCtrlPerm() {
10058 // quad_perm:[%d,%d,%d,%d]
10059
10060 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
10061 return -1;
10062
10063 int64_t Val = 0;
10064 for (int i = 0; i < 4; ++i) {
10065 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
10066 return -1;
10067
10068 int64_t Temp;
10069 SMLoc Loc = getLoc();
10070 if (getParser().parseAbsoluteExpression(Temp))
10071 return -1;
10072 if (Temp < 0 || Temp > 3) {
10073 Error(Loc, "expected a 2-bit value");
10074 return -1;
10075 }
10076
10077 Val += (Temp << i * 2);
10078 }
10079
10080 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
10081 return -1;
10082
10083 return Val;
10084}
10085
// Parse the integer selector of a "<ctrl>:%d" dpp_ctrl operand (everything
// except row_mirror/row_half_mirror/quad_perm) and fold it into the final
// dpp_ctrl encoding. Returns the encoding, or -1 after reporting an error.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // One row per keyword: base encoding plus the inclusive [Lo, Hi] range the
  // parsed selector must fall in.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
    .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
    .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
    .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
    .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
    .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
    .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: the only remaining selector form is row_bcast, which
    // accepts exactly 15 or 31. Val is overwritten unconditionally, but when
    // Valid is false it is never used — the error path returns -1 below.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    // Range-check, then fold the selector into the base encoding.
    // Single-value rows (Lo == Hi, the wave_* controls) encode as the base.
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
10133
10134ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
10135 using namespace AMDGPU::DPP;
10136
10137 if (!isToken(AsmToken::Identifier) ||
10138 !isSupportedDPPCtrl(getTokenStr(), Operands))
10139 return ParseStatus::NoMatch;
10140
10141 SMLoc S = getLoc();
10142 int64_t Val = -1;
10143 StringRef Ctrl;
10144
10145 parseId(Ctrl);
10146
10147 if (Ctrl == "row_mirror") {
10148 Val = DppCtrl::ROW_MIRROR;
10149 } else if (Ctrl == "row_half_mirror") {
10150 Val = DppCtrl::ROW_HALF_MIRROR;
10151 } else {
10152 if (skipToken(AsmToken::Colon, "expected a colon")) {
10153 if (Ctrl == "quad_perm") {
10154 Val = parseDPPCtrlPerm();
10155 } else {
10156 Val = parseDPPCtrlSel(Ctrl);
10157 }
10158 }
10159 }
10160
10161 if (Val == -1)
10162 return ParseStatus::Failure;
10163
10164 Operands.push_back(
10165 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
10166 return ParseStatus::Success;
10167}
10168
// Convert parsed VOP3-with-DPP operands into their final MCInst positions,
// synthesizing implicit/tied operands (old, vdst_in, dummy src2_modifiers)
// and appending the optional dpp/dpp8 immediates with their defaults.
// NOTE(review): this copy of the function appears to have lost a few source
// lines (marked inline below) — verify against upstream before relying on it.
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
                                 bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  // MAC instructions are special because they have 'old'
  // operand which is not tied to dst (but assumed to be).
  // They also have dummy unused src2_modifiers.
  int OldIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::old);
  int Src2ModIdx =
      AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers);
  bool IsMAC = OldIdx != -1 && Src2ModIdx != -1 &&
               Desc.getOperandConstraint(OldIdx, MCOI::TIED_TO) == -1;

  // Copy the explicit result register(s) first; parsed operand 0 is the
  // mnemonic token.
  unsigned I = 1;
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  int VdstInIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in);
  bool IsVOP3CvtSrDpp = Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp8_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_BF8_F32_gfx12_e64_dpp_gfx12 ||
                        Opc == AMDGPU::V_CVT_SR_FP8_F32_gfx12_e64_dpp_gfx12;

  for (unsigned E = Operands.size(); I != E; ++I) {

    if (IsMAC) {
      int NumOperands = Inst.getNumOperands();
      if (OldIdx == NumOperands) {
        // Handle old operand
        constexpr int DST_IDX = 0;
        Inst.addOperand(Inst.getOperand(DST_IDX));
      } else if (Src2ModIdx == NumOperands) {
        // Add unused dummy src2_modifiers
        // NOTE(review): a statement seems to be missing here (likely
        // Inst.addOperand(MCOperand::createImm(0))) — confirm upstream.
      }
    }

    // Duplicate the destination as vdst_in when the instruction reads it.
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands())) {
      Inst.addOperand(Inst.getOperand(0));
    }

    if (IsVOP3CvtSrDpp) {
      if (Src2ModIdx == static_cast<int>(Inst.getNumOperands())) {
        // NOTE(review): a statement seems to be missing here (likely the
        // src2_modifiers immediate) before the placeholder register below.
        Inst.addOperand(MCOperand::createReg(MCRegister()));
      }
    }

    // NOTE(review): the call below is truncated in this copy — upstream
    // passes MCOI::TIED_TO as the second argument.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (IsDPP8 && Op.isDppFI()) {
      // fi is emitted together with dpp8 at the end, so only remember it.
      Fi = Op.getImm();
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImm() &&
               Desc.operands()[Inst.getNumOperands()].RegClass != -1) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Optional immediates are recorded by type and appended below in
      // encoding order.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp) && !IsVOP3CvtSrDpp)
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyClamp);

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::byte_sel)) {
    if (VdstInIdx == static_cast<int>(Inst.getNumOperands()))
      Inst.addOperand(Inst.getOperand(0));
    addOptionalImmOperand(Inst, Operands, OptionalIdx,
                          AMDGPUOperand::ImmTyByteSel);
  }

  if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);

  if (Desc.TSFlags & SIInstrFlags::VOP3P)
    cvtVOP3P(Inst, Operands, OptionalIdx);
  else if (Desc.TSFlags & SIInstrFlags::VOP3)
    cvtVOP3OpSel(Inst, Operands, OptionalIdx);
  else if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel)) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOpSel);
  }

  // Trailing DPP controls: dpp8 selectors + fi, or classic dpp_ctrl with its
  // default masks (row/bank mask default 0xf, dpp_ctrl default 0xe4 = identity).
  if (IsDPP8) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDPP8);
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppCtrl, 0xe4);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);

    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
  }
}
10283
// Convert parsed VOP1/VOP2 DPP operands into the MCInst, handling tied
// operands and appending the optional dpp/dpp8 trailing immediates.
// NOTE(review): this copy appears to have lost a couple of source lines
// (marked inline) — verify against upstream before relying on it.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Parsed operand 0 is the mnemonic token; copy the defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // NOTE(review): the call below is truncated in this copy — upstream
    // passes MCOI::TIED_TO as the second argument.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDppFI()) {
        // fi is emitted after dpp8 below, so only remember it here.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      // NOTE(review): a guard line seems to be missing here (likely
      // if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {) —
      // confirm upstream.
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  // Trailing DPP controls: fi for dpp8, or classic masks with defaults
  // (row/bank mask default 0xf).
  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyDppFI);
    }
  }
}
10351
10352//===----------------------------------------------------------------------===//
10353// sdwa
10354//===----------------------------------------------------------------------===//
10355
// Parse an SDWA sub-dword selector operand of the form "<Prefix>:<name|int>".
// The list order matters: presumably each name's index is the encoded value
// (BYTE_0 = 0 ... DWORD = 6) — confirm against parseStringOrIntWithPrefix.
ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
                                          StringRef Prefix,
                                          AMDGPUOperand::ImmTy Type) {
  return parseStringOrIntWithPrefix(
      Operands, Prefix,
      {"BYTE_0", "BYTE_1", "BYTE_2", "BYTE_3", "WORD_0", "WORD_1", "DWORD"},
      Type);
}
10364
// Parse the SDWA "dst_unused:<mode>" operand selecting what happens to the
// destination bits outside the selected sub-dword.
ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  return parseStringOrIntWithPrefix(
      Operands, "dst_unused", {"UNUSED_PAD", "UNUSED_SEXT", "UNUSED_PRESERVE"},
      AMDGPUOperand::ImmTySDWADstUnused);
}
10370
// SDWA VOP1 conversion: no implicit VCC operands to skip.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
10374
// SDWA VOP2 conversion: plain VOP2, no implicit VCC operands to skip.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
10378
// SDWA VOP2b conversion (e.g. v_addc_u32): skip both the "vcc" destination
// token and the "vcc" source token (SkipDstVcc = true, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
10382
// SDWA VOP2e conversion: skip only the "vcc" source token
// (SkipDstVcc = false, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
10386
// SDWA VOPC conversion: only VI spells the implicit "vcc" destination in the
// assembly, so the dst-vcc skip is enabled on VI alone.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
10390
// Shared SDWA conversion: copies parsed operands into the MCInst, skipping
// spelled-out "vcc" tokens where the encoding makes them implicit, then
// appends the optional SDWA immediates with their defaults per basic
// instruction class (VOP1/VOP2/VOPC).
// NOTE(review): this copy appears to have lost one source line (marked
// inline) — verify against upstream before relying on it.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Parsed operand 0 is the mnemonic token; copy the defs first.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      }
      if (BasicInstType == SIInstrFlags::VOPC && Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    // NOTE(review): a guard line seems to be missing here (likely
    // if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {) —
    // confirm upstream.
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  const unsigned Opc = Inst.getOpcode();
  if (Opc != AMDGPU::V_NOP_sdwa_gfx10 && Opc != AMDGPU::V_NOP_sdwa_gfx9 &&
      Opc != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyOModSI, 0);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);

      if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTySDWADstUnused,
                              DstUnused::UNUSED_PRESERVE);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx,
                            AMDGPUOperand::ImmTyClamp, 0);

      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);

      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
        addOptionalImmOperand(Inst, Operands, OptionalIdx,
                              AMDGPUOperand::ImmTyClamp, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto *it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
10500
10501/// Force static initialization.
10502extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
10507
10508#define GET_MATCHER_IMPLEMENTATION
10509#define GET_MNEMONIC_SPELL_CHECKER
10510#define GET_MNEMONIC_CHECKER
10511#include "AMDGPUGenAsmMatcher.inc"
10512
10513ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
10514 unsigned MCK) {
10515 switch (MCK) {
10516 case MCK_addr64:
10517 return parseTokenOp("addr64", Operands);
10518 case MCK_done:
10519 return parseNamedBit("done", Operands, AMDGPUOperand::ImmTyDone, true);
10520 case MCK_idxen:
10521 return parseTokenOp("idxen", Operands);
10522 case MCK_lds:
10523 return parseTokenOp("lds", Operands);
10524 case MCK_offen:
10525 return parseTokenOp("offen", Operands);
10526 case MCK_off:
10527 return parseTokenOp("off", Operands);
10528 case MCK_row_95_en:
10529 return parseNamedBit("row_en", Operands, AMDGPUOperand::ImmTyRowEn, true);
10530 case MCK_gds:
10531 return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
10532 case MCK_tfe:
10533 return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
10534 }
10535 return tryCustomParseOperand(Operands, MCK);
10536}
10537
10538// This function should be defined after auto-generated include so that we have
10539// MatchClassKind enum defined
// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  // Token-like operands parsed as immediates: accept them when the operand
  // carries the matching immediate type.
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_tfe:
    return Operand.isTFE() ? Match_Success : Match_InvalidOperand;
  case MCK_done:
    return Operand.isDone() ? Match_Success : Match_InvalidOperand;
  case MCK_row_95_en:
    return Operand.isRowEn() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_b32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrc_b32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrc_f32:
    return Operand.isSSrc_f32() ? Match_Success : Match_InvalidOperand;
  case MCK_SOPPBrTarget:
    return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpAttrChan:
    return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands or larger.
    // The following code enables it for SReg_64 and larger operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
  case MCK_SReg_96:
  case MCK_SReg_128:
  case MCK_SReg_256:
  case MCK_SReg_512:
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}
10600
10601//===----------------------------------------------------------------------===//
10602// endpgm
10603//===----------------------------------------------------------------------===//
10604
10605ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
10606 SMLoc S = getLoc();
10607 int64_t Imm = 0;
10608
10609 if (!parseExpr(Imm)) {
10610 // The operand is optional, if not present default to 0
10611 Imm = 0;
10612 }
10613
10614 if (!isUInt<16>(Imm))
10615 return Error(S, "expected a 16-bit value");
10616
10617 Operands.push_back(
10618 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
10619 return ParseStatus::Success;
10620}
10621
// True when this operand is the immediate produced by parseEndpgm().
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
10623
10624//===----------------------------------------------------------------------===//
10625// Split Barrier
10626//===----------------------------------------------------------------------===//
10627
// A split-barrier operand accepts any 32-bit inlinable immediate.
bool AMDGPUOperand::isSplitBarrier() const { return isInlinableImm(MVT::i32); }
#define Success
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
SmallVector< int16_t, MAX_SRC_OPERANDS_NUM > OperandIndices
static bool checkWriteLane(const MCInst &Inst)
static bool getRegNum(StringRef Str, unsigned &Num)
static void addSrcModifiersAndSrc(MCInst &Inst, const OperandVector &Operands, unsigned i, unsigned Opc, AMDGPU::OpName OpName)
static constexpr RegInfo RegularRegisters[]
static const RegInfo * getRegularRegInfo(StringRef Str)
static ArrayRef< unsigned > getAllVariants()
static OperandIndices getSrcOperandIndices(unsigned Opcode, bool AddMandatoryLiterals=false)
static int IsAGPROperand(const MCInst &Inst, AMDGPU::OpName Name, const MCRegisterInfo *MRI)
static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
static const fltSemantics * getFltSemantics(unsigned Size)
static bool isRegularReg(RegisterKind Kind)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser()
Force static initialization.
static bool ConvertOmodMul(int64_t &Mul)
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)
static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi)
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT)
constexpr uint64_t MIMGFlags
static bool AMDGPUCheckMnemonic(StringRef Mnemonic, const FeatureBitset &AvailableFeatures, unsigned VariantID)
static void applyMnemonicAliases(StringRef &Mnemonic, const FeatureBitset &Features, unsigned VariantID)
constexpr unsigned MAX_SRC_OPERANDS_NUM
#define EXPR_RESOLVE_OR_ERROR(RESOLVED)
static bool ConvertOmodDiv(int64_t &Div)
static bool IsRevOpcode(const unsigned Opcode)
static bool encodeCnt(const AMDGPU::IsaVersion ISA, int64_t &IntVal, int64_t CntVal, bool Saturate, unsigned(*encode)(const IsaVersion &Version, unsigned, unsigned), unsigned(*decode)(const IsaVersion &Version, unsigned))
static MCRegister getSpecialRegForName(StringRef RegName)
static void addOptionalImmOperand(MCInst &Inst, const OperandVector &Operands, AMDGPUAsmParser::OptionalImmIndexMap &OptionalIdx, AMDGPUOperand::ImmTy ImmT, int64_t Default=0, std::optional< unsigned > InsertAt=std::nullopt)
static void cvtVOP3DstOpSelOnly(MCInst &Inst, const MCRegisterInfo &MRI)
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum)
static const fltSemantics * getOpFltSemantics(uint8_t OperandType)
static bool isInvalidVOPDY(const OperandVector &Operands, uint64_t InvalidOprIdx)
static std::string AMDGPUMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS, unsigned VariantID=0)
static LLVM_READNONE unsigned encodeBitmaskPerm(const unsigned AndMask, const unsigned OrMask, const unsigned XorMask)
static bool isSafeTruncation(int64_t Val, unsigned Size)
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
Provides AMDGPU specific target descriptions.
AMDGPU metadata definitions and in-memory representations.
AMDHSA kernel descriptor definitions.
static bool parseExpr(MCAsmParser &MCParser, const MCExpr *&Value, raw_ostream &Err)
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
@ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32
This file declares a class to represent arbitrary precision floating point values and provide a varie...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_READNONE
Definition Compiler.h:315
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
@ Default
#define Check(C,...)
static llvm::Expected< InlineInfo > decode(DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr)
Decode an InlineInfo in Data at the specified offset.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
Loop::LoopBounds::Direction Direction
Definition LoopInfo.cpp:253
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static bool isReg(const MCInst &MI, unsigned OpNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(PassOpts->AAPipeline)
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Interface definition for SIInstrInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file implements the SmallBitVector class.
StringSet - A set-like wrapper for the StringMap.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, const llvm::StringTable &StandardNames, VectorLibrary VecLib)
Initialize the set of available library functions based on the specified target triple.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
BinaryOperator * Mul
static const char * getRegisterName(MCRegister Reg)
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createLit(LitModifier Lit, int64_t Value, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5976
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
StringRef getString() const
Get the string for the current token, this includes all characters (for example, the quotes on string...
Definition MCAsmMacro.h:103
bool is(TokenKind K) const
Definition MCAsmMacro.h:75
Register getReg() const
Container class for subtarget features.
constexpr bool test(unsigned I) const
constexpr FeatureBitset & flip(unsigned I)
void printExpr(raw_ostream &, const MCExpr &) const
virtual void Initialize(MCAsmParser &Parser)
Initialize the extension for parsing using the given Parser.
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
unsigned getNumOperands() const
Definition MCInst.h:212
SMLoc getLoc() const
Definition MCInst.h:208
void setLoc(SMLoc loc)
Definition MCInst.h:207
unsigned getOpcode() const
Definition MCInst.h:202
iterator insert(iterator I, const MCOperand &Op)
Definition MCInst.h:232
void addOperand(const MCOperand Op)
Definition MCInst.h:215
iterator begin()
Definition MCInst.h:227
size_t size() const
Definition MCInst.h:226
const MCOperand & getOperand(unsigned i) const
Definition MCInst.h:210
Describe properties that are true of each instruction in the target description file.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
int16_t getOpRegClassID(const MCOperandInfo &OpInfo, unsigned HwModeId) const
Return the ID of the register class to use for OpInfo, for the active HwMode HwModeId.
Definition MCInstrInfo.h:80
Instances of this class represent operands of the MCInst class.
Definition MCInst.h:40
void setImm(int64_t Val)
Definition MCInst.h:89
static MCOperand createExpr(const MCExpr *Val)
Definition MCInst.h:166
int64_t getImm() const
Definition MCInst.h:84
static MCOperand createReg(MCRegister Reg)
Definition MCInst.h:138
static MCOperand createImm(int64_t Val)
Definition MCInst.h:145
bool isImm() const
Definition MCInst.h:66
void setReg(MCRegister Reg)
Set the register number.
Definition MCInst.h:79
bool isReg() const
Definition MCInst.h:65
MCRegister getReg() const
Returns the register number.
Definition MCInst.h:73
const MCExpr * getExpr() const
Definition MCInst.h:118
bool isExpr() const
Definition MCInst.h:69
MCParsedAsmOperand - This abstract class represents a source-level assembly instruction operand.
MCRegisterClass - Base class of TargetRegisterClass.
MCRegister getRegister(unsigned i) const
getRegister - Return the specified register in the class.
unsigned getNumRegs() const
getNumRegs - Return the number of registers in this class.
bool contains(MCRegister Reg) const
contains - Return true if the specified register is included in this register class.
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
bool regsOverlap(MCRegister RegA, MCRegister RegB) const
Returns true if the two registers are equal or alias each other.
const MCRegisterClass & getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
MCRegister getSubReg(MCRegister Reg, unsigned Idx) const
Returns the physical register number of sub-register "Index" for physical register RegNo.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
virtual void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI)
Emit the given Instruction into the current section.
Generic base class for all target subtargets.
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
LLVM_ABI void setVariableValue(const MCExpr *Value)
Definition MCSymbol.cpp:50
void setRedefinable(bool Value)
Mark this symbol as redefinable.
Definition MCSymbol.h:210
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCTargetAsmParser - Generic interface to target specific assembly parsers.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
Ternary parse status returned by various parse* methods.
constexpr bool isFailure() const
static constexpr StatusTy Failure
constexpr bool isSuccess() const
static constexpr StatusTy Success
static constexpr StatusTy NoMatch
constexpr bool isNoMatch() const
constexpr unsigned id() const
Definition Register.h:100
Represents a location in source code.
Definition SMLoc.h:22
static SMLoc getFromPointer(const char *Ptr)
Definition SMLoc.h:35
constexpr const char * getPointer() const
Definition SMLoc.h:33
constexpr bool isValid() const
Definition SMLoc.h:28
SMLoc Start
Definition SMLoc.h:49
iterator insert(iterator I, T &&Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool consume_back(StringRef Suffix)
Returns true if this StringRef has the given suffix and removes that suffix.
Definition StringRef.h:685
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:137
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition StringRef.h:270
bool consume_front(char Prefix)
Returns true if this StringRef has the given prefix and removes that prefix.
Definition StringRef.h:655
bool contains(StringRef key) const
Check if the set contains the given key.
Definition StringSet.h:60
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
Definition Twine.cpp:17
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask, const MCSubtargetInfo &STI)
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI)
unsigned getTgtId(const StringRef Name)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char NumSGPRs[]
Key for Kernel::CodeProps::Metadata::mNumSGPRs.
constexpr char SymbolName[]
Key for Kernel::Metadata::mSymbolName.
constexpr char AssemblerDirectiveBegin[]
HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
HSA metadata ending assembler directive.
constexpr char AssemblerDirectiveBegin[]
Old HSA metadata beginning assembler directive for V2.
int64_t getHwregId(StringRef Name, const MCSubtargetInfo &STI)
static constexpr CustomOperand Operands[]
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getLocalMemorySize(const MCSubtargetInfo *STI)
unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI)
unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI)
int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt, const MCSubtargetInfo &STI)
int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt)
int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI)
bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI)
int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI)
int64_t getDfmt(const StringRef Name)
constexpr char AssemblerDirective[]
PAL metadata (old linear format) assembler directive.
constexpr char AssemblerDirectiveBegin[]
PAL metadata (new MsgPack format) beginning assembler directive.
constexpr char AssemblerDirectiveEnd[]
PAL metadata (new MsgPack format) ending assembler directive.
int64_t getMsgOpId(int64_t MsgId, StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a sendmsg operation to the operation portion of the immediate encoding.
int64_t getMsgId(StringRef Name, const MCSubtargetInfo &STI)
Map from a symbolic name for a msg_id to the message portion of the immediate encoding.
uint64_t encodeMsg(uint64_t MsgId, uint64_t OpId, uint64_t StreamId)
bool msgSupportsStream(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI)
bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId, const MCSubtargetInfo &STI, bool Strict)
bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI)
bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI, bool Strict)
ArrayRef< GFXVersion > getGFXVersions()
constexpr unsigned COMPONENTS[]
constexpr const char *const ModMatrixFmt[]
constexpr const char *const ModMatrixScaleFmt[]
constexpr const char *const ModMatrixScale[]
bool isPackedFP32Inst(unsigned Opc)
bool isInlinableLiteralBF16(int16_t Literal, bool HasInv2Pi)
bool isGFX10_BEncoding(const MCSubtargetInfo &STI)
bool isInlineValue(MCRegister Reg)
bool isPKFMACF16InlineConstant(uint32_t Literal, bool IsGFX11Plus)
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
bool isInlinableLiteralFP16(int16_t Literal, bool HasInv2Pi)
bool isSGPR(MCRegister Reg, const MCRegisterInfo *TRI)
Is Reg - scalar register.
MCRegister getMCReg(MCRegister Reg, const MCSubtargetInfo &STI)
If Reg is a pseudo reg, return the correct hardware register given STI otherwise return Reg.
uint8_t wmmaScaleF8F6F4FormatToNumRegs(unsigned Fmt)
const int OPR_ID_UNSUPPORTED
bool isInlinableLiteralV2I16(uint32_t Literal)
bool isHi16Reg(MCRegister Reg, const MCRegisterInfo &MRI)
unsigned getTemporalHintType(const MCInstrDesc TID)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
bool isGFX10(const MCSubtargetInfo &STI)
LLVM_READONLY bool isLitExpr(const MCExpr *Expr)
bool isInlinableLiteralV2BF16(uint32_t Literal)
unsigned getMaxNumUserSGPRs(const MCSubtargetInfo &STI)
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST)
For pre-GFX12 FLAT instructions the offset must be positive; MSB is ignored and forced to zero.
bool hasA16(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset, bool IsBuffer)
bool isGFX12Plus(const MCSubtargetInfo &STI)
unsigned getNSAMaxSize(const MCSubtargetInfo &STI, bool HasSampler)
bool hasPackedD16(const MCSubtargetInfo &STI)
bool isGFX940(const MCSubtargetInfo &STI)
bool isInlinableLiteralV2F16(uint32_t Literal)
bool isHsaAbi(const MCSubtargetInfo &STI)
bool isGFX11(const MCSubtargetInfo &STI)
const int OPR_VAL_INVALID
bool getSMEMIsBuffer(unsigned Opc)
bool isGFX13(const MCSubtargetInfo &STI)
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
bool isValid32BitLiteral(uint64_t Val, bool IsFP64)
CanBeVOPD getCanBeVOPD(unsigned Opc, unsigned EncodingFamily, bool VOPD3)
LLVM_READNONE bool isLegalDPALU_DPPControl(const MCSubtargetInfo &ST, unsigned DC)
bool isSI(const MCSubtargetInfo &STI)
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getWaitcntBitMask(const IsaVersion &Version)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, OpName NamedIdx)
bool isGFX9(const MCSubtargetInfo &STI)
unsigned getVOPDEncodingFamily(const MCSubtargetInfo &ST)
bool isGFX10_AEncoding(const MCSubtargetInfo &STI)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByEncoding(uint8_t DimEnc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
bool isGFX12(const MCSubtargetInfo &STI)
unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Expcnt)
bool hasMAIInsts(const MCSubtargetInfo &STI)
constexpr bool isSISrcOperand(const MCOperandInfo &OpInfo)
Is this an AMDGPU specific source operand?
LLVM_READONLY const MIMGDimInfo * getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix)
bool hasMIMG_R128(const MCSubtargetInfo &STI)
bool hasG16(const MCSubtargetInfo &STI)
unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode, const MIMGDimInfo *Dim, bool IsA16, bool IsG16Supported)
bool isGFX13Plus(const MCSubtargetInfo &STI)
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI)
LLVM_READONLY int64_t getLitValue(const MCExpr *Expr)
bool isGFX11Plus(const MCSubtargetInfo &STI)
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this floating-point operand?
bool isGFX10Plus(const MCSubtargetInfo &STI)
int64_t encode32BitLiteral(int64_t Imm, OperandType Type, bool IsLit)
@ OPERAND_KIMM32
Operand with 32-bit immediate that uses the constant bus.
Definition SIDefines.h:234
@ OPERAND_REG_IMM_INT64
Definition SIDefines.h:204
@ OPERAND_REG_IMM_V2FP16
Definition SIDefines.h:211
@ OPERAND_REG_INLINE_C_FP64
Definition SIDefines.h:225
@ OPERAND_REG_INLINE_C_BF16
Definition SIDefines.h:222
@ OPERAND_REG_INLINE_C_V2BF16
Definition SIDefines.h:227
@ OPERAND_REG_IMM_V2INT16
Definition SIDefines.h:213
@ OPERAND_REG_IMM_BF16
Definition SIDefines.h:208
@ OPERAND_REG_IMM_INT32
Operands with register, 32-bit, or 64-bit immediate.
Definition SIDefines.h:203
@ OPERAND_REG_IMM_V2BF16
Definition SIDefines.h:210
@ OPERAND_REG_IMM_FP16
Definition SIDefines.h:209
@ OPERAND_REG_IMM_V2FP16_SPLAT
Definition SIDefines.h:212
@ OPERAND_REG_INLINE_C_INT64
Definition SIDefines.h:221
@ OPERAND_REG_INLINE_C_INT16
Operands with register or inline constant.
Definition SIDefines.h:219
@ OPERAND_REG_IMM_NOINLINE_V2FP16
Definition SIDefines.h:214
@ OPERAND_REG_IMM_FP64
Definition SIDefines.h:207
@ OPERAND_REG_INLINE_C_V2FP16
Definition SIDefines.h:228
@ OPERAND_REG_INLINE_AC_INT32
Operands with an AccVGPR register or inline constant.
Definition SIDefines.h:239
@ OPERAND_REG_INLINE_AC_FP32
Definition SIDefines.h:240
@ OPERAND_REG_IMM_V2INT32
Definition SIDefines.h:215
@ OPERAND_REG_IMM_FP32
Definition SIDefines.h:206
@ OPERAND_REG_INLINE_C_FP32
Definition SIDefines.h:224
@ OPERAND_REG_INLINE_C_INT32
Definition SIDefines.h:220
@ OPERAND_REG_INLINE_C_V2INT16
Definition SIDefines.h:226
@ OPERAND_REG_IMM_V2FP32
Definition SIDefines.h:216
@ OPERAND_REG_INLINE_AC_FP64
Definition SIDefines.h:241
@ OPERAND_REG_INLINE_C_FP16
Definition SIDefines.h:223
@ OPERAND_REG_IMM_INT16
Definition SIDefines.h:205
@ OPERAND_INLINE_SPLIT_BARRIER_INT32
Definition SIDefines.h:231
bool isDPALU_DPP(const MCInstrDesc &OpDesc, const MCInstrInfo &MII, const MCSubtargetInfo &ST)
bool hasGDS(const MCSubtargetInfo &STI)
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST, int64_t EncodedOffset)
bool isGFX9Plus(const MCSubtargetInfo &STI)
bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI)
const int OPR_ID_DUPLICATE
bool isVOPD(unsigned Opc)
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Vmcnt)
unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt)
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
bool isGFX1170(const MCSubtargetInfo &STI)
bool isGFX1250(const MCSubtargetInfo &STI)
const MIMGBaseOpcodeInfo * getMIMGBaseOpcode(unsigned Opc)
bool isVI(const MCSubtargetInfo &STI)
bool supportsScaleOffset(const MCInstrInfo &MII, unsigned Opcode)
MCRegister mc2PseudoReg(MCRegister Reg)
Convert hardware register Reg to a pseudo register.
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
bool supportsWGP(const MCSubtargetInfo &STI)
bool isMAC(unsigned Opc)
LLVM_READNONE unsigned getOperandSize(const MCOperandInfo &OpInfo)
bool isCI(const MCSubtargetInfo &STI)
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt, unsigned Lgkmcnt)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
const int OPR_ID_UNKNOWN
bool isGFX1250Plus(const MCSubtargetInfo &STI)
unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt)
bool isInlinableLiteralI16(int32_t Literal, bool HasInv2Pi)
bool hasVOPD(const MCSubtargetInfo &STI)
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi)
Is this literal inlinable.
bool isPermlane16(unsigned Opc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1432
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ OPERAND_IMMEDIATE
Definition MCInstrDesc.h:61
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
void validate(const Triple &TT, const FeatureBitset &FeatureBits)
@ Valid
The data is already valid.
Context & getContext() const
Definition BasicBlock.h:99
bool isNull(StringRef S)
Definition YAMLTraits.h:570
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
bool errorToBool(Error Err)
Helper for converting an Error to a bool.
Definition Error.h:1113
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
StringMapEntry< Value * > ValueName
Definition Value.h:56
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
unsigned encode(MaybeAlign A)
Returns a representation of the alignment that encodes undefined as 0.
Definition Alignment.h:206
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
static bool isMem(const MachineInstr &MI, unsigned Op)
LLVM_ABI std::pair< StringRef, StringRef > getToken(StringRef Source, StringRef Delimiters=" \t\n\v\f\r")
getToken - This function extracts one token from source, ignoring any leading characters that appear ...
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
void PrintError(const Twine &Msg)
Definition Error.cpp:104
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
Definition InstrProf.h:267
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
Op::Description Desc
Target & getTheR600Target()
The target for R600 GPUs.
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
SmallVectorImpl< std::unique_ptr< MCParsedAsmOperand > > OperandVector
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
Target & getTheGCNTarget()
The target for GCN GPUs.
@ Sub
Subtraction of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
#define N
RegisterKind Kind
StringLiteral Name
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
Instruction set architecture version.
static void bits_set(const MCExpr *&Dst, const MCExpr *Value, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
static MCKernelDescriptor getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, MCContext &Ctx)
RegisterMCAsmParser - Helper template for registering a target specific assembly parser,...