//===-- AMDGPURegBankLegalizeRules.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Definitions of RegBankLegalize Rules for all opcodes.
/// Implementation of the container for all the Rules and of the search.
/// Fast search for the most common case, where Rule.Predicate checks the LLT
/// and uniformity of the register in operand 0.
//
//===----------------------------------------------------------------------===//

#include "AMDGPURegBankLegalizeRules.h"
#include "AMDGPUInstrInfo.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Support/AMDGPUAddrSpace.h"

#define DEBUG_TYPE "amdgpu-regbanklegalize"

using namespace llvm;
using namespace AMDGPU;

bool AMDGPU::isAnyPtr(LLT Ty, unsigned Width) {
  return Ty.isPointer() && Ty.getSizeInBits() == Width;
}

RegBankLLTMapping::RegBankLLTMapping(
    std::initializer_list<RegBankLLTMappingApplyID> DstOpMappingList,
    std::initializer_list<RegBankLLTMappingApplyID> SrcOpMappingList,
    LoweringMethodID LoweringMethod)
    : DstOpMapping(DstOpMappingList), SrcOpMapping(SrcOpMappingList),
      LoweringMethod(LoweringMethod) {}

PredicateMapping::PredicateMapping(
    std::initializer_list<UniformityLLTOpPredicateID> OpList,
    std::function<bool(const MachineInstr &)> TestFunc)
    : OpUniformityAndTypes(OpList), TestFunc(TestFunc) {}

static bool matchUniformityAndLLT(Register Reg,
                                  UniformityLLTOpPredicateID UniID,
                                  const MachineUniformityInfo &MUI,
                                  const MachineRegisterInfo &MRI) {
  switch (UniID) {
  case S1:
    return MRI.getType(Reg) == LLT::scalar(1);
  case S16:
    return MRI.getType(Reg) == LLT::scalar(16);
  case S32:
    return MRI.getType(Reg) == LLT::scalar(32);
  case S64:
    return MRI.getType(Reg) == LLT::scalar(64);
  case S128:
    return MRI.getType(Reg) == LLT::scalar(128);
  case P0:
    return MRI.getType(Reg) == LLT::pointer(0, 64);
  case P1:
    return MRI.getType(Reg) == LLT::pointer(1, 64);
  case P2:
    return MRI.getType(Reg) == LLT::pointer(2, 32);
  case P3:
    return MRI.getType(Reg) == LLT::pointer(3, 32);
  case P4:
    return MRI.getType(Reg) == LLT::pointer(4, 64);
  case P5:
    return MRI.getType(Reg) == LLT::pointer(5, 32);
  case P8:
    return MRI.getType(Reg) == LLT::pointer(8, 128);
  case Ptr32:
    return isAnyPtr(MRI.getType(Reg), 32);
  case Ptr64:
    return isAnyPtr(MRI.getType(Reg), 64);
  case Ptr128:
    return isAnyPtr(MRI.getType(Reg), 128);
  case V2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16);
  case V2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32);
  case V3S32:
    return MRI.getType(Reg) == LLT::fixed_vector(3, 32);
  case V4S32:
    return MRI.getType(Reg) == LLT::fixed_vector(4, 32);
  case B32:
    return MRI.getType(Reg).getSizeInBits() == 32;
  case B64:
    return MRI.getType(Reg).getSizeInBits() == 64;
  case B96:
    return MRI.getType(Reg).getSizeInBits() == 96;
  case B128:
    return MRI.getType(Reg).getSizeInBits() == 128;
  case B160:
    return MRI.getType(Reg).getSizeInBits() == 160;
  case B256:
    return MRI.getType(Reg).getSizeInBits() == 256;
  case B512:
    return MRI.getType(Reg).getSizeInBits() == 512;
  case UniS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isUniform(Reg);
  case UniS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isUniform(Reg);
  case UniS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isUniform(Reg);
  case UniS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isUniform(Reg);
  case UniS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isUniform(Reg);
  case UniP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isUniform(Reg);
  case UniP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isUniform(Reg);
  case UniP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isUniform(Reg);
  case UniP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isUniform(Reg);
  case UniP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isUniform(Reg);
  case UniP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isUniform(Reg);
  case UniP8:
    return MRI.getType(Reg) == LLT::pointer(8, 128) && MUI.isUniform(Reg);
  case UniPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isUniform(Reg);
  case UniPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isUniform(Reg);
  case UniPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isUniform(Reg);
  case UniV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isUniform(Reg);
  case UniV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isUniform(Reg);
  case UniB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isUniform(Reg);
  case UniB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isUniform(Reg);
  case UniB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isUniform(Reg);
  case UniB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isUniform(Reg);
  case UniB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isUniform(Reg);
  case UniB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isUniform(Reg);
  case UniB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isUniform(Reg);
  case UniBRC: {
    if (!MUI.isUniform(Reg))
      return false;
    // Check if there is an SGPR register class with the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    // There is no 16-bit SGPR register class. An extra size check is required
    // since getSGPRClassForBitWidth returns SReg_32RegClass for a size of 16.
    unsigned LLTSize = MRI.getType(Reg).getSizeInBits();
    return LLTSize >= 32 && TRI->getSGPRClassForBitWidth(LLTSize);
  }
  case DivS1:
    return MRI.getType(Reg) == LLT::scalar(1) && MUI.isDivergent(Reg);
  case DivS16:
    return MRI.getType(Reg) == LLT::scalar(16) && MUI.isDivergent(Reg);
  case DivS32:
    return MRI.getType(Reg) == LLT::scalar(32) && MUI.isDivergent(Reg);
  case DivS64:
    return MRI.getType(Reg) == LLT::scalar(64) && MUI.isDivergent(Reg);
  case DivS128:
    return MRI.getType(Reg) == LLT::scalar(128) && MUI.isDivergent(Reg);
  case DivP0:
    return MRI.getType(Reg) == LLT::pointer(0, 64) && MUI.isDivergent(Reg);
  case DivP1:
    return MRI.getType(Reg) == LLT::pointer(1, 64) && MUI.isDivergent(Reg);
  case DivP2:
    return MRI.getType(Reg) == LLT::pointer(2, 32) && MUI.isDivergent(Reg);
  case DivP3:
    return MRI.getType(Reg) == LLT::pointer(3, 32) && MUI.isDivergent(Reg);
  case DivP4:
    return MRI.getType(Reg) == LLT::pointer(4, 64) && MUI.isDivergent(Reg);
  case DivP5:
    return MRI.getType(Reg) == LLT::pointer(5, 32) && MUI.isDivergent(Reg);
  case DivPtr32:
    return isAnyPtr(MRI.getType(Reg), 32) && MUI.isDivergent(Reg);
  case DivPtr64:
    return isAnyPtr(MRI.getType(Reg), 64) && MUI.isDivergent(Reg);
  case DivPtr128:
    return isAnyPtr(MRI.getType(Reg), 128) && MUI.isDivergent(Reg);
  case DivV2S16:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 16) && MUI.isDivergent(Reg);
  case DivV2S32:
    return MRI.getType(Reg) == LLT::fixed_vector(2, 32) && MUI.isDivergent(Reg);
  case DivB32:
    return MRI.getType(Reg).getSizeInBits() == 32 && MUI.isDivergent(Reg);
  case DivB64:
    return MRI.getType(Reg).getSizeInBits() == 64 && MUI.isDivergent(Reg);
  case DivB96:
    return MRI.getType(Reg).getSizeInBits() == 96 && MUI.isDivergent(Reg);
  case DivB128:
    return MRI.getType(Reg).getSizeInBits() == 128 && MUI.isDivergent(Reg);
  case DivB160:
    return MRI.getType(Reg).getSizeInBits() == 160 && MUI.isDivergent(Reg);
  case DivB256:
    return MRI.getType(Reg).getSizeInBits() == 256 && MUI.isDivergent(Reg);
  case DivB512:
    return MRI.getType(Reg).getSizeInBits() == 512 && MUI.isDivergent(Reg);
  case DivBRC: {
    if (!MUI.isDivergent(Reg))
      return false;
    // Check if there is a VGPR register class with the same size as the LLT.
    const SIRegisterInfo *TRI =
        static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());
    return TRI->getVGPRClassForBitWidth(MRI.getType(Reg).getSizeInBits());
  }
  case _:
    return true;
  default:
    llvm_unreachable("missing matchUniformityAndLLT");
  }
}
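
// Illustrative note (assumption, not from the original source): UniS64
// accepts a virtual register %x only when MRI.getType(%x) == LLT::scalar(64)
// and MUI.isUniform(%x) both hold; DivS64 requires the same type but
// MUI.isDivergent(%x). The B* variants constrain only the total size in bits,
// so B64 accepts s64, v2s32, v4s16 and 64-bit pointers alike.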

bool PredicateMapping::match(const MachineInstr &MI,
                             const MachineUniformityInfo &MUI,
                             const MachineRegisterInfo &MRI) const {
  // Check LLT signature.
  for (unsigned i = 0; i < OpUniformityAndTypes.size(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (OpUniformityAndTypes[i] == _) {
      assert((!MI.getOperand(i).isReg() ||
              !MI.getOperand(i).getReg().isVirtual()) &&
             "_ is for non-register and physical register operands only");
      continue;
    }

    // Remaining IDs check registers.
    if (!MO.isReg())
      return false;

    if (!matchUniformityAndLLT(MO.getReg(), OpUniformityAndTypes[i], MUI, MRI))
      return false;
  }

  // More complex check.
  if (TestFunc)
    return TestFunc(MI);

  return true;
}
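
// Illustrative example (assumption, not from the original source): a
// PredicateMapping({UniS32, S32}) matches `%dst = G_FPTOUI %src` when
// operand 0 is a uniform s32 and operand 1 is an s32 of either uniformity,
// while a `_` entry skips non-register and physical-register operands.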

SetOfRulesForOpcode::SetOfRulesForOpcode() {}

SetOfRulesForOpcode::SetOfRulesForOpcode(FastRulesTypes FastTypes)
    : FastTypes(FastTypes) {}

UniformityLLTOpPredicateID LLTToId(LLT Ty) {
  if (Ty == LLT::scalar(16))
    return S16;
  if (Ty == LLT::scalar(32))
    return S32;
  if (Ty == LLT::scalar(64))
    return S64;
  if (Ty == LLT::fixed_vector(2, 16))
    return V2S16;
  if (Ty == LLT::fixed_vector(2, 32))
    return V2S32;
  if (Ty == LLT::fixed_vector(3, 32))
    return V3S32;
  if (Ty == LLT::fixed_vector(4, 32))
    return V4S32;
  return _;
}

UniformityLLTOpPredicateID LLTToBId(LLT Ty) {
  if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
      isAnyPtr(Ty, 32))
    return B32;
  if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) ||
      Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64))
    return B64;
  if (Ty == LLT::fixed_vector(3, 32))
    return B96;
  if (Ty == LLT::fixed_vector(4, 32) || Ty == LLT::fixed_vector(2, 64) ||
      Ty == LLT::fixed_vector(8, 16) || isAnyPtr(Ty, 128))
    return B128;
  return _;
}

const RegBankLLTMapping *
SetOfRulesForOpcode::findMappingForMI(const MachineInstr &MI,
                                      const MachineRegisterInfo &MRI,
                                      const MachineUniformityInfo &MUI) const {
  // Search in "Fast Rules".
  // Note: if fast rules are enabled, a RegBankLLTMapping must be added in each
  // slot that could "match the fast Predicate". If not, InvalidMapping is
  // returned, which results in failure; the "Slow Rules" are not searched.
  if (FastTypes != NoFastRules) {
    Register Reg = MI.getOperand(0).getReg();
    int Slot;
    if (FastTypes == StandardB)
      Slot = getFastPredicateSlot(LLTToBId(MRI.getType(Reg)));
    else
      Slot = getFastPredicateSlot(LLTToId(MRI.getType(Reg)));

    if (Slot != -1)
      return MUI.isUniform(Reg) ? &Uni[Slot] : &Div[Slot];
  }

  // Slow search for more complex rules.
  for (const RegBankLegalizeRule &Rule : Rules) {
    if (Rule.Predicate.match(MI, MUI, MRI))
      return &Rule.OperandMapping;
  }

  return nullptr;
}
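
// Illustrative example (assumption, not from the original source): with
// `Standard` fast rules, a uniform `%dst:_(s32) = G_ADD %a, %b` takes the
// fast path: LLTToId(s32) == S32 maps to slot 0 and Uni[0] is returned
// without scanning Rules; a divergent s64 G_ADD would instead return Div[2].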

void SetOfRulesForOpcode::addRule(RegBankLegalizeRule Rule) {
  Rules.push_back(Rule);
}

void SetOfRulesForOpcode::addFastRuleDivergent(UniformityLLTOpPredicateID Ty,
                                               RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Div[Slot] = std::move(RuleApplyIDs);
}

void SetOfRulesForOpcode::addFastRuleUniform(UniformityLLTOpPredicateID Ty,
                                             RegBankLLTMapping RuleApplyIDs) {
  int Slot = getFastPredicateSlot(Ty);
  assert(Slot != -1 && "Ty unsupported in this FastRulesTypes");
  Uni[Slot] = std::move(RuleApplyIDs);
}

int SetOfRulesForOpcode::getFastPredicateSlot(
    UniformityLLTOpPredicateID Ty) const {
  switch (FastTypes) {
  case Standard: {
    switch (Ty) {
    case S32:
      return 0;
    case S16:
      return 1;
    case S64:
      return 2;
    case V2S16:
      return 3;
    default:
      return -1;
    }
  }
  case StandardB: {
    switch (Ty) {
    case B32:
      return 0;
    case B64:
      return 1;
    case B96:
      return 2;
    case B128:
      return 3;
    default:
      return -1;
    }
  }
  case Vector: {
    switch (Ty) {
    case S32:
      return 0;
    case V2S32:
      return 1;
    case V3S32:
      return 2;
    case V4S32:
      return 3;
    default:
      return -1;
    }
  }
  default:
    return -1;
  }
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForGOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, GRulesAlias, GRules, FastTypes);
}

RegBankLegalizeRules::RuleSetInitializer
RegBankLegalizeRules::addRulesForIOpcs(std::initializer_list<unsigned> OpcList,
                                       FastRulesTypes FastTypes) {
  return RuleSetInitializer(OpcList, IRulesAlias, IRules, FastTypes);
}

const SetOfRulesForOpcode *
RegBankLegalizeRules::getRulesForOpc(MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  if (Opc == AMDGPU::G_INTRINSIC || Opc == AMDGPU::G_INTRINSIC_CONVERGENT ||
      Opc == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS ||
      Opc == AMDGPU::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS) {
    unsigned IntrID = cast<GIntrinsic>(MI).getIntrinsicID();
    auto IRAIt = IRulesAlias.find(IntrID);
    if (IRAIt == IRulesAlias.end())
      return nullptr;
    return &IRules.at(IRAIt->second);
  }

  auto GRAIt = GRulesAlias.find(Opc);
  if (GRAIt == GRulesAlias.end())
    return nullptr;
  return &GRules.at(GRAIt->second);
}

// Syntactic sugar wrapper for a predicate lambda that enables '&&', '||' and
// '!'.
class Predicate {
private:
  struct Elt {
    // Save a formula composed of Pred, '&&', '||' and '!' as a jump table.
    // Sink '!' into Pred. For example, !((A && !B) || C) -> (!A || B) && !C.
    // Sequences of && and || are represented by jumps, for example:
    // (A && B && ... X) or (A && B && ... X) || Y
    //   A == true: jump to B
    //   A == false: jump to end or Y, result is A (false) or Y
    // (A || B || ... X) or (A || B || ... X) && Y
    //   A == true: jump to end or Y, result is A (true) or Y
    //   A == false: jump to B
    // Notice that when negating an expression, we simply flip Neg on each Pred
    // and swap TJumpOffset and FJumpOffset (&& becomes ||, || becomes &&).
    std::function<bool(const MachineInstr &)> Pred;
    bool Neg; // Neg of Pred is calculated before jump
    unsigned TJumpOffset;
    unsigned FJumpOffset;
  };

  SmallVector<Elt, 8> Expression;

  Predicate(SmallVectorImpl<Elt> &&Expr) { Expression.swap(Expr); };

public:
  Predicate(std::function<bool(const MachineInstr &)> Pred) {
    Expression.push_back({Pred, false, 1, 1});
  };

  bool operator()(const MachineInstr &MI) const {
    unsigned Idx = 0;
    unsigned ResultIdx = Expression.size();
    bool Result;
    do {
      Result = Expression[Idx].Pred(MI);
      Result = Expression[Idx].Neg ? !Result : Result;
      if (Result) {
        Idx += Expression[Idx].TJumpOffset;
      } else {
        Idx += Expression[Idx].FJumpOffset;
      }
    } while (Idx != ResultIdx);

    return Result;
  };

  Predicate operator!() const {
    SmallVector<Elt, 8> NegExpression;
    for (const Elt &ExprElt : Expression) {
      NegExpression.push_back({ExprElt.Pred, !ExprElt.Neg, ExprElt.FJumpOffset,
                               ExprElt.TJumpOffset});
    }
    return Predicate(std::move(NegExpression));
  };

  Predicate operator&&(const Predicate &RHS) const {
    SmallVector<Elt, 8> AndExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in false, the whole expression results in false.
      if (i + AndExpression[i].FJumpOffset == ResultIdx)
        AndExpression[i].FJumpOffset += RHSSize;
    }

    AndExpression.append(RHS.Expression);

    return Predicate(std::move(AndExpression));
  }

  Predicate operator||(const Predicate &RHS) const {
    SmallVector<Elt, 8> OrExpression = Expression;

    unsigned RHSSize = RHS.Expression.size();
    unsigned ResultIdx = Expression.size();
    for (unsigned i = 0; i < ResultIdx; ++i) {
      // If the LHS results in true, the whole expression results in true.
      if (i + OrExpression[i].TJumpOffset == ResultIdx)
        OrExpression[i].TJumpOffset += RHSSize;
    }

    OrExpression.append(RHS.Expression);

    return Predicate(std::move(OrExpression));
  }
};
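
// Illustrative sketch (not part of the original file): how a composed
// predicate is encoded. Assuming leaf predicates A, B and C, (A || B) && C
// becomes the jump table:
//   Idx 0: A, TJump +2 (skip B, evaluate C), FJump +1 (try B)
//   Idx 1: B, TJump +1 (evaluate C),         FJump +2 (past C, result false)
//   Idx 2: C, TJump +1, FJump +1 (either way, the result is C)
// operator() walks the table until Idx reaches Expression.size() and returns
// the last computed Result.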

// Initialize rules.
RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
                                           MachineRegisterInfo &_MRI)
    : ST(&_ST), MRI(&_MRI) {

  addRulesForGOpcs({G_ADD, G_SUB}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}});

  addRulesForGOpcs({G_UADDO, G_USUBO}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_UADDE, G_USUBE, G_SADDE, G_SSUBE}, Standard)
      .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32, Sgpr32AExtBoolInReg}})
      .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}});

  addRulesForGOpcs({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  bool HasVecMulU64 = ST->hasVectorMulU64();
  addRulesForGOpcs({G_MUL}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{SgprB64}, {SgprB64, SgprB64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}})
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}}, HasVecMulU64)
      .Div(S64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32Mul}, !HasVecMulU64);

  bool hasMulHi = ST->hasScalarMulHiInsts();
  addRulesForGOpcs({G_UMULH, G_SMULH}, Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasMulHi)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasMulHi);

  addRulesForGOpcs({G_AMDGPU_MAD_U64_U32}, Standard)
      .Div(S64, {{Vgpr64, Vcc}, {Vgpr32, Vgpr32, Vgpr64}})
      .Uni(S64, {{UniInVgprS64, UniInVcc}, {Vgpr32, Vgpr32, Vgpr64}});

  bool HasScalarSMulU64 = ST->hasScalarSMulU64();
  addRulesForGOpcs({G_AMDGPU_S_MUL_U64_U32, G_AMDGPU_S_MUL_I64_I32}, Standard)
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}, UniMul64}, HasScalarSMulU64)
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}, DivSMulToMAD});

  addRulesForGOpcs({G_XOR, G_OR, G_AND}, StandardB)
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc, Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16, Sgpr16}}})
      .Any({{DivS16}, {{Vgpr16}, {Vgpr16, Vgpr16}}})
      .Uni(B32, {{SgprB32}, {SgprB32, SgprB32}})
      .Div(B32, {{VgprB32}, {VgprB32, VgprB32}})
      .Uni(B64, {{SgprB64}, {SgprB64, SgprB64}})
      .Div(B64, {{VgprB64}, {VgprB64, VgprB64}, SplitTo32});

  addRulesForGOpcs({G_SHL}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_LSHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_ASHR}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackBitShift})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FSHR}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FRAME_INDEX}).Any({{UniP5, _}, {{SgprP5}, {None}}});

  addRulesForGOpcs({G_UBFX, G_SBFX}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}, S_BFE})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr32, Sgpr32}, S_BFE})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32, Vgpr32}, V_BFE});

  addRulesForGOpcs({G_SMIN, G_SMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt, Sgpr32SExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_UMIN, G_UMAX}, Standard)
      .Uni(S16, {{Sgpr32Trunc}, {Sgpr32ZExt, Sgpr32ZExt}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  // Note: we only write S1 rules for G_IMPLICIT_DEF, G_CONSTANT and
  // G_FCONSTANT here; the rest is trivially regbank-selected earlier.
  addRulesForGOpcs({G_IMPLICIT_DEF}).Any({{UniS1}, {{Sgpr32Trunc}, {}}});
  addRulesForGOpcs({G_CONSTANT})
      .Any({{UniS1, _}, {{Sgpr32Trunc}, {None}, UniCstExt}});

  addRulesForGOpcs({G_FREEZE})
      .Any({{UniS1}, {{Sgpr32Trunc}, {Sgpr32AExt}}})
      .Any({{DivS1}, {{Vcc}, {Vcc}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr16}}})
      .Any({{UniBRC}, {{SgprBRC}, {SgprBRC}}})
      .Any({{DivBRC}, {{VgprBRC}, {VgprBRC}}});

  addRulesForGOpcs({G_UNMERGE_VALUES})
      .Any({{UniS16}, {{}, {}, UnmergeToShiftTrunc}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgpr}})
      .Any({{DivBRC}, {{}, {}, ApplyAllVgpr}});

  addRulesForGOpcs({G_PHI})
      .Any({{UniS1}, {{}, {}, AextToS32InIncomingBlockGPHI}})
      .Any({{UniS16}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{UniBRC}, {{}, {}, VerifyAllSgprGPHI}})
      .Any({{DivBRC}, {{}, {}, VerifyAllSgprOrVgprGPHI}});

  // LOAD {Div}, {{VgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD {Uni}, {{UniInVgprDst...}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // LOAD_NORET {}, {{}, {Imm, VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  // STORE {}, {{}, {VgprSrc, ..., Sgpr_WF_RsrcIdx}}
  addRulesForGOpcs({G_AMDGPU_INTRIN_IMAGE_LOAD, G_AMDGPU_INTRIN_IMAGE_LOAD_D16,
                    G_AMDGPU_INTRIN_IMAGE_LOAD_NORET,
                    G_AMDGPU_INTRIN_IMAGE_STORE,
                    G_AMDGPU_INTRIN_IMAGE_STORE_D16})
      .Any({{}, {{}, {}, ApplyINTRIN_IMAGE}});

  Predicate isSignedICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return CmpInst::isSigned(Pred);
  });

  Predicate isEqualityICmp([](const MachineInstr &MI) -> bool {
    auto Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
    return ICmpInst::isEquality(Pred);
  });

  bool HasScalarCompareEq64 = ST->hasScalarCompareEq64();
  // clang-format off
  addRulesForGOpcs({G_ICMP})
      .Any({{{UniS1, _, S16}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32SExt, Sgpr32SExt}}})
      .Any({{{UniS1, _, S16}, !isEqualityICmp && !isSignedICmp}, {{Sgpr32Trunc}, {None, Sgpr32ZExt, Sgpr32ZExt}}})
      .Any({{{DivS1, _, S16}}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
      .Any({{{UniS1, _, S32}}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}})
      .Any({{{DivS1, _, S32}}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, Sgpr64, Sgpr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, S64}, !isEqualityICmp}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{DivS1, _, S64}}, {{Vcc}, {None, Vgpr64, Vgpr64}}})
      .Any({{{UniS1, _, Ptr32}}, {{Sgpr32Trunc}, {None, SgprPtr32, SgprPtr32}}})
      .Any({{{DivS1, _, Ptr32}}, {{Vcc}, {None, VgprPtr32, VgprPtr32}}})
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{Sgpr32Trunc}, {None, SgprPtr64, SgprPtr64}}}, HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}}, !HasScalarCompareEq64)
      .Any({{{UniS1, _, Ptr64}, !isEqualityICmp}, {{UniInVcc}, {None, VgprPtr64, VgprPtr64}}})
      .Any({{{DivS1, _, Ptr64}}, {{Vcc}, {None, VgprPtr64, VgprPtr64}}});
  // clang-format on

  addRulesForGOpcs({G_BRCOND})
      .Any({{UniS1}, {{}, {Sgpr32AExtBoolInReg}}})
      .Any({{DivS1}, {{}, {Vcc}}});

  addRulesForGOpcs({G_BR}).Any({{_}, {{}, {None}}});

  addRulesForGOpcs({G_SELECT}, StandardB)
      .Any({{DivS16}, {{Vgpr16}, {Vcc, Vgpr16, Vgpr16}}})
      .Any({{UniS16}, {{Sgpr16}, {Sgpr32AExtBoolInReg, Sgpr16, Sgpr16}}})
      .Div(B32, {{VgprB32}, {Vcc, VgprB32, VgprB32}})
      .Uni(B32, {{SgprB32}, {Sgpr32AExtBoolInReg, SgprB32, SgprB32}})
      .Div(B64, {{VgprB64}, {Vcc, VgprB64, VgprB64}, SplitTo32Select})
      .Uni(B64, {{SgprB64}, {Sgpr32AExtBoolInReg, SgprB64, SgprB64}});

  addRulesForGOpcs({G_ANYEXT})
      .Any({{UniS16, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS32, S1}, {{None}, {None}}}) // should be combined away
      .Any({{UniS64, S1}, {{None}, {None}}}) // should be combined away
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  bool Has16bitCmp = ST->has16BitInsts();

  // In GlobalISel, an in-register G_TRUNC is treated as a no-op and is
  // instruction-selected into a COPY. It is up to the user to deal with the
  // truncated bits.
  addRulesForGOpcs({G_TRUNC})
      .Any({{UniS1, UniS16}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS32}, {{None}, {None}}}) // should be combined away
      .Any({{UniS1, UniS64}, {{None}, {None}}}) // should be combined away
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{Sgpr32}, {Sgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{SgprV2S16}, {SgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      // This is non-trivial. VgprToVccCopy is done using a compare instruction.
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr16}, VgprToVccCopy}}, Has16bitCmp)
      .Any({{DivS1, DivS16}, {{Vcc}, {Vgpr32AExt}, VgprToVccCopy}},
           !Has16bitCmp)
      .Any({{DivS1, DivS32}, {{Vcc}, {Vgpr32}, VgprToVccCopy}})
      .Any({{DivS1, DivS64}, {{Vcc}, {Vgpr64}, VgprToVccCopy}});

  addRulesForGOpcs({G_ZEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32ZExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32ZExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT})
      .Any({{UniS16, S1}, {{Sgpr32Trunc}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS32, S1}, {{Sgpr32}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{UniS64, S1}, {{Sgpr64}, {Sgpr32AExtBoolInReg}, UniExtToSel}})
      .Any({{DivS16, S1}, {{Vgpr16}, {Vcc}, VccExtToSel}})
      .Any({{DivS32, S1}, {{Vgpr32}, {Vcc}, VccExtToSel}})
      .Any({{DivS64, S1}, {{Vgpr64}, {Vcc}, VccExtToSel}})
      .Any({{UniS64, S32}, {{Sgpr64}, {Sgpr32}, Ext32To64}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}, Ext32To64}})
      // Not extending S16 to S32 is questionable.
      .Any({{UniS64, S16}, {{Sgpr64}, {Sgpr32SExt}, Ext32To64}})
      .Any({{DivS64, S16}, {{Vgpr64}, {Vgpr32SExt}, Ext32To64}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}});

  addRulesForGOpcs({G_SEXT_INREG})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}})
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S64}, {{Sgpr64}, {Sgpr64}}})
      .Any({{DivS64, S64}, {{Vgpr64}, {Vgpr64}, SplitTo32SExtInReg}});

  addRulesForGOpcs({G_ASSERT_ZEXT, G_ASSERT_SEXT}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Imm}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Imm}})
      .Uni(S64, {{Sgpr64}, {Sgpr64, Imm}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Imm}});

  addRulesForGOpcs({G_ASSERT_ALIGN}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64}}});

  // Atomic read-modify-write operations: result and value are always VGPR,
  // pointer varies by address space.
  addRulesForGOpcs({G_ATOMICRMW_ADD, G_ATOMICRMW_SUB, G_ATOMICRMW_XCHG,
                    G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
                    G_ATOMICRMW_MIN, G_ATOMICRMW_MAX, G_ATOMICRMW_UMIN,
                    G_ATOMICRMW_UMAX, G_ATOMICRMW_UINC_WRAP,
                    G_ATOMICRMW_UDEC_WRAP})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}});

  bool HasAtomicFlatPkAdd16Insts = ST->hasAtomicFlatPkAdd16Insts();
  bool HasAtomicBufferGlobalPkAddF16Insts =
      ST->hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
      ST->hasAtomicBufferGlobalPkAddF16Insts();
  bool HasAtomicDsPkAdd16Insts = ST->hasAtomicDsPkAdd16Insts();
  addRulesForGOpcs({G_ATOMICRMW_FADD})
      .Any({{DivS32, P0, S32}, {{Vgpr32}, {VgprP0, Vgpr32}}})
      .Any({{DivS64, P0, S64}, {{Vgpr64}, {VgprP0, Vgpr64}}})
      .Any({{DivS32, P1, S32}, {{Vgpr32}, {VgprP1, Vgpr32}}})
      .Any({{DivS64, P1, S64}, {{Vgpr64}, {VgprP1, Vgpr64}}})
      .Any({{DivS32, P3, S32}, {{Vgpr32}, {VgprP3, Vgpr32}}})
      .Any({{DivS64, P3, S64}, {{Vgpr64}, {VgprP3, Vgpr64}}})
      .Any({{DivV2S16, P0, V2S16}, {{VgprV2S16}, {VgprP0, VgprV2S16}}},
           HasAtomicFlatPkAdd16Insts)
      .Any({{DivV2S16, P1, V2S16}, {{VgprV2S16}, {VgprP1, VgprV2S16}}},
           HasAtomicBufferGlobalPkAddF16Insts)
      .Any({{DivV2S16, P3, V2S16}, {{VgprV2S16}, {VgprP3, VgprV2S16}}},
           HasAtomicDsPkAdd16Insts);

  addRulesForGOpcs({G_ATOMIC_CMPXCHG})
      .Any({{DivS32, P2}, {{Vgpr32}, {VgprP2, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P2}, {{Vgpr64}, {VgprP2, Vgpr64, Vgpr64}}})
      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3, Vgpr32, Vgpr32}}})
      .Any({{DivS64, P3}, {{Vgpr64}, {VgprP3, Vgpr64, Vgpr64}}});

  addRulesForGOpcs({G_AMDGPU_ATOMIC_CMPXCHG})
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0, VgprV2S32}}})
      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1, VgprV2S32}}})
      .Any({{DivS64, P0}, {{Vgpr64}, {VgprP0, VgprV2S64}}})
      .Any({{DivS64, P1}, {{Vgpr64}, {VgprP1, VgprV2S64}}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_CMPSWAP}, Standard)
      .Div(S32, {{Vgpr32},
                 {Vgpr32, Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64},
                 {Vgpr64, Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_SWAP, G_AMDGPU_BUFFER_ATOMIC_UMAX,
                    G_AMDGPU_BUFFER_ATOMIC_UMIN, G_AMDGPU_BUFFER_ATOMIC_SMAX,
                    G_AMDGPU_BUFFER_ATOMIC_SMIN},
                   Standard)
      .Div(S32, {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(S64, {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  bool hasSMRDx3 = ST->hasScalarDwordx3Loads();
  bool hasSMRDSmall = ST->hasScalarSubwordLoads();
  bool usesTrue16 = ST->useRealTrue16Insts();

  Predicate isAlign16([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(16);
  });

  Predicate isAlign4([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getAlign() >= Align(4);
  });

  Predicate isAtomicMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isAtomic();
  });

  Predicate isUniMMO([](const MachineInstr &MI) -> bool {
    return AMDGPU::isUniformMMO(*MI.memoperands_begin());
  });

  Predicate isConst([](const MachineInstr &MI) -> bool {
    // The address space in the MMO can be different than the address space on
    // the pointer.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned AS = MMO->getAddrSpace();
    return AS == AMDGPUAS::CONSTANT_ADDRESS ||
           AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
  });

  Predicate isVolatileMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isVolatile();
  });

  Predicate isInvMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->isInvariant();
  });

  Predicate isNoClobberMMO([](const MachineInstr &MI) -> bool {
    return (*MI.memoperands_begin())->getFlags() & MONoClobber;
  });

  Predicate isNaturalAligned([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return MMO->getAlign() >= Align(MMO->getSize().getValue());
  });

  Predicate is8Or16BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    const unsigned MemSize = 8 * MMO->getSize().getValue();
    return MemSize == 16 || MemSize == 8;
  });

  Predicate is32BitMMO([](const MachineInstr &MI) -> bool {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return 8 * MMO->getSize().getValue() == 32;
  });

  auto isUL = !isAtomicMMO && isUniMMO && (isConst || !isVolatileMMO) &&
              (isConst || isInvMMO || isNoClobberMMO);
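
  // Illustrative reading (not from the original source): a load qualifies as
  // a "uniform load" (isUL) when it is not atomic, its MMO is uniform, and
  // the loaded memory cannot change under it: either the address space is
  // constant, or the MMO is non-volatile and also invariant or MONoClobber.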

  // clang-format off
  // TODO: S32Dst, 16-bit any-extending load should not appear on True16 targets
  addRulesForGOpcs({G_LOAD})
      // flat, addrspace(0), never uniform - flat_load
      .Any({{DivS16, P0}, {{Vgpr16}, {VgprP0}}}, usesTrue16)
      .Any({{DivB32, P0}, {{VgprB32}, {VgprP0}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P0}, {{VgprB64}, {VgprP0}}})
      .Any({{DivB96, P0}, {{VgprB96}, {VgprP0}}})
      .Any({{DivB128, P0}, {{VgprB128}, {VgprP0}}})

      // global, addrspace(1)
      // divergent - global_load
      .Any({{DivS16, P1}, {{Vgpr16}, {VgprP1}}}, usesTrue16)
      .Any({{DivB32, P1}, {{VgprB32}, {VgprP1}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P1}, {{VgprB64}, {VgprP1}}})
      .Any({{DivB96, P1}, {{VgprB96}, {VgprP1}}})
      .Any({{DivB128, P1}, {{VgprB128}, {VgprP1}}})
      .Any({{DivB256, P1}, {{VgprB256}, {VgprP1}, SplitLoad}})
      .Any({{DivB512, P1}, {{VgprB512}, {VgprP1}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P1}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP1}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P1}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      // TODO: SplitLoad when !isNaturalAligned && isUL and target hasSMRDSmall
      .Any({{{UniB32, P1}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P1}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP1}}}) // 32-bit load
      .Any({{{UniB64, P1}, isAlign4 && isUL}, {{SgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, isAlign16 && isUL}, {{SgprB96}, {SgprP1}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP1}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P1}, isAlign4 && isUL}, {{SgprB96}, {SgprP1}}}, hasSMRDx3)
      .Any({{{UniB128, P1}, isAlign4 && isUL}, {{SgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, isAlign4 && isUL}, {{SgprB256}, {SgprP1}}})
      .Any({{{UniB512, P1}, isAlign4 && isUL}, {{SgprB512}, {SgprP1}}})

      // Uniform load via global or buffer load, for example a volatile or
      // non-aligned uniform load. Not using the standard
      // {{UniInVgprTy}, {VgprP1}} mapping, since it would be selected as
      // global_load; use SgprP1 for the pointer instead to match patterns
      // without flat-for-global, the default for GFX7 and older.
      // -> +flat-for-global + {{UniInVgprTy}, {SgprP1}} - global_load
      // -> -flat-for-global + {{UniInVgprTy}, {SgprP1}} - buffer_load
      .Any({{{UniS16, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P1}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP1}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP1}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P1}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP1}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P1}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP1}}})
      .Any({{{UniB96, P1}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP1}}})
      .Any({{{UniB128, P1}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP1}}})
      .Any({{{UniB256, P1}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP1}, SplitLoad}})
      .Any({{{UniB512, P1}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP1}, SplitLoad}})

      // local, addrspace(3) - ds_load
      .Any({{DivS16, P3}, {{Vgpr16}, {VgprP3}}}, usesTrue16)
      .Any({{DivB32, P3}, {{VgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P3}, {{VgprB64}, {VgprP3}}})
      .Any({{DivB96, P3}, {{VgprB96}, {VgprP3}}})
      .Any({{DivB128, P3}, {{VgprB128}, {VgprP3}}})

      .Any({{UniS16, P3}, {{UniInVgprS16}, {SgprP3}}}, usesTrue16) // 16-bit load
      .Any({{UniB32, P3}, {{UniInVgprB32}, {VgprP3}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{UniB64, P3}, {{UniInVgprB64}, {VgprP3}}})
      .Any({{UniB96, P3}, {{UniInVgprB96}, {VgprP3}}})
      .Any({{UniB128, P3}, {{UniInVgprB128}, {VgprP3}}})

      // constant, addrspace(4)
      // divergent - global_load
      .Any({{DivS16, P4}, {{Vgpr16}, {VgprP4}}}, usesTrue16)
      .Any({{DivB32, P4}, {{VgprB32}, {VgprP4}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P4}, {{VgprB64}, {VgprP4}}})
      .Any({{DivB96, P4}, {{VgprB96}, {VgprP4}}})
      .Any({{DivB128, P4}, {{VgprB128}, {VgprP4}}})
      .Any({{DivB256, P4}, {{VgprB256}, {VgprP4}, SplitLoad}})
      .Any({{DivB512, P4}, {{VgprB512}, {VgprP4}, SplitLoad}})

      // uniform - s_load
      .Any({{{UniS16, P4}, isNaturalAligned && isUL}, {{Sgpr32Trunc}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, isAlign4 && isUL}, {{Sgpr32Trunc}, {SgprP4}, WidenMMOToS32}}, usesTrue16 && !hasSMRDSmall) // s16 load to 32-bit load
      .Any({{{UniB32, P4}, isNaturalAligned && isUL}, {{SgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, is8Or16BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall) // 8-bit and 16-bit any-extending load to 32-bit load
      .Any({{{UniB32, P4}, is32BitMMO && isAlign4 && isUL}, {{SgprB32}, {SgprP4}}}) // 32-bit load
      .Any({{{UniB64, P4}, isAlign4 && isUL}, {{SgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, isAlign16 && isUL}, {{SgprB96}, {SgprP4}, WidenLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && !isAlign16 && isUL}, {{SgprB96}, {SgprP4}, SplitLoad}}, !hasSMRDx3)
      .Any({{{UniB96, P4}, isAlign4 && isUL}, {{SgprB96}, {SgprP4}}}, hasSMRDx3)
      .Any({{{UniB128, P4}, isAlign4 && isUL}, {{SgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, isAlign4 && isUL}, {{SgprB256}, {SgprP4}}})
      .Any({{{UniB512, P4}, isAlign4 && isUL}, {{SgprB512}, {SgprP4}}})

      // uniform in vgpr - global_load or buffer_load
      .Any({{{UniS16, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && hasSMRDSmall) // s16 load
      .Any({{{UniS16, P4}, !isAlign4 || !isUL}, {{UniInVgprS16}, {SgprP4}}}, usesTrue16 && !hasSMRDSmall) // s16 load
      .Any({{{UniB32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprB32}, {SgprP4}}}, hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB32, P4}, !isAlign4 || !isUL}, {{UniInVgprB32}, {SgprP4}}}, !hasSMRDSmall) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{{UniB64, P4}, !isAlign4 || !isUL}, {{UniInVgprB64}, {SgprP4}}})
      .Any({{{UniB96, P4}, !isAlign4 || !isUL}, {{UniInVgprB96}, {SgprP4}}})
      .Any({{{UniB128, P4}, !isAlign4 || !isUL}, {{UniInVgprB128}, {SgprP4}}})
      .Any({{{UniB256, P4}, !isAlign4 || !isUL}, {{UniInVgprB256}, {SgprP4}, SplitLoad}})
      .Any({{{UniB512, P4}, !isAlign4 || !isUL}, {{UniInVgprB512}, {SgprP4}, SplitLoad}})

      // private, addrspace(5), never uniform - scratch_load
      .Any({{DivS16, P5}, {{Vgpr16}, {VgprP5}}}, usesTrue16)
      .Any({{DivB32, P5}, {{VgprB32}, {VgprP5}}}) // 32-bit load, 8-bit and 16-bit any-extending load
      .Any({{DivB64, P5}, {{VgprB64}, {VgprP5}}})
      .Any({{DivB96, P5}, {{VgprB96}, {VgprP5}}})
      .Any({{DivB128, P5}, {{VgprB128}, {VgprP5}}})

      .Any({{DivS32, Ptr128}, {{Vgpr32}, {VgprPtr128}}});

  addRulesForGOpcs({G_ZEXTLOAD, G_SEXTLOAD}) // i8 and i16 zero- and sign-extending loads
      .Any({{DivS32, P0}, {{Vgpr32}, {VgprP0}}})

      .Any({{DivS32, P1}, {{Vgpr32}, {VgprP1}}})
      .Any({{{UniS32, P1}, isAlign4 && isUL}, {{Sgpr32}, {SgprP1}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP1}}}, hasSMRDSmall)
      .Any({{{UniS32, P1}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP1}}}, !hasSMRDSmall)
      .Any({{{UniS32, P1}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP1}}}, hasSMRDSmall)

      .Any({{DivS32, P3}, {{Vgpr32}, {VgprP3}}})
      .Any({{UniS32, P3}, {{UniInVgprS32}, {VgprP3}}})

      .Any({{DivS32, P4}, {{Vgpr32}, {VgprP4}}})
      .Any({{{UniS32, P4}, isAlign4 && isUL}, {{Sgpr32}, {SgprP4}, WidenMMOToS32}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, isNaturalAligned && isUL}, {{Sgpr32}, {SgprP4}}}, hasSMRDSmall)
      .Any({{{UniS32, P4}, !isAlign4 || !isUL}, {{UniInVgprS32}, {SgprP4}}}, !hasSMRDSmall)
      .Any({{{UniS32, P4}, !isNaturalAligned || !isUL}, {{UniInVgprS32}, {SgprP4}}}, hasSMRDSmall)

      .Any({{DivS32, P5}, {{Vgpr32}, {VgprP5}}});

  addRulesForGOpcs({G_STORE})
      // addrspace(0)
      .Any({{S16, P0}, {{}, {Vgpr16, VgprP0}}}, usesTrue16) // 16-bit store
      .Any({{B32, P0}, {{}, {VgprB32, VgprP0}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, P0}, {{}, {VgprB64, VgprP0}}})
      .Any({{B96, P0}, {{}, {VgprB96, VgprP0}}})
      .Any({{B128, P0}, {{}, {VgprB128, VgprP0}}})

      // addrspace(1), there are no stores to addrspace(4)
      // For targets:
      // - with "+flat-for-global" - global_store
      // - without (-flat-for-global) - buffer_store addr64
      .Any({{S16, DivP1}, {{}, {Vgpr16, VgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, DivP1}, {{}, {VgprB32, VgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, DivP1}, {{}, {VgprB64, VgprP1}}})
      .Any({{B96, DivP1}, {{}, {VgprB96, VgprP1}}})
      .Any({{B128, DivP1}, {{}, {VgprB128, VgprP1}}})

      // For UniP1, use an sgpr ptr to match flat-for-global patterns. Targets:
      // - with "+flat-for-global" - global_store for both sgpr and vgpr ptr
      // - without (-flat-for-global) - need sgpr ptr to select buffer_store
      .Any({{S16, UniP1}, {{}, {Vgpr16, SgprP1}}}, usesTrue16) // 16-bit store
      .Any({{B32, UniP1}, {{}, {VgprB32, SgprP1}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, UniP1}, {{}, {VgprB64, SgprP1}}})
      .Any({{B96, UniP1}, {{}, {VgprB96, SgprP1}}})
      .Any({{B128, UniP1}, {{}, {VgprB128, SgprP1}}})

      // addrspace(3) and addrspace(5)
      .Any({{S16, Ptr32}, {{}, {Vgpr16, VgprPtr32}}}, usesTrue16) // 16-bit store
      .Any({{B32, Ptr32}, {{}, {VgprB32, VgprPtr32}}}) // 32-bit store, 8-bit and 16-bit truncating store
      .Any({{B64, Ptr32}, {{}, {VgprB64, VgprPtr32}}})
      .Any({{B96, Ptr32}, {{}, {VgprB96, VgprPtr32}}})
      .Any({{B128, Ptr32}, {{}, {VgprB128, VgprPtr32}}});

  // clang-format on

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD, G_AMDGPU_BUFFER_LOAD_FORMAT,
                    G_AMDGPU_TBUFFER_LOAD_FORMAT},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_USHORT, G_AMDGPU_BUFFER_LOAD_UBYTE,
                    G_AMDGPU_BUFFER_LOAD_SSHORT, G_AMDGPU_BUFFER_LOAD_SBYTE},
                   StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_UBYTE_TFE, G_AMDGPU_BUFFER_LOAD_USHORT_TFE},
      StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_LOAD_TFE, G_AMDGPU_BUFFER_LOAD_FORMAT_TFE},
                   StandardB)
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B96, {{VgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B96, {{UniInVgprB96}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B128, {{VgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B128, {{UniInVgprB128}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Any({{DivB160}, {{VgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{UniB160},
            {{UniInVgprB160}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs(
      {G_AMDGPU_BUFFER_LOAD_FORMAT_D16, G_AMDGPU_TBUFFER_LOAD_FORMAT_D16},
      StandardB)
      .Div(B32, {{VgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B32, {{UniInVgprB32}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Div(B64, {{VgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}})
      .Uni(B64, {{UniInVgprB64}, {SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}});

  addRulesForGOpcs({G_AMDGPU_BUFFER_STORE, G_AMDGPU_BUFFER_STORE_BYTE,
                    G_AMDGPU_BUFFER_STORE_SHORT, G_AMDGPU_BUFFER_STORE_FORMAT,
                    G_AMDGPU_BUFFER_STORE_FORMAT_D16,
                    G_AMDGPU_TBUFFER_STORE_FORMAT,
                    G_AMDGPU_TBUFFER_STORE_FORMAT_D16})
      .Any({{B32}, {{}, {VgprB32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B64}, {{}, {VgprB64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B96}, {{}, {VgprB96, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{B128}, {{}, {VgprB128, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  // Buffer atomics: the resource descriptor and scalar offset are SGPR; the
  // data and address components are VGPR.
  //
  // Operand order (SIInstructions.td BufferAtomicGenericInstruction):
  // dst = op vdata, rsrc, vindex, voffset, soffset, offset_imm, cachepolicy,
  // idxen_imm
  addRulesForGOpcs({G_AMDGPU_BUFFER_ATOMIC_FADD})
      .Any({{S32, S32, V4S32, S32, S32, S32},
            {{Vgpr32}, {Vgpr32, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{S64, S64, V4S32, S32, S32, S32},
            {{Vgpr64}, {Vgpr64, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}})
      .Any({{V2S16, V2S16, V4S32, S32, S32, S32},
            {{VgprV2S16},
             {VgprV2S16, SgprV4S32_WF, Vgpr32, Vgpr32, Sgpr32_WF}}});

  addRulesForGOpcs({G_PTR_ADD})
      .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});

  addRulesForGOpcs({G_INTTOPTR})
      .Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
      .Any({{DivPtr32}, {{VgprPtr32}, {Vgpr32}}})
      .Any({{UniPtr64}, {{SgprPtr64}, {Sgpr64}}})
      .Any({{DivPtr64}, {{VgprPtr64}, {Vgpr64}}})
      .Any({{UniPtr128}, {{SgprPtr128}, {Sgpr128}}})
      .Any({{DivPtr128}, {{VgprPtr128}, {Vgpr128}}});

  addRulesForGOpcs({G_PTRTOINT})
      .Any({{UniS32}, {{Sgpr32}, {SgprPtr32}}})
      .Any({{DivS32}, {{Vgpr32}, {VgprPtr32}}})
      .Any({{UniS64}, {{Sgpr64}, {SgprPtr64}}})
      .Any({{DivS64}, {{Vgpr64}, {VgprPtr64}}})
      .Any({{UniS128}, {{Sgpr128}, {SgprPtr128}}})
      .Any({{DivS128}, {{Vgpr128}, {VgprPtr128}}});

  // FIXME: Update llvm/test/CodeGen/AMDGPU/ptrmask.ll to use GlobalISel.
  // Currently crashes on P8 (buffer resource) tests due to legalizer issue.
  addRulesForGOpcs({G_PTRMASK})
      .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
      .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
      .Any({{UniP3}, {{SgprP3}, {SgprP3, Sgpr32}}})
      .Any({{DivP3}, {{VgprP3}, {VgprP3, Vgpr32}}});

  addRulesForGOpcs({G_ABS}, Standard).Uni(S16, {{Sgpr32Trunc}, {Sgpr32SExt}});

  addRulesForGOpcs({G_BITREVERSE}, Standard)
      .Uni(S32, {{Sgpr32}, {Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{Sgpr64}, {Sgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}});

  addRulesForGOpcs({G_FENCE}).Any({{{}}, {{}, {}}});

  addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard)
      .Uni(S64, {{Sgpr64}, {}});

  addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}});

  addRulesForGOpcs({G_GLOBAL_VALUE})
      .Any({{UniP0}, {{SgprP0}, {}}})
      .Any({{UniP1}, {{SgprP1}, {}}})
      .Any({{UniP3}, {{SgprP3}, {}}})
      .Any({{UniP4}, {{SgprP4}, {}}})
      .Any({{UniP8}, {{SgprP8}, {}}});

  addRulesForGOpcs({G_AMDGPU_WAVE_ADDRESS}).Any({{UniP5}, {{SgprP5}, {}}});

  addRulesForGOpcs({G_SI_CALL})
      .Any({{_, UniP0}, {{None}, {SgprP0}}})
      .Any({{_, DivP0}, {{None}, {SgprP0Call_WF}}})
      .Any({{_, UniP4}, {{None}, {SgprP4}}})
      .Any({{_, DivP4}, {{None}, {SgprP4Call_WF}}});

  bool hasSALUFloat = ST->hasSALUFloatInsts();

  addRulesForGOpcs({G_FADD, G_FMUL, G_STRICT_FADD, G_STRICT_FMUL}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FSUB, G_STRICT_FSUB}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FMAD}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FLDEXP, G_STRICT_FLDEXP}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr32}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr32}});

  addRulesForGOpcs({G_FMA, G_STRICT_FMA}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64, Vgpr64}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}}, !hasSALUFloat)
      .Uni(V2S16,
           {{SgprV2S16}, {SgprV2S16, SgprV2S16, SgprV2S16}, ScalarizeToS16},
           hasSALUFloat)
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16, VgprV2S16}},
           !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_FMED3}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16, Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // TODO: This opcode is generated from the i64->i16 signed clamped pattern in
  // the PreLegalizerCombiner. Move the combine to RegBankCombiner to keep more
  // instructions on SALU.
  addRulesForGOpcs({G_AMDGPU_SMED3}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32, Vgpr32}});

  // FNEG and FABS are either folded as source modifiers or can be selected as
  // bitwise XOR and AND with a mask. XOR and AND are available on SALU, but
  // for targets without SALU float we still select them as VGPR since there
  // would be no real SGPR use.
  addRulesForGOpcs({G_FNEG, G_FABS}, Standard)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasSALUFloat)
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasSALUFloat)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32}}, hasSALUFloat)
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}}, !hasSALUFloat)
      .Uni(V2S16, {{SgprV2S16}, {SgprV2S16}, ScalarizeToS16}, hasSALUFloat)
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  addRulesForGOpcs({G_FCANONICALIZE}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32}})
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16}})
      .Any({{UniV2S32}, {{UniInVgprV2S32}, {VgprV2S32}}})
      .Any({{DivV2S32}, {{VgprV2S32}, {VgprV2S32}}});

  bool hasPST = ST->hasPseudoScalarTrans();
  addRulesForGOpcs({G_FSQRT}, Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16}}, hasPST)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16}}, !hasPST);

  addRulesForGOpcs({G_FPTOUI, G_FPTOSI})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat)
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}});

  addRulesForGOpcs({G_UITOFP, G_SITOFP})
      .Any({{UniS16, S16}, {{UniInVgprS16}, {Vgpr16}}})
      .Any({{DivS16, S16}, {{Vgpr16}, {Vgpr16}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S32}, {{Sgpr32}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS32, S32}, {{UniInVgprS32}, {Vgpr32}}}, !hasSALUFloat)
      .Any({{DivS32, S32}, {{Vgpr32}, {Vgpr32}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}});

  addRulesForGOpcs({G_FPEXT})
      .Any({{DivS32, S16}, {{Vgpr32}, {Vgpr16}}})
      .Any({{UniS64, S32}, {{UniInVgprS64}, {Vgpr32}}})
      .Any({{DivS64, S32}, {{Vgpr64}, {Vgpr32}}})
      .Any({{UniS32, S16}, {{Sgpr32}, {Sgpr16}}}, hasSALUFloat)
      .Any({{UniS32, S16}, {{UniInVgprS32}, {Vgpr16}}}, !hasSALUFloat);

  addRulesForGOpcs({G_AMDGPU_CVT_PK_I16_I32}, Standard)
      .Uni(V2S16, {{UniInVgprV2S16}, {Vgpr32, Vgpr32}})
      .Div(V2S16, {{VgprV2S16}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_AMDGPU_FMIN_LEGACY, G_AMDGPU_FMAX_LEGACY}, Standard)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}});

  addRulesForGOpcs({G_FMINIMUM, G_FMAXIMUM}, Standard)
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}})
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}});

  addRulesForGOpcs({G_FMINNUM_IEEE, G_FMAXNUM_IEEE, G_FMINNUM, G_FMAXNUM},
                   Standard)
      .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}})
      .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}})
      .Uni(S64, {{UniInVgprS64}, {Vgpr64, Vgpr64}})
      .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}})
      .Uni(V2S16, {{UniInVgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}})
      .Uni(S16, {{Sgpr16}, {Sgpr16, Sgpr16}}, hasSALUFloat)
      .Uni(S16, {{UniInVgprS16}, {Vgpr16, Vgpr16}}, !hasSALUFloat)
      .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}, hasSALUFloat)
      .Uni(S32, {{UniInVgprS32}, {Vgpr32, Vgpr32}}, !hasSALUFloat);

  addRulesForGOpcs({G_FPTRUNC})
      .Any({{DivS16, S32}, {{Vgpr16}, {Vgpr32}}})
      .Any({{UniS32, S64}, {{UniInVgprS32}, {Vgpr64}}})
      .Any({{DivS32, S64}, {{Vgpr32}, {Vgpr64}}})
      .Any({{UniV2S16, V2S32}, {{UniInVgprV2S16}, {VgprV2S32}}})
      .Any({{DivV2S16, V2S32}, {{VgprV2S16}, {VgprV2S32}}})
      .Any({{UniS16, S32}, {{Sgpr16}, {Sgpr32}}}, hasSALUFloat)
      .Any({{UniS16, S32}, {{UniInVgprS16}, {Vgpr32}}}, !hasSALUFloat);
1385
1386 addRulesForGOpcs({G_IS_FPCLASS})
1387 .Any({{DivS1, S16}, {{Vcc}, {Vgpr16}}})
1388 .Any({{UniS1, S16}, {{UniInVcc}, {Vgpr16}}})
1389 .Any({{DivS1, S32}, {{Vcc}, {Vgpr32}}})
1390 .Any({{UniS1, S32}, {{UniInVcc}, {Vgpr32}}})
1391 .Any({{DivS1, S64}, {{Vcc}, {Vgpr64}}})
1392 .Any({{UniS1, S64}, {{UniInVcc}, {Vgpr64}}});
1393
1394 addRulesForGOpcs({G_FCMP}, Standard)
1395 .Any({{UniS1, _, S16}, {{Sgpr32Trunc}, {None, Sgpr16, Sgpr16}}},
1396 hasSALUFloat)
1397 .Any({{UniS1, _, S16}, {{UniInVcc}, {None, Vgpr16, Vgpr16}}},
1398 !hasSALUFloat)
1399 .Any({{DivS1, _, S16}, {{Vcc}, {None, Vgpr16, Vgpr16}}})
1400 .Any({{UniS1, _, S32}, {{Sgpr32Trunc}, {None, Sgpr32, Sgpr32}}},
1401 hasSALUFloat)
1402 .Any({{UniS1, _, S32}, {{UniInVcc}, {None, Vgpr32, Vgpr32}}},
1403 !hasSALUFloat)
1404 .Any({{DivS1, _, S32}, {{Vcc}, {None, Vgpr32, Vgpr32}}})
1405 .Any({{UniS1, _, S64}, {{UniInVcc}, {None, Vgpr64, Vgpr64}}})
1406 .Any({{DivS1, _, S64}, {{Vcc}, {None, Vgpr64, Vgpr64}}});
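// Note: operand 1 of G_FCMP is the predicate immediate, hence the None entry
// in every source mapping above. With scalar FP available, uniform 16/32-bit
// compares stay on the SALU and Sgpr32Trunc narrows the 32-bit scalar result
// back to s1; without it, and for the 64-bit case, the compare goes through
// VCC via UniInVcc.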
1407
1408 addRulesForGOpcs({G_INTRINSIC_TRUNC, G_INTRINSIC_ROUNDEVEN, G_FFLOOR, G_FCEIL,
1409 G_FEXP2, G_FLOG2},
1410 Standard)
1411 .Uni(S16, {{UniInVgprS16}, {Vgpr16}})
1412 .Div(S16, {{Vgpr16}, {Vgpr16}})
1413 .Uni(S32, {{UniInVgprS32}, {Vgpr32}})
1414 .Div(S32, {{Vgpr32}, {Vgpr32}})
1415 .Uni(S64, {{UniInVgprS64}, {Vgpr64}})
1416 .Div(S64, {{Vgpr64}, {Vgpr64}});
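// Reading these rules: ".Uni(S32, {{UniInVgprS32}, {Vgpr32}})" matches an
// instruction whose operand 0 is a uniform 32-bit scalar and applies the
// listed mapping: the source is placed in a VGPR, the VALU computes the
// result there, and the value (known to be uniform) is copied to an SGPR.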
1417
1418 using namespace Intrinsic;
1419
1420 addRulesForIOpcs({amdgcn_s_getpc}).Any({{UniS64, _}, {{Sgpr64}, {None}}});
1421
1422 addRulesForIOpcs({amdgcn_groupstaticsize}).Any({{S32}, {{Sgpr32}, {IntrId}}});
1423
1424 // This is an "intrinsic lane mask"; it was set to i32/i64 in LLVM IR.
1425 addRulesForIOpcs({amdgcn_end_cf})
1426 .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}})
1427 .Any({{_, UniS64}, {{}, {IntrId, Sgpr64}}});
1428
1429 addRulesForIOpcs({amdgcn_if_break}, Standard)
1430 .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
1431 .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
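// Note: the S32 and S64 forms of amdgcn_if_break presumably correspond to
// wave32 and wave64 lane-mask widths; the condition arrives as Vcc and the
// loop mask stays in SGPRs.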
1432
1433 addRulesForIOpcs({amdgcn_exp})
1434 .Any({{_, _, _, S32, S32, S32, S32},
1435 {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
1436
1437 addRulesForIOpcs({amdgcn_exp_row})
1438 .Any({{_, _, _, S32, S32, S32, S32, _, S32},
1439 {{},
1440 {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
1441 SgprB32_M0}}});
1442
1443 addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
1444 .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});
1445
1446 addRulesForIOpcs({amdgcn_readfirstlane})
1447 .Any({{UniS32, _, DivS32}, {{}, {Sgpr32, None, Vgpr32}}})
1448 // This should not exist in the first place; it comes from call lowering,
1449 // which readfirstlanes the value in case the register is not in an SGPR.
1450 .Any({{UniS32, _, UniS32}, {{}, {Sgpr32, None, Vgpr32}}});
1451
1452 addRulesForIOpcs({amdgcn_s_sleep}).Any({{_, _}, {{}, {IntrId, Imm}}});
1453
1454 addRulesForIOpcs({amdgcn_bitop3}, Standard)
1455 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1456 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16, Vgpr16, Vgpr16}})
1457 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1458 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1459
1460 addRulesForIOpcs({amdgcn_mul_u24, amdgcn_mul_i24}, Standard)
1461 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1462 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}})
1463 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr32, Vgpr32}})
1464 .Div(S64, {{Vgpr64}, {IntrId, Vgpr32, Vgpr32}});
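// Note: the 24-bit multiplies take 32-bit source registers even for the S64
// result, since only the low 24 bits of each source participate.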
1465
1466 addRulesForIOpcs({amdgcn_mulhi_u24, amdgcn_mulhi_i24, amdgcn_fmul_legacy},
1467 Standard)
1468 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32}})
1469 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32}});
1470
1471 addRulesForIOpcs({amdgcn_fma_legacy}, Standard)
1472 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1473 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}});
1474
1475 addRulesForIOpcs({amdgcn_frexp_mant, amdgcn_fract}, Standard)
1476 .Uni(S16, {{UniInVgprS16}, {IntrId, Vgpr16}})
1477 .Div(S16, {{Vgpr16}, {IntrId, Vgpr16}})
1478 .Uni(S32, {{UniInVgprS32}, {IntrId, Vgpr32}})
1479 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}})
1480 .Uni(S64, {{UniInVgprS64}, {IntrId, Vgpr64}})
1481 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64}});
1482
1483 addRulesForIOpcs({amdgcn_prng_b32})
1484 .Any({{UniS32}, {{UniInVgprS32}, {IntrId, Vgpr32}}})
1485 .Any({{DivS32}, {{Vgpr32}, {IntrId, Vgpr32}}});
1486
1487 addRulesForIOpcs({amdgcn_sffbh}, Standard)
1488 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32}})
1489 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32}});
1490
1491 addRulesForIOpcs({amdgcn_ubfe, amdgcn_sbfe}, Standard)
1492 .Div(S32, {{Vgpr32}, {IntrId, Vgpr32, Vgpr32, Vgpr32}})
1493 .Uni(S32, {{Sgpr32}, {IntrId, Sgpr32, Sgpr32, Sgpr32}, S_BFE})
1494 .Uni(S64, {{Sgpr64}, {IntrId, Sgpr64, Sgpr32, Sgpr32}, S_BFE})
1495 .Div(S64, {{Vgpr64}, {IntrId, Vgpr64, Vgpr32, Vgpr32}, V_BFE});
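// Note: the uniform cases use the S_BFE lowering because the scalar BFE
// instructions expect offset and width packed into a single operand, so the
// two separate 32-bit sources must be combined first. The divergent 64-bit
// case uses the V_BFE lowering, presumably expanding into 32-bit VALU
// operations since there is no 64-bit VALU BFE.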
1496
1497 addRulesForIOpcs({amdgcn_global_load_tr_b64})
1498 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1499 .Any({{DivB32}, {{VgprB32}, {IntrId, SgprP1}}});
1500
1501 addRulesForIOpcs({amdgcn_global_load_tr_b128})
1502 .Any({{DivB64}, {{VgprB64}, {IntrId, SgprP1}}})
1503 .Any({{DivB128}, {{VgprB128}, {IntrId, SgprP1}}});
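// Note: the transpose loads always produce divergent, per-lane results while
// the address is kept uniform in SGPRs; the two result widths per intrinsic
// likely correspond to the wave32 and wave64 return types.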
1504
1505 addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
1506 .Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
1507
1508 addRulesForIOpcs(
1509 {amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
1510 .Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
1511
1512 addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
1513 Standard)
1514 .Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
1515
1516 addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
1517 .Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
1518
1519 addRulesForIOpcs({amdgcn_struct_buffer_load_lds})
1520 .Any({{_},
1521 {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1522
1523 addRulesForIOpcs({amdgcn_raw_ptr_buffer_load_lds})
1524 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Sgpr32}}});
1525
1526 addRulesForIOpcs({amdgcn_struct_ptr_buffer_load_lds})
1527 .Any({{_}, {{}, {IntrId, SgprP8, SgprP3, Imm, Vgpr32, Vgpr32, Sgpr32}}});
1528
1529 addRulesForIOpcs({amdgcn_wwm, amdgcn_strict_wwm}, StandardB)
1530 .Div(B32, {{VgprB32}, {IntrId, VgprB32}})
1531 .Uni(B32, {{SgprB32}, {IntrId, SgprB32}})
1532 .Div(B64, {{VgprB64}, {IntrId, VgprB64}})
1533 .Uni(B64, {{SgprB64}, {IntrId, SgprB64}})
1534 .Div(B96, {{VgprB96}, {IntrId, VgprB96}})
1535 .Uni(B96, {{SgprB96}, {IntrId, SgprB96}})
1536 .Div(B128, {{VgprB128}, {IntrId, VgprB128}})
1537 .Uni(B128, {{SgprB128}, {IntrId, SgprB128}})
1538 .Any({{UniB256}, {{SgprB256}, {IntrId, SgprB256}}})
1539 .Any({{DivB256}, {{VgprB256}, {IntrId, VgprB256}}})
1540 .Any({{UniB512}, {{SgprB512}, {IntrId, SgprB512}}})
1541 .Any({{DivB512}, {{VgprB512}, {IntrId, VgprB512}}});
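// Note: StandardB rules match on operand size only (the B-type predicates),
// so a single B32 rule covers 32-bit scalars, pointers, and vectors alike;
// WWM values simply stay in whichever bank matches their uniformity.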
1542
1543} // end initialize rules