LLVM 23.0.0git
AArch64ISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines an instruction selector for the AArch64 target.
10//
11//===----------------------------------------------------------------------===//
12
16#include "llvm/ADT/APSInt.h"
19#include "llvm/IR/Function.h" // To access function attributes.
20#include "llvm/IR/GlobalValue.h"
21#include "llvm/IR/Intrinsics.h"
22#include "llvm/IR/IntrinsicsAArch64.h"
23#include "llvm/Support/Debug.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-isel"
32#define PASS_NAME "AArch64 Instruction Selection"
33
34// https://github.com/llvm/llvm-project/issues/114425
35#if defined(_MSC_VER) && !defined(__clang__) && !defined(NDEBUG)
36#pragma inline_depth(0)
37#endif
38
39//===--------------------------------------------------------------------===//
40/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
41/// instructions for SelectionDAG operations.
42///
43namespace {
44
45class AArch64DAGToDAGISel : public SelectionDAGISel {
46
47 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
48 /// make the right decision when generating code for different targets.
49 const AArch64Subtarget *Subtarget;
50
51public:
52 AArch64DAGToDAGISel() = delete;
53
54 explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
55 CodeGenOptLevel OptLevel)
56 : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
57
58 bool runOnMachineFunction(MachineFunction &MF) override {
59 Subtarget = &MF.getSubtarget<AArch64Subtarget>();
61 }
62
63 void Select(SDNode *Node) override;
64 void PreprocessISelDAG() override;
65
66 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
67 /// inline asm expressions.
68 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
69 InlineAsm::ConstraintCode ConstraintID,
70 std::vector<SDValue> &OutOps) override;
71
72 template <signed Low, signed High, signed Scale>
73 bool SelectRDVLImm(SDValue N, SDValue &Imm);
74
75 template <signed Low, signed High>
76 bool SelectRDSVLShiftImm(SDValue N, SDValue &Imm);
77
78 bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
79 bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
80 bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
81 bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
82 bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
83 return SelectShiftedRegister(N, false, Reg, Shift);
84 }
85 bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
86 return SelectShiftedRegister(N, true, Reg, Shift);
87 }
88 bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
89 return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
90 }
91 bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
92 return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
93 }
94 bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
95 return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
96 }
97 bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
98 return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
99 }
100 bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
101 return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
102 }
103 bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
104 return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
105 }
106 bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
107 return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
108 }
109 bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
110 return SelectAddrModeIndexed(N, 1, Base, OffImm);
111 }
112 bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
113 return SelectAddrModeIndexed(N, 2, Base, OffImm);
114 }
115 bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
116 return SelectAddrModeIndexed(N, 4, Base, OffImm);
117 }
118 bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
119 return SelectAddrModeIndexed(N, 8, Base, OffImm);
120 }
121 bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
122 return SelectAddrModeIndexed(N, 16, Base, OffImm);
123 }
124 bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
125 return SelectAddrModeUnscaled(N, 1, Base, OffImm);
126 }
127 bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
128 return SelectAddrModeUnscaled(N, 2, Base, OffImm);
129 }
130 bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
131 return SelectAddrModeUnscaled(N, 4, Base, OffImm);
132 }
133 bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
134 return SelectAddrModeUnscaled(N, 8, Base, OffImm);
135 }
136 bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
137 return SelectAddrModeUnscaled(N, 16, Base, OffImm);
138 }
139 template <unsigned Size, unsigned Max>
140 bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
141 // Test if there is an appropriate addressing mode and check if the
142 // immediate fits.
143 bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
144 if (Found) {
145 if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
146 int64_t C = CI->getSExtValue();
147 if (C <= Max)
148 return true;
149 }
150 }
151
152 // Otherwise, base only, materialize address in register.
153 Base = N;
154 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
155 return true;
156 }
157
158 template<int Width>
159 bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
160 SDValue &SignExtend, SDValue &DoShift) {
161 return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
162 }
163
164 template<int Width>
165 bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
166 SDValue &SignExtend, SDValue &DoShift) {
167 return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
168 }
169
170 bool SelectExtractHigh(SDValue N, SDValue &Res) {
171 if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
172 N = N->getOperand(0);
173 if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
174 !isa<ConstantSDNode>(N->getOperand(1)))
175 return false;
176 EVT VT = N->getValueType(0);
177 EVT LVT = N->getOperand(0).getValueType();
178 unsigned Index = N->getConstantOperandVal(1);
179 if (!VT.is64BitVector() || !LVT.is128BitVector() ||
180 Index != VT.getVectorNumElements())
181 return false;
182 Res = N->getOperand(0);
183 return true;
184 }
185
186 bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
187 if (N.getOpcode() != AArch64ISD::VLSHR)
188 return false;
189 SDValue Op = N->getOperand(0);
190 EVT VT = Op.getValueType();
191 unsigned ShtAmt = N->getConstantOperandVal(1);
192 if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
193 return false;
194
195 APInt Imm;
196 if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
197 Imm = APInt(VT.getScalarSizeInBits(),
198 Op.getOperand(1).getConstantOperandVal(0)
199 << Op.getOperand(1).getConstantOperandVal(1));
200 else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
201 isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
202 Imm = APInt(VT.getScalarSizeInBits(),
203 Op.getOperand(1).getConstantOperandVal(0));
204 else
205 return false;
206
207 if (Imm != 1ULL << (ShtAmt - 1))
208 return false;
209
210 Res1 = Op.getOperand(0);
211 Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
212 return true;
213 }
214
215 bool SelectDupZeroOrUndef(SDValue N) {
216 switch(N->getOpcode()) {
217 case ISD::UNDEF:
218 return true;
219 case AArch64ISD::DUP:
220 case ISD::SPLAT_VECTOR: {
221 auto Opnd0 = N->getOperand(0);
222 if (isNullConstant(Opnd0))
223 return true;
224 if (isNullFPConstant(Opnd0))
225 return true;
226 break;
227 }
228 default:
229 break;
230 }
231
232 return false;
233 }
234
235 bool SelectAny(SDValue) { return true; }
236
237 bool SelectDupZero(SDValue N) {
238 switch(N->getOpcode()) {
239 case AArch64ISD::DUP:
240 case ISD::SPLAT_VECTOR: {
241 auto Opnd0 = N->getOperand(0);
242 if (isNullConstant(Opnd0))
243 return true;
244 if (isNullFPConstant(Opnd0))
245 return true;
246 break;
247 }
248 }
249
250 return false;
251 }
252
253 template <MVT::SimpleValueType VT, bool Negate>
254 bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
255 return SelectSVEAddSubImm(N, VT, Imm, Shift, Negate);
256 }
257
258 template <MVT::SimpleValueType VT, bool Negate>
259 bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
260 return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
261 }
262
263 template <MVT::SimpleValueType VT>
264 bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
265 return SelectSVECpyDupImm(N, VT, Imm, Shift);
266 }
267
268 template <MVT::SimpleValueType VT, bool Invert = false>
269 bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
270 return SelectSVELogicalImm(N, VT, Imm, Invert);
271 }
272
273 template <MVT::SimpleValueType VT>
274 bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
275 return SelectSVEArithImm(N, VT, Imm);
276 }
277
278 template <unsigned Low, unsigned High, bool AllowSaturation = false>
279 bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
280 return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
281 }
282
283 bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
284 if (N->getOpcode() != ISD::SPLAT_VECTOR)
285 return false;
286
287 EVT EltVT = N->getValueType(0).getVectorElementType();
288 return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
289 /* High */ EltVT.getFixedSizeInBits(),
290 /* AllowSaturation */ true, Imm);
291 }
292
293 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
294 template<signed Min, signed Max, signed Scale, bool Shift>
295 bool SelectCntImm(SDValue N, SDValue &Imm) {
297 return false;
298
299 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
300 if (Shift)
301 MulImm = 1LL << MulImm;
302
303 if ((MulImm % std::abs(Scale)) != 0)
304 return false;
305
306 MulImm /= Scale;
307 if ((MulImm >= Min) && (MulImm <= Max)) {
308 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
309 return true;
310 }
311
312 return false;
313 }
314
315 template <signed Max, signed Scale>
316 bool SelectEXTImm(SDValue N, SDValue &Imm) {
318 return false;
319
320 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
321
322 if (MulImm >= 0 && MulImm <= Max) {
323 MulImm *= Scale;
324 Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
325 return true;
326 }
327
328 return false;
329 }
330
331 template <unsigned BaseReg, unsigned Max>
332 bool ImmToReg(SDValue N, SDValue &Imm) {
333 if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
334 uint64_t C = CI->getZExtValue();
335
336 if (C > Max)
337 return false;
338
339 Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
340 return true;
341 }
342 return false;
343 }
344
345 /// Form sequences of consecutive 64/128-bit registers for use in NEON
346 /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
347 /// between 1 and 4 elements. If it contains a single element that is returned
348 /// unchanged; otherwise a REG_SEQUENCE value is returned.
351 // Form a sequence of SVE registers for instructions using list of vectors,
352 // e.g. structured loads and stores (ldN, stN).
353 SDValue createZTuple(ArrayRef<SDValue> Vecs);
354
355 // Similar to above, except the register must start at a multiple of the
356 // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
357 SDValue createZMulTuple(ArrayRef<SDValue> Regs);
358
359 /// Generic helper for the createDTuple/createQTuple
360 /// functions. Those should almost always be called instead.
361 SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
362 const unsigned SubRegs[]);
363
364 void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
365
366 bool tryIndexedLoad(SDNode *N);
367
368 void SelectPtrauthAuth(SDNode *N);
369 void SelectPtrauthResign(SDNode *N);
370
371 bool trySelectStackSlotTagP(SDNode *N);
372 void SelectTagP(SDNode *N);
373
374 void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
375 unsigned SubRegIdx);
376 void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
377 unsigned SubRegIdx);
378 void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
379 void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
380 void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
381 unsigned Opc_rr, unsigned Opc_ri,
382 bool IsIntr = false);
383 void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
384 unsigned Scale, unsigned Opc_ri,
385 unsigned Opc_rr);
386 void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
387 bool IsZmMulti, unsigned Opcode,
388 bool HasPred = false);
389 void SelectPExtPair(SDNode *N, unsigned Opc);
390 void SelectWhilePair(SDNode *N, unsigned Opc);
391 void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
392 void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
393 void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
394 void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
395 bool IsTupleInput, unsigned Opc);
396 void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
397
398 template <unsigned MaxIdx, unsigned Scale>
399 void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
400 unsigned Op);
401 void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
402 unsigned Op, unsigned MaxIdx, unsigned Scale,
403 unsigned BaseReg = 0);
404 bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
405 /// SVE Reg+Imm addressing mode.
406 template <int64_t Min, int64_t Max>
407 bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
408 SDValue &OffImm);
409 /// SVE Reg+Reg address mode.
410 template <unsigned Scale>
411 bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
412 return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
413 }
414
415 void SelectMultiVectorLutiLane(SDNode *Node, unsigned NumOutVecs,
416 unsigned Opc, uint32_t MaxImm);
417
418 void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc);
419
420 template <unsigned MaxIdx, unsigned Scale>
421 bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
422 return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
423 }
424
425 void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
426 void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
427 void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
428 void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
429 void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
430 unsigned Opc_rr, unsigned Opc_ri);
431 std::tuple<unsigned, SDValue, SDValue>
432 findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
433 const SDValue &OldBase, const SDValue &OldOffset,
434 unsigned Scale);
435
436 bool tryBitfieldExtractOp(SDNode *N);
437 bool tryBitfieldExtractOpFromSExt(SDNode *N);
438 bool tryBitfieldInsertOp(SDNode *N);
439 bool tryBitfieldInsertInZeroOp(SDNode *N);
440 bool tryShiftAmountMod(SDNode *N);
441
442 bool tryReadRegister(SDNode *N);
443 bool tryWriteRegister(SDNode *N);
444
445 bool trySelectCastFixedLengthToScalableVector(SDNode *N);
446 bool trySelectCastScalableToFixedLengthVector(SDNode *N);
447
448 bool trySelectXAR(SDNode *N);
449
450 SDValue tryFoldCselToFMaxMin(SDNode &N);
451
452// Include the pieces autogenerated from the target description.
453#include "AArch64GenDAGISel.inc"
454
455private:
456 bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
457 SDValue &Shift);
458 bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
459 bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
460 SDValue &OffImm) {
461 return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
462 }
463 bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
464 unsigned Size, SDValue &Base,
465 SDValue &OffImm);
466 bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
467 SDValue &OffImm);
468 bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
469 SDValue &OffImm);
470 bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
471 SDValue &Offset, SDValue &SignExtend,
472 SDValue &DoShift);
473 bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
474 SDValue &Offset, SDValue &SignExtend,
475 SDValue &DoShift);
476 bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
477 bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
478 bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
479 SDValue &Offset, SDValue &SignExtend);
480
481 template<unsigned RegWidth>
482 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
483 return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
484 }
485 bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
486
487 template <unsigned RegWidth>
488 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos) {
489 return SelectCVTFixedPointVec(N, FixedPos, RegWidth);
490 }
491 bool SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos, unsigned Width);
492
493 template<unsigned RegWidth>
494 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
495 return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
496 }
497
498 bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
499 unsigned Width);
500
501 template <unsigned FloatWidth>
502 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos) {
503 return SelectCVTFixedPosRecipOperandVec(N, FixedPos, FloatWidth);
504 }
505
506 bool SelectCVTFixedPosRecipOperandVec(SDValue N, SDValue &FixedPos,
507 unsigned Width);
508
509 bool SelectCMP_SWAP(SDNode *N);
510
511 bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
512 bool Negate);
513 bool SelectSVEAddSubImm(SDLoc DL, APInt Value, MVT VT, SDValue &Imm,
514 SDValue &Shift, bool Negate);
515 bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
516 bool Negate);
517 bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
518 bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
519
520 // Match `<NEON Splat> SVEImm` (where <NEON Splat> could be fmov, movi, etc).
521 bool SelectNEONSplatOfSVELogicalImm(SDValue N, SDValue &Imm);
522 bool SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift);
523 bool SelectNEONSplatOfSVEArithSImm(SDValue N, SDValue &Imm);
524
525 bool SelectSVESignedArithImm(SDLoc DL, APInt Value, SDValue &Imm);
526 bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
527 bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
528 bool AllowSaturation, SDValue &Imm);
529
530 bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
531 bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
532 SDValue &Offset);
533 bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
534 SDValue &Offset, unsigned Scale = 1);
535
536 bool SelectAllActivePredicate(SDValue N);
537 bool SelectAnyPredicate(SDValue N);
538
539 bool SelectCmpBranchUImm6Operand(SDNode *P, SDValue N, SDValue &Imm);
540
541 template <bool MatchCBB>
542 bool SelectCmpBranchExtOperand(SDValue N, SDValue &Reg, SDValue &ExtType);
543};
544
545class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
546public:
547 static char ID;
548 explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
549 CodeGenOptLevel OptLevel)
551 ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
552};
553} // end anonymous namespace
554
555char AArch64DAGToDAGISelLegacy::ID = 0;
556
557INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
558
559/// addBitcastHints - This method adds bitcast hints to the operands of a node
560/// to help instruction selector determine which operands are in Neon registers.
562 SDLoc DL(&N);
563 auto getFloatVT = [&](EVT VT) {
564 EVT ScalarVT = VT.getScalarType();
565 assert((ScalarVT == MVT::i32 || ScalarVT == MVT::i64) && "Unexpected VT");
566 return VT.changeElementType(*(DAG.getContext()),
567 ScalarVT == MVT::i32 ? MVT::f32 : MVT::f64);
568 };
570 NewOps.reserve(N.getNumOperands());
571
572 for (unsigned I = 0, E = N.getNumOperands(); I < E; ++I) {
573 auto bitcasted = DAG.getBitcast(getFloatVT(N.getOperand(I).getValueType()),
574 N.getOperand(I));
575 NewOps.push_back(bitcasted);
576 }
577 EVT OrigVT = N.getValueType(0);
578 SDValue OpNode = DAG.getNode(N.getOpcode(), DL, getFloatVT(OrigVT), NewOps);
579 return DAG.getBitcast(OrigVT, OpNode);
580}
581
582/// isIntImmediate - This method tests to see if the node is a constant
583/// operand. If so Imm will receive the 64-bit value.
584static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
586 Imm = C->getZExtValue();
587 return true;
588 }
589 return false;
590}
591
592// isIntImmediate - This method tests to see if a constant operand.
593// If so Imm will receive the value.
594static bool isIntImmediate(SDValue N, uint64_t &Imm) {
595 return isIntImmediate(N.getNode(), Imm);
596}
597
598// isOpcWithIntImmediate - This method tests to see if the node is a specific
599// opcode and that it has a immediate integer right operand.
600// If so Imm will receive the 32 bit value.
601static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
602 uint64_t &Imm) {
603 return N->getOpcode() == Opc &&
604 isIntImmediate(N->getOperand(1).getNode(), Imm);
605}
606
607// isIntImmediateEq - This method tests to see if N is a constant operand that
608// is equivalent to 'ImmExpected'.
609#ifndef NDEBUG
610static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
611 uint64_t Imm;
612 if (!isIntImmediate(N.getNode(), Imm))
613 return false;
614 return Imm == ImmExpected;
615}
616#endif
617
618static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth) {
619 assert(RegWidth == 32 || RegWidth == 64);
620 if (RegWidth == 32)
621 return APInt(RegWidth,
623 return APInt(RegWidth, AArch64_AM::decodeAdvSIMDModImmType12(Imm));
624}
625
626// Decodes the raw integer splat value from a NEON splat operation.
627static std::optional<APInt> DecodeNEONSplat(SDValue N) {
628 assert(N.getValueType().isInteger() && "Only integers are supported");
629 if (N->getOpcode() == AArch64ISD::NVCAST)
630 N = N->getOperand(0);
631 unsigned SplatWidth = N.getScalarValueSizeInBits();
632 if (N.getOpcode() == AArch64ISD::FMOV)
633 return DecodeFMOVImm(N.getConstantOperandVal(0), SplatWidth);
634 if (N->getOpcode() == AArch64ISD::MOVI)
635 return APInt(SplatWidth, N.getConstantOperandVal(0));
636 if (N->getOpcode() == AArch64ISD::MOVIshift)
637 return APInt(SplatWidth, N.getConstantOperandVal(0)
638 << N.getConstantOperandVal(1));
639 if (N->getOpcode() == AArch64ISD::MVNIshift)
640 return ~APInt(SplatWidth, N.getConstantOperandVal(0)
641 << N.getConstantOperandVal(1));
642 if (N->getOpcode() == AArch64ISD::MOVIedit)
644 N.getConstantOperandVal(0)));
645 if (N->getOpcode() == AArch64ISD::DUP)
646 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(0)))
647 return Const->getAPIntValue().trunc(SplatWidth);
648 // TODO: Recognize more splat-like NEON operations. See ConstantBuildVector
649 // in AArch64ISelLowering.
650 return std::nullopt;
651}
652
653// If \p N is a NEON splat operation (movi, fmov, etc), return the splat value
654// matching the element size of N.
655static std::optional<APInt> GetNEONSplatValue(SDValue N) {
656 unsigned SplatWidth = N.getScalarValueSizeInBits();
657 if (std::optional<APInt> SplatVal = DecodeNEONSplat(N)) {
658 if (SplatVal->getBitWidth() <= SplatWidth)
659 return APInt::getSplat(SplatWidth, *SplatVal);
660 if (SplatVal->isSplat(SplatWidth))
661 return SplatVal->trunc(SplatWidth);
662 }
663 return std::nullopt;
664}
665
666bool AArch64DAGToDAGISel::SelectNEONSplatOfSVELogicalImm(SDValue N,
667 SDValue &Imm) {
668 std::optional<APInt> ImmVal = GetNEONSplatValue(N);
669 if (!ImmVal)
670 return false;
671 uint64_t Encoding;
672 if (!AArch64_AM::isSVELogicalImm(N.getScalarValueSizeInBits(),
673 ImmVal->getZExtValue(), Encoding))
674 return false;
675
676 Imm = CurDAG->getTargetConstant(Encoding, SDLoc(N), MVT::i64);
677 return true;
678}
679
680bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEAddSubImm(SDValue N, SDValue &Imm,
681 SDValue &Shift) {
682 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
683 return SelectSVEAddSubImm(SDLoc(N), *ImmVal,
684 N.getValueType().getScalarType().getSimpleVT(),
685 Imm, Shift,
686 /*Negate=*/false);
687 return false;
688}
689
690bool AArch64DAGToDAGISel::SelectNEONSplatOfSVEArithSImm(SDValue N,
691 SDValue &Imm) {
692 if (std::optional<APInt> ImmVal = GetNEONSplatValue(N))
693 return SelectSVESignedArithImm(SDLoc(N), *ImmVal, Imm);
694 return false;
695}
696
697bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
698 const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
699 std::vector<SDValue> &OutOps) {
700 switch(ConstraintID) {
701 default:
702 llvm_unreachable("Unexpected asm memory constraint");
703 case InlineAsm::ConstraintCode::m:
704 case InlineAsm::ConstraintCode::o:
705 case InlineAsm::ConstraintCode::Q:
706 // We need to make sure that this one operand does not end up in XZR, thus
707 // require the address to be in a PointerRegClass register.
708 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
709 const TargetRegisterClass *TRC = TRI->getPointerRegClass();
710 SDLoc dl(Op);
711 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
712 SDValue NewOp =
713 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
714 dl, Op.getValueType(),
715 Op, RC), 0);
716 OutOps.push_back(NewOp);
717 return false;
718 }
719 return true;
720}
721
722/// SelectArithImmed - Select an immediate value that can be represented as
723/// a 12-bit value shifted left by either 0 or 12. If so, return true with
724/// Val set to the 12-bit value and Shift set to the shifter operand.
725bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
726 SDValue &Shift) {
727 // This function is called from the addsub_shifted_imm ComplexPattern,
728 // which lists [imm] as the list of opcode it's interested in, however
729 // we still need to check whether the operand is actually an immediate
730 // here because the ComplexPattern opcode list is only used in
731 // root-level opcode matching.
732 if (!isa<ConstantSDNode>(N.getNode()))
733 return false;
734
735 uint64_t Immed = N.getNode()->getAsZExtVal();
736 unsigned ShiftAmt;
737
738 if (Immed >> 12 == 0) {
739 ShiftAmt = 0;
740 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
741 ShiftAmt = 12;
742 Immed = Immed >> 12;
743 } else
744 return false;
745
746 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
747 SDLoc dl(N);
748 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
749 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
750 return true;
751}
752
753/// SelectNegArithImmed - As above, but negates the value before trying to
754/// select it.
755bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
756 SDValue &Shift) {
757 // This function is called from the addsub_shifted_imm ComplexPattern,
758 // which lists [imm] as the list of opcode it's interested in, however
759 // we still need to check whether the operand is actually an immediate
760 // here because the ComplexPattern opcode list is only used in
761 // root-level opcode matching.
762 if (!isa<ConstantSDNode>(N.getNode()))
763 return false;
764
765 // The immediate operand must be a 24-bit zero-extended immediate.
766 uint64_t Immed = N.getNode()->getAsZExtVal();
767
768 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
769 // have the opposite effect on the C flag, so this pattern mustn't match under
770 // those circumstances.
771 if (Immed == 0)
772 return false;
773
774 if (N.getValueType() == MVT::i32)
775 Immed = ~((uint32_t)Immed) + 1;
776 else
777 Immed = ~Immed + 1ULL;
778 if (Immed & 0xFFFFFFFFFF000000ULL)
779 return false;
780
781 Immed &= 0xFFFFFFULL;
782 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
783 Shift);
784}
785
786/// getShiftTypeForNode - Translate a shift node to the corresponding
787/// ShiftType value.
789 switch (N.getOpcode()) {
790 default:
792 case ISD::SHL:
793 return AArch64_AM::LSL;
794 case ISD::SRL:
795 return AArch64_AM::LSR;
796 case ISD::SRA:
797 return AArch64_AM::ASR;
798 case ISD::ROTR:
799 return AArch64_AM::ROR;
800 }
801}
802
804 return isa<MemSDNode>(*N) || N->getOpcode() == AArch64ISD::PREFETCH;
805}
806
807/// Determine whether it is worth it to fold SHL into the addressing
808/// mode.
810 assert(V.getOpcode() == ISD::SHL && "invalid opcode");
811 // It is worth folding logical shift of up to three places.
812 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
813 if (!CSD)
814 return false;
815 unsigned ShiftVal = CSD->getZExtValue();
816 if (ShiftVal > 3)
817 return false;
818
819 // Check if this particular node is reused in any non-memory related
820 // operation. If yes, do not try to fold this node into the address
821 // computation, since the computation will be kept.
822 const SDNode *Node = V.getNode();
823 for (SDNode *UI : Node->users())
824 if (!isMemOpOrPrefetch(UI))
825 for (SDNode *UII : UI->users())
826 if (!isMemOpOrPrefetch(UII))
827 return false;
828 return true;
829}
830
831/// Determine whether it is worth to fold V into an extended register addressing
832/// mode.
833bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
834 // Trivial if we are optimizing for code size or if there is only
835 // one use of the value.
836 if (CurDAG->shouldOptForSize() || V.hasOneUse())
837 return true;
838
839 // If a subtarget has a slow shift, folding a shift into multiple loads
840 // costs additional micro-ops.
841 if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
842 return false;
843
844 // Check whether we're going to emit the address arithmetic anyway because
845 // it's used by a non-address operation.
846 if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
847 return true;
848 if (V.getOpcode() == ISD::ADD) {
849 const SDValue LHS = V.getOperand(0);
850 const SDValue RHS = V.getOperand(1);
851 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
852 return true;
853 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
854 return true;
855 }
856
857 // It hurts otherwise, since the value will be reused.
858 return false;
859}
860
861/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
862/// to select more shifted register
863bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
864 SDValue &Shift) {
865 EVT VT = N.getValueType();
866 if (VT != MVT::i32 && VT != MVT::i64)
867 return false;
868
869 if (N->getOpcode() != ISD::AND || !N->hasOneUse())
870 return false;
871 SDValue LHS = N.getOperand(0);
872 if (!LHS->hasOneUse())
873 return false;
874
875 unsigned LHSOpcode = LHS->getOpcode();
876 if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
877 return false;
878
879 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
880 if (!ShiftAmtNode)
881 return false;
882
883 uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
884 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
885 if (!RHSC)
886 return false;
887
888 APInt AndMask = RHSC->getAPIntValue();
889 unsigned LowZBits, MaskLen;
890 if (!AndMask.isShiftedMask(LowZBits, MaskLen))
891 return false;
892
893 unsigned BitWidth = N.getValueSizeInBits();
894 SDLoc DL(LHS);
895 uint64_t NewShiftC;
896 unsigned NewShiftOp;
897 if (LHSOpcode == ISD::SHL) {
898 // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
899 // BitWidth != LowZBits + MaskLen doesn't match the pattern
900 if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
901 return false;
902
903 NewShiftC = LowZBits - ShiftAmtC;
904 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
905 } else {
906 if (LowZBits == 0)
907 return false;
908
909 // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
910 NewShiftC = LowZBits + ShiftAmtC;
911 if (NewShiftC >= BitWidth)
912 return false;
913
914 // SRA need all high bits
915 if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
916 return false;
917
918 // SRL high bits can be 0 or 1
919 if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
920 return false;
921
922 if (LHSOpcode == ISD::SRL)
923 NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
924 else
925 NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
926 }
927
928 assert(NewShiftC < BitWidth && "Invalid shift amount");
929 SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
930 SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
931 Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
932 NewShiftAmt, BitWidthMinus1),
933 0);
934 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
935 Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
936 return true;
937}
938
939/// getExtendTypeForNode - Translate an extend node to the corresponding
940/// ExtendType value.
///
/// Three node shapes are recognized: SIGN_EXTEND / SIGN_EXTEND_INREG,
/// ZERO_EXTEND / ANY_EXTEND, and AND with a constant mask of 0xFF, 0xFFFF or
/// 0xFFFFFFFF. When \p IsLoadStore is true the byte and halfword extend kinds
/// are never produced; only the 32-bit (SXTW / UXTW) kinds can be returned.
// NOTE(review): this listing skips source lines 941, 959, 971, 975, 980 and
// 990 -- presumably the return type and the fall-through return statements.
// Confirm against the original file before editing the logic.
942getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
943 if (N.getOpcode() == ISD::SIGN_EXTEND ||
944 N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
// For SIGN_EXTEND_INREG the source width is carried by the VT operand;
// for a plain SIGN_EXTEND it is the type of the extended operand.
945 EVT SrcVT;
946 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
947 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
948 else
949 SrcVT = N.getOperand(0).getValueType();
950
951 if (!IsLoadStore && SrcVT == MVT::i8)
952 return AArch64_AM::SXTB;
953 else if (!IsLoadStore && SrcVT == MVT::i16)
954 return AArch64_AM::SXTH;
955 else if (SrcVT == MVT::i32)
956 return AArch64_AM::SXTW;
957 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
958
960 } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
961 N.getOpcode() == ISD::ANY_EXTEND) {
// ANY_EXTEND is mapped to the same unsigned extend kinds as ZERO_EXTEND.
962 EVT SrcVT = N.getOperand(0).getValueType();
963 if (!IsLoadStore && SrcVT == MVT::i8)
964 return AArch64_AM::UXTB;
965 else if (!IsLoadStore && SrcVT == MVT::i16)
966 return AArch64_AM::UXTH;
967 else if (SrcVT == MVT::i32)
968 return AArch64_AM::UXTW;
969 assert(SrcVT != MVT::i64 && "extend from 64-bits?");
970
972 } else if (N.getOpcode() == ISD::AND) {
// An AND with an all-ones low mask is treated as a zero extend of that width.
973 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
974 if (!CSD)
976 uint64_t AndMask = CSD->getZExtValue();
977
978 switch (AndMask) {
979 default:
981 case 0xFF:
982 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
983 case 0xFFFF:
984 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
985 case 0xFFFFFFFF:
986 return AArch64_AM::UXTW;
987 }
988 }
989
991}
992
993/// Determine whether it is worth to fold V into an extended register of an
994/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
995/// instruction, and the shift should be treated as worth folding even if has
996/// multiple uses.
///
/// \returns true when the fold should be performed.
// NOTE(review): callers in this file invoke this with a single argument, so
// \p LSL presumably has a default value in the class declaration -- confirm.
997bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
998 // Trivial if we are optimizing for code size or if there is only
999 // one use of the value.
1000 if (CurDAG->shouldOptForSize() || V.hasOneUse())
1001 return true;
1002
1003 // If a subtarget has a fastpath LSL we can fold a logical shift into
1004 // the add/sub and save a cycle.
// Only constant left shifts of at most 4 are considered here.
1005 if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
1006 V.getConstantOperandVal(1) <= 4 &&
// NOTE(review): source line 1007 (the final operand of this condition) is
// missing from this listing; restore it from the original file.
1008 return true;
1009
1010 // It hurts otherwise, since the value will be reused.
1011 return false;
1012}
1013
1014/// SelectShiftedRegister - Select a "shifted register" operand. If the value
1015/// is not shifted, set the Shift operand to default of "LSL 0". The logical
1016/// instructions allow the shifted register to be rotated, but the arithmetic
1017/// instructions do not. The AllowROR parameter specifies whether ROR is
1018/// supported.
///
/// On success \p Reg is the value being shifted and \p Shift the encoded
/// shifter immediate.
// NOTE(review): the visible body returns false (rather than producing
// "LSL 0") when ShType is InvalidShiftExtend; the sentence above about the
// unshifted default may be stale -- confirm.
1019bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
1020 SDValue &Reg, SDValue &Shift) {
// First try the and-of-shift rewrite, which fills in Reg and Shift itself.
1021 if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
1022 return true;
1023
// NOTE(review): source line 1024, which declares and initializes ShType, is
// missing from this listing.
1025 if (ShType == AArch64_AM::InvalidShiftExtend)
1026 return false;
1027 if (!AllowROR && ShType == AArch64_AM::ROR)
1028 return false;
1029
// Only constant shift amounts are folded; the amount is reduced modulo the
// value's bit width before being encoded.
1030 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1031 unsigned BitSize = N.getValueSizeInBits();
1032 unsigned Val = RHS->getZExtValue() & (BitSize - 1);
1033 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
1034
// Reg and Shift are written before the profitability check, so they are
// populated even when this returns false.
1035 Reg = N.getOperand(0);
1036 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
1037 return isWorthFoldingALU(N, true);
1038 }
1039
1040 return false;
1041}
1042
1043/// Instructions that accept extend modifiers like UXTW expect the register
1044/// being extended to be a GPR32, but the incoming DAG might be acting on a
1045/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
1046/// this is the case.
///
/// \returns \p N unchanged when it is already i32, otherwise an i32 extract of
/// the sub_32 subregister of \p N.
// NOTE(review): source line 1047 (the function signature) is missing from
// this listing. Call sites elsewhere in this file use
// narrowIfNeeded(CurDAG, Value), which presumably is this function.
1048 if (N.getValueType() == MVT::i32)
1049 return N;
1050
1051 SDLoc dl(N);
1052 return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
1053}
1054
1055// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
1056template<signed Low, signed High, signed Scale>
1057bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
1058 if (!isa<ConstantSDNode>(N))
1059 return false;
1060
1061 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
1062 if ((MulImm % std::abs(Scale)) == 0) {
1063 int64_t RDVLImm = MulImm / Scale;
1064 if ((RDVLImm >= Low) && (RDVLImm <= High)) {
1065 Imm = CurDAG->getSignedTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
1066 return true;
1067 }
1068 }
1069
1070 return false;
1071}
1072
1073// Returns a suitable RDSVL multiplier from a left shift.
1074template <signed Low, signed High>
1075bool AArch64DAGToDAGISel::SelectRDSVLShiftImm(SDValue N, SDValue &Imm) {
1076 if (!isa<ConstantSDNode>(N))
1077 return false;
1078
1079 int64_t MulImm = 1LL << cast<ConstantSDNode>(N)->getSExtValue();
1080 if (MulImm >= Low && MulImm <= High) {
1081 Imm = CurDAG->getSignedTargetConstant(MulImm, SDLoc(N), MVT::i32);
1082 return true;
1083 }
1084
1085 return false;
1086}
1087
1088/// SelectArithExtendedRegister - Select a "extended register" operand. This
1089/// operand folds in an extend followed by an optional left shift.
///
/// On success \p Reg is the (narrowed) value being extended and \p Shift the
/// encoded extend kind plus shift amount.
// NOTE(review): this listing skips source lines 1093, 1104, 1110, 1116 and
// 1131-1133 (the declaration of Ext, several conditions and part of the
// isDef32 lambda). The brace structure below is therefore incomplete; restore
// those lines from the original file before changing any logic.
1090bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
1091 SDValue &Shift) {
1092 unsigned ShiftVal = 0;
1094
1095 if (N.getOpcode() == ISD::SHL) {
// Shifted form: the shift amount must be a constant no larger than 4, and
// the shifted value must itself be an extend.
1096 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1097 if (!CSD)
1098 return false;
1099 ShiftVal = CSD->getZExtValue();
1100 if (ShiftVal > 4)
1101 return false;
1102
1103 Ext = getExtendTypeForNode(N.getOperand(0));
1105 return false;
1106
1107 Reg = N.getOperand(0).getOperand(0);
1108 } else {
// Unshifted form: N itself must be an extend.
1109 Ext = getExtendTypeForNode(N);
1111 return false;
1112
1113 // Don't match sext of vector extracts. These can use SMOV, but if we match
1114 // this as an extended register, we'll always fold the extend into an ALU op
1115 // user of the extend (which results in a UMOV).
1117 SDValue Op = N.getOperand(0);
1118 if (Op->getOpcode() == ISD::ANY_EXTEND)
1119 Op = Op->getOperand(0);
1120 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
1121 Op.getOperand(0).getValueType().isFixedLengthVector())
1122 return false;
1123 }
1124
1125 Reg = N.getOperand(0);
1126
1127 // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
1128 // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
1129 auto isDef32 = [](SDValue N) {
1130 unsigned Opc = N.getOpcode();
1131 return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
1134 Opc != ISD::FREEZE;
1135 };
1136 if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
1137 isDef32(Reg))
1138 return false;
1139 }
1140
1141 // AArch64 mandates that the RHS of the operation must use the smallest
1142 // register class that could contain the size being extended from. Thus,
1143 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
1144 // there might not be an actual 32-bit value in the program. We can
1145 // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
1146 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
1147 Reg = narrowIfNeeded(CurDAG, Reg);
1148 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1149 MVT::i32);
// Reg and Shift are already written at this point, even if the fold is then
// judged not worthwhile.
1150 return isWorthFoldingALU(N);
1151}
1152
1153/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
1154/// operand is referred to by the instructions that have an SP operand.
///
/// Only a left shift by a constant of at most 4 is matched. On success
/// \p Reg is the shifted value and \p Shift the encoded UXTX extend with that
/// shift amount.
1155bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
1156 SDValue &Shift) {
1157 unsigned ShiftVal = 0;
// NOTE(review): source line 1158, presumably the declaration of Ext, is
// missing from this listing.
1159
1160 if (N.getOpcode() != ISD::SHL)
1161 return false;
1162
1163 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1164 if (!CSD)
1165 return false;
1166 ShiftVal = CSD->getZExtValue();
1167 if (ShiftVal > 4)
1168 return false;
1169
1170 Ext = AArch64_AM::UXTX;
1171 Reg = N.getOperand(0);
1172 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
1173 MVT::i32);
1174 return isWorthFoldingALU(N);
1175}
1176
1177/// If there's a use of this ADDlow that's not itself a load/store then we'll
1178/// need to create a real ADD instruction from it anyway and there's no point in
1179/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
1180/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
1181/// leads to duplicated ADRP instructions.
///
/// \returns true only when every user of \p N is a LOAD, STORE, ATOMIC_LOAD or
/// ATOMIC_STORE whose success ordering is not stronger than monotonic.
// NOTE(review): source line 1182 (the function signature) is missing from
// this listing. The call site in SelectAddrModeIndexed uses
// isWorthFoldingADDlow(N), which presumably is this function.
1183 for (auto *User : N->users()) {
1184 if (User->getOpcode() != ISD::LOAD && User->getOpcode() != ISD::STORE &&
1185 User->getOpcode() != ISD::ATOMIC_LOAD &&
1186 User->getOpcode() != ISD::ATOMIC_STORE)
1187 return false;
1188
1189 // ldar and stlr have much more restrictive addressing modes (just a
1190 // register).
1191 if (isStrongerThanMonotonic(cast<MemSDNode>(User)->getSuccessOrdering()))
1192 return false;
1193 }
1194
1195 return true;
1196}
1197
1198/// Check if the immediate offset is valid as a scaled immediate.
1199static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
1200 unsigned Size) {
1201 if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
1202 Offset < (Range << Log2_32(Size)))
1203 return true;
1204 return false;
1205}
1206
1207/// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
1208/// immediate" address. The "Size" argument is the size in bytes of the memory
1209/// reference, which determines the scale.
1210bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
1211 unsigned BW, unsigned Size,
1212 SDValue &Base,
1213 SDValue &OffImm) {
1214 SDLoc dl(N);
1215 const DataLayout &DL = CurDAG->getDataLayout();
1216 const TargetLowering *TLI = getTargetLowering();
1217 if (N.getOpcode() == ISD::FrameIndex) {
1218 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1219 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1220 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1221 return true;
1222 }
1223
1224 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
1225 // selected here doesn't support labels/immediates, only base+offset.
1226 if (CurDAG->isBaseWithConstantOffset(N)) {
1227 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1228 if (IsSignedImm) {
1229 int64_t RHSC = RHS->getSExtValue();
1230 unsigned Scale = Log2_32(Size);
1231 int64_t Range = 0x1LL << (BW - 1);
1232
1233 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
1234 RHSC < (Range << Scale)) {
1235 Base = N.getOperand(0);
1236 if (Base.getOpcode() == ISD::FrameIndex) {
1237 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1238 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1239 }
1240 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1241 return true;
1242 }
1243 } else {
1244 // unsigned Immediate
1245 uint64_t RHSC = RHS->getZExtValue();
1246 unsigned Scale = Log2_32(Size);
1247 uint64_t Range = 0x1ULL << BW;
1248
1249 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
1250 Base = N.getOperand(0);
1251 if (Base.getOpcode() == ISD::FrameIndex) {
1252 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1253 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1254 }
1255 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1256 return true;
1257 }
1258 }
1259 }
1260 }
1261 // Base only. The address will be materialized into a register before
1262 // the memory is accessed.
1263 // add x0, Xbase, #offset
1264 // stp x1, x2, [x0]
1265 Base = N;
1266 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1267 return true;
1268}
1269
1270/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
1271/// immediate" address. The "Size" argument is the size in bytes of the memory
1272/// reference, which determines the scale.
///
/// On success \p Base and \p OffImm describe the address. The function
/// deliberately fails when the unscaled addressing mode can encode \p N.
1273bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
1274 SDValue &Base, SDValue &OffImm) {
1275 SDLoc dl(N);
1276 const DataLayout &DL = CurDAG->getDataLayout();
1277 const TargetLowering *TLI = getTargetLowering();
// A bare frame index becomes a target frame index with a zero offset.
1278 if (N.getOpcode() == ISD::FrameIndex) {
1279 int FI = cast<FrameIndexSDNode>(N)->getIndex();
1280 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1281 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1282 return true;
1283 }
1284
// An ADDlow whose users are all suitable memory operations is split into its
// two operands; a non-global second operand is accepted as is.
1285 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
1286 GlobalAddressSDNode *GAN =
1287 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
1288 Base = N.getOperand(0);
1289 OffImm = N.getOperand(1);
1290 if (!GAN)
1291 return true;
1292
// NOTE(review): source line 1294 (the second half of this condition) is
// missing from this listing. When the condition fails, control falls through
// with Base and OffImm already overwritten.
1293 if (GAN->getOffset() % Size == 0 &&
1295 return true;
1296 }
1297
1298 if (CurDAG->isBaseWithConstantOffset(N)) {
1299 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1300 int64_t RHSC = (int64_t)RHS->getZExtValue();
1301 unsigned Scale = Log2_32(Size);
// 0x1000 is the exclusive bound on the scaled value, i.e. a 12-bit field.
1302 if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {
1303 Base = N.getOperand(0);
1304 if (Base.getOpcode() == ISD::FrameIndex) {
1305 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1306 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
1307 }
1308 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
1309 return true;
1310 }
1311 }
1312 }
1313
1314 // Before falling back to our general case, check if the unscaled
1315 // instructions can handle this. If so, that's preferable.
// Note that a successful probe has already written Base and OffImm, even
// though this function then reports failure.
1316 if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
1317 return false;
1318
1319 // Base only. The address will be materialized into a register before
1320 // the memory is accessed.
1321 // add x0, Xbase, #offset
1322 // ldr x0, [x0]
1323 Base = N;
1324 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
1325 return true;
1326}
1327
1328/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
1329/// immediate" address. This should only match when there is an offset that
1330/// is not valid for a scaled immediate addressing mode. The "Size" argument
1331/// is the size in bytes of the memory reference, which is needed here to know
1332/// what is valid for a scaled immediate.
1333bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
1334 SDValue &Base,
1335 SDValue &OffImm) {
1336 if (!CurDAG->isBaseWithConstantOffset(N))
1337 return false;
1338 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
1339 int64_t RHSC = RHS->getSExtValue();
1340 if (RHSC >= -256 && RHSC < 256) {
1341 Base = N.getOperand(0);
1342 if (Base.getOpcode() == ISD::FrameIndex) {
1343 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1344 const TargetLowering *TLI = getTargetLowering();
1345 Base = CurDAG->getTargetFrameIndex(
1346 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1347 }
1348 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
1349 return true;
1350 }
1351 }
1352 return false;
1353}
1354
// Wraps N into an i64 value: an IMPLICIT_DEF i64 is created and N is inserted
// into its sub_32 subregister, leaving the remaining bits undefined.
// NOTE(review): source line 1355 (the function signature) is missing from
// this listing, so the function's name and parameters cannot be confirmed
// here; the body uses CurDAG and N.
1356 SDLoc dl(N);
1357 SDValue ImpDef = SDValue(
1358 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
1359 return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
1360 N);
1361}
1362
1363/// Check if the given SHL node (\p N), can be used to form an
1364/// extended register for an addressing mode.
///
/// \p Size is the access size in bytes; the shift amount must be 0 or
/// log2(Size). With \p WantExtend the shifted value must be an extend, and
/// \p SignExtend reports whether it is SXTW; otherwise the shifted value is
/// used directly and \p SignExtend is 0. \p Offset receives the register
/// operand.
1365bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
1366 bool WantExtend, SDValue &Offset,
1367 SDValue &SignExtend) {
1368 assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
// The shift amount must be a constant that fits in three bits (0..7).
1369 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
1370 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
1371 return false;
1372
1373 SDLoc dl(N);
1374 if (WantExtend) {
// NOTE(review): source lines 1375 and 1377 (the declaration of Ext and the
// check guarding the return below) are missing from this listing.
1376 getExtendTypeForNode(N.getOperand(0), true);
1378 return false;
1379
1380 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
1381 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1382 MVT::i32);
1383 } else {
1384 Offset = N.getOperand(0);
1385 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
1386 }
1387
// Offset and SignExtend are already written when the checks below reject
// the node.
1388 unsigned LegalShiftVal = Log2_32(Size);
1389 unsigned ShiftVal = CSD->getZExtValue();
1390
1391 if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
1392 return false;
1393
1394 return isWorthFoldingAddr(N, Size);
1395}
1396
/// Match an ADD as a "base register + extended 32-bit register offset"
/// address for an access of \p Size bytes.
///
/// The candidates are tried in this order: shifted extend on the RHS, shifted
/// extend on the LHS, unshifted extend on the LHS, unshifted extend on the
/// RHS. On success Base, Offset, \p SignExtend (1 for SXTW) and \p DoShift are
/// set.
// NOTE(review): this listing skips source lines 1398, 1409, 1443, 1447 and
// 1459 (the Base/Offset parameters, the immediate-add check, the declaration
// of Ext and the tails of two conditions). Restore them from the original
// file before changing any logic.
1397bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
1399 SDValue &SignExtend,
1400 SDValue &DoShift) {
1401 if (N.getOpcode() != ISD::ADD)
1402 return false;
1403 SDValue LHS = N.getOperand(0);
1404 SDValue RHS = N.getOperand(1);
1405 SDLoc dl(N);
1406
1407 // We don't want to match immediate adds here, because they are better lowered
1408 // to the register-immediate addressing modes.
1410 return false;
1411
1412 // Check if this particular node is reused in any non-memory related
1413 // operation. If yes, do not try to fold this node into the address
1414 // computation, since the computation will be kept.
1415 const SDNode *Node = N.getNode();
1416 for (SDNode *UI : Node->users()) {
1417 if (!isMemOpOrPrefetch(UI))
1418 return false;
1419 }
1420
1421 // Remember if it is worth folding N when it produces extended register.
1422 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1423
1424 // Try to match a shifted extend on the RHS.
1425 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1426 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
1427 Base = LHS;
1428 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1429 return true;
1430 }
1431
1432 // Try to match a shifted extend on the LHS.
1433 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1434 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
1435 Base = RHS;
1436 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
1437 return true;
1438 }
1439
1440 // There was no shift, whatever else we find.
1441 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
1442
1444 // Try to match an unshifted extend on the LHS.
1445 if (IsExtendedRegisterWorthFolding &&
1446 (Ext = getExtendTypeForNode(LHS, true)) !=
1448 Base = RHS;
1449 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
1450 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1451 MVT::i32);
// If the LHS extend is not worth folding, the outputs written above are left
// in place and the RHS is tried next.
1452 if (isWorthFoldingAddr(LHS, Size))
1453 return true;
1454 }
1455
1456 // Try to match an unshifted extend on the RHS.
1457 if (IsExtendedRegisterWorthFolding &&
1458 (Ext = getExtendTypeForNode(RHS, true)) !=
1460 Base = LHS;
1461 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
1462 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
1463 MVT::i32);
1464 if (isWorthFoldingAddr(RHS, Size))
1465 return true;
1466 }
1467
1468 return false;
1469}
1470
// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be
// encoded by one MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);

  // A constant in [0x0, 0xfff] can be encoded in a plain ADD.
  if ((Bits >> 12) == 0)
    return true;

  // "ADD LSL #12" requires every set bit to lie within bits [12, 24).
  const bool FitsShiftedADD = (Bits & 0xfffULL) == 0 && (Bits >> 24) == 0;
  if (!FitsShiftedADD)
    return false;

  // A single MOVZ is faster than an "ADD LSL #12", so only prefer the ADD
  // when the constant straddles the 16-bit boundary and therefore cannot be
  // produced by one MOVZ.
  const bool HasBitsBelow16 = (Bits & 0xf000ULL) != 0;
  const bool HasBitsFrom16 = (Bits & 0xff0000ULL) != 0;
  return HasBitsBelow16 && HasBitsFrom16;
}
1485
/// Match an ADD as a "base register + 64-bit register offset" address for an
/// access of \p Size bytes.
///
/// A left-shifted operand is folded into the offset when SelectExtendedSHL
/// accepts it; otherwise the two ADD operands are used as Base and Offset
/// directly. \p SignExtend is produced by SelectExtendedSHL or set to false,
/// and \p DoShift reports whether a shift was folded.
// NOTE(review): source line 1487 (presumably the Base and Offset reference
// parameters) is missing from this listing.
1486bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
1488 SDValue &SignExtend,
1489 SDValue &DoShift) {
1490 if (N.getOpcode() != ISD::ADD)
1491 return false;
1492 SDValue LHS = N.getOperand(0);
1493 SDValue RHS = N.getOperand(1);
1494 SDLoc DL(N);
1495
1496 // Check if this particular node is reused in any non-memory related
1497 // operation. If yes, do not try to fold this node into the address
1498 // computation, since the computation will be kept.
1499 const SDNode *Node = N.getNode();
1500 for (SDNode *UI : Node->users()) {
1501 if (!isMemOpOrPrefetch(UI))
1502 return false;
1503 }
1504
1505 // Watch out if RHS is a wide immediate, it can not be selected into
1506 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
1507 // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
1508 // instructions like:
1509 // MOV X0, WideImmediate
1510 // ADD X1, BaseReg, X0
1511 // LDR X2, [X1, 0]
1512 // For such situation, using [BaseReg, XReg] addressing mode can save one
1513 // ADD/SUB:
1514 // MOV X0, WideImmediate
1515 // LDR X2, [BaseReg, X0]
1516 if (isa<ConstantSDNode>(RHS)) {
1517 int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
1518 // Skip the immediate can be selected by load/store addressing mode.
1519 // Also skip the immediate can be encoded by a single ADD (SUB is also
1520 // checked by using -ImmOff).
// NOTE(review): -ImmOff overflows when ImmOff is INT64_MIN; consider
// negating in unsigned arithmetic.
1521 if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
1522 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
1523 return false;
1524
// Materialize the wide immediate with a MOVi64imm and rebuild the ADD on top
// of it. Note that the local RHS still refers to the original constant.
1525 SDValue Ops[] = { RHS };
1526 SDNode *MOVI =
1527 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
1528 SDValue MOVIV = SDValue(MOVI, 0);
1529 // This ADD of two X register will be selected into [Reg+Reg] mode.
1530 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
1531 }
1532
1533 // Remember if it is worth folding N when it produces extended register.
1534 bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);
1535
1536 // Try to match a shifted extend on the RHS.
1537 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
1538 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
1539 Base = LHS;
1540 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1541 return true;
1542 }
1543
1544 // Try to match a shifted extend on the LHS.
1545 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
1546 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
1547 Base = RHS;
1548 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
1549 return true;
1550 }
1551
1552 // Match any non-shifted, non-extend, non-immediate add expression.
1553 Base = LHS;
1554 Offset = RHS;
1555 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
1556 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
1557 // Reg1 + Reg2 is free: no check needed.
1558 return true;
1559}
1560
1561SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
1562 static const unsigned RegClassIDs[] = {
1563 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
1564 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
1565 AArch64::dsub2, AArch64::dsub3};
1566
1567 return createTuple(Regs, RegClassIDs, SubRegs);
1568}
1569
1570SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
1571 static const unsigned RegClassIDs[] = {
1572 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
1573 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
1574 AArch64::qsub2, AArch64::qsub3};
1575
1576 return createTuple(Regs, RegClassIDs, SubRegs);
1577}
1578
1579SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
1580 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
1581 AArch64::ZPR3RegClassID,
1582 AArch64::ZPR4RegClassID};
1583 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1584 AArch64::zsub2, AArch64::zsub3};
1585
1586 return createTuple(Regs, RegClassIDs, SubRegs);
1587}
1588
1589SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
1590 assert(Regs.size() == 2 || Regs.size() == 4);
1591
1592 // The createTuple interface requires 3 RegClassIDs for each possible
1593 // tuple type even though we only have them for ZPR2 and ZPR4.
1594 static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
1595 AArch64::ZPR4Mul4RegClassID};
1596 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
1597 AArch64::zsub2, AArch64::zsub3};
1598 return createTuple(Regs, RegClassIDs, SubRegs);
1599}
1600
/// Combine one to four registers into a single tuple value.
///
/// \param Regs        The component registers, in tuple order.
/// \param RegClassIDs Register class IDs indexed by (number of registers - 2).
/// \param SubRegs     Subregister index for each tuple position.
/// \returns Regs[0] when there is a single register, otherwise result 0 of a
/// new Untyped REG_SEQUENCE machine node.
1601SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
1602 const unsigned RegClassIDs[],
1603 const unsigned SubRegs[]) {
1604 // There's no special register-class for a vector-list of 1 element: it's just
1605 // a vector.
1606 if (Regs.size() == 1)
1607 return Regs[0];
1608
1609 assert(Regs.size() >= 2 && Regs.size() <= 4);
1610
1611 SDLoc DL(Regs[0]);
1612
// NOTE(review): source line 1613, presumably the declaration of the Ops
// operand list, is missing from this listing.
1614
1615 // First operand of REG_SEQUENCE is the desired RegClass.
1616 Ops.push_back(
1617 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
1618
1619 // Then we get pairs of source & subregister-position for the components.
1620 for (unsigned i = 0; i < Regs.size(); ++i) {
1621 Ops.push_back(Regs[i]);
1622 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
1623 }
1624
1625 SDNode *N =
1626 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
1627 return SDValue(N, 0);
1628}
1629
/// Replace \p N with the machine instruction \p Opc, passing its \p NumVecs
/// vector operands as a single Q-register tuple.
///
/// Operand 0 of \p N is skipped. When \p isExt is true, operand 1 is passed
/// through ahead of the tuple and the vector operands start one position
/// later. The operand following the vectors is passed last.
1630void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
1631 bool isExt) {
1632 SDLoc dl(N);
1633 EVT VT = N->getValueType(0);
1634
// ExtOff is 1 when the extra leading operand is present, 0 otherwise.
1635 unsigned ExtOff = isExt;
1636
1637 // Form a REG_SEQUENCE to force register allocation.
1638 unsigned Vec0Off = ExtOff + 1;
1639 SmallVector<SDValue, 4> Regs(N->ops().slice(Vec0Off, NumVecs));
1640 SDValue RegSeq = createQTuple(Regs);
1641
// NOTE(review): source line 1642, presumably the declaration of the Ops
// operand list, is missing from this listing.
1643 if (isExt)
1644 Ops.push_back(N->getOperand(1));
1645 Ops.push_back(RegSeq);
1646 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
1647 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
1648}
1649
/// Split a pointer-authentication discriminator into a constant part and an
/// address part.
///
/// \returns (constant discriminator, address discriminator). When the
/// constant part is not a 16-bit constant, the result is (0, Disc) so that
/// the whole discriminator is computed separately. When there is no address
/// part, XZR is used for it.
// NOTE(review): source line 1651 (the function name and parameters) is
// missing from this listing. Call sites in this file use
// extractPtrauthBlendDiscriminators(Disc, CurDAG), which presumably is this
// function; the body uses Disc and DAG.
1650static std::tuple<SDValue, SDValue>
1652 SDLoc DL(Disc);
1653 SDValue AddrDisc;
1654 SDValue ConstDisc;
1655
1656 // If this is a blend, remember the constant and address discriminators.
1657 // Otherwise, it's either a constant discriminator, or a non-blended
1658 // address discriminator.
1659 if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1660 Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
1661 AddrDisc = Disc->getOperand(1);
1662 ConstDisc = Disc->getOperand(2);
1663 } else {
1664 ConstDisc = Disc;
1665 }
1666
1667 // If the constant discriminator (either the blend RHS, or the entire
1668 // discriminator value) isn't a 16-bit constant, bail out, and let the
1669 // discriminator be computed separately.
1670 auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
1671 if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
1672 return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
1673
1674 // If there's no address discriminator, use XZR directly.
1675 if (!AddrDisc)
1676 AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
1677
1678 return std::make_tuple(
1679 DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
1680 AddrDisc);
1681}
1682
1683void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
1684 SDLoc DL(N);
1685 // IntrinsicID is operand #0
1686 SDValue Val = N->getOperand(1);
1687 SDValue AUTKey = N->getOperand(2);
1688 SDValue AUTDisc = N->getOperand(3);
1689
1690 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1691 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1692
1693 SDValue AUTAddrDisc, AUTConstDisc;
1694 std::tie(AUTConstDisc, AUTAddrDisc) =
1695 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1696
1697 if (!Subtarget->isX16X17Safer()) {
1698 std::vector<SDValue> Ops = {Val, AUTKey, AUTConstDisc, AUTAddrDisc};
1699 // Copy deactivation symbol if present.
1700 if (N->getNumOperands() > 4)
1701 Ops.push_back(N->getOperand(4));
1702
1703 SDNode *AUT =
1704 CurDAG->getMachineNode(AArch64::AUTxMxN, DL, MVT::i64, MVT::i64, Ops);
1705 ReplaceNode(N, AUT);
1706 } else {
1707 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1708 AArch64::X16, Val, SDValue());
1709 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
1710
1711 SDNode *AUT = CurDAG->getMachineNode(AArch64::AUTx16x17, DL, MVT::i64, Ops);
1712 ReplaceNode(N, AUT);
1713 }
1714}
1715
1716void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
1717 SDLoc DL(N);
1718 // IntrinsicID is operand #0, if W_CHAIN it is #1
1719 int OffsetBase = N->getOpcode() == ISD::INTRINSIC_W_CHAIN ? 1 : 0;
1720 SDValue Val = N->getOperand(OffsetBase + 1);
1721 SDValue AUTKey = N->getOperand(OffsetBase + 2);
1722 SDValue AUTDisc = N->getOperand(OffsetBase + 3);
1723 SDValue PACKey = N->getOperand(OffsetBase + 4);
1724 SDValue PACDisc = N->getOperand(OffsetBase + 5);
1725 uint32_t IntNum = N->getConstantOperandVal(OffsetBase + 0);
1726 bool HasLoad = IntNum == Intrinsic::ptrauth_resign_load_relative;
1727
1728 unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
1729 unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();
1730
1731 AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
1732 PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
1733
1734 SDValue AUTAddrDisc, AUTConstDisc;
1735 std::tie(AUTConstDisc, AUTAddrDisc) =
1736 extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
1737
1738 SDValue PACAddrDisc, PACConstDisc;
1739 std::tie(PACConstDisc, PACAddrDisc) =
1740 extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
1741
1742 SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
1743 AArch64::X16, Val, SDValue());
1744
1745 if (HasLoad) {
1746 SDValue Addend = N->getOperand(OffsetBase + 6);
1747 SDValue IncomingChain = N->getOperand(0);
1748 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,
1749 PACKey, PACConstDisc, PACAddrDisc,
1750 Addend, IncomingChain, X16Copy.getValue(1)};
1751
1752 SDNode *AUTRELLOADPAC = CurDAG->getMachineNode(AArch64::AUTRELLOADPAC, DL,
1753 MVT::i64, MVT::Other, Ops);
1754 ReplaceNode(N, AUTRELLOADPAC);
1755 } else {
1756 SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
1757 PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};
1758
1759 SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
1760 ReplaceNode(N, AUTPAC);
1761 }
1762}
1763
1764bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
1765 LoadSDNode *LD = cast<LoadSDNode>(N);
1766 if (LD->isUnindexed())
1767 return false;
1768 EVT VT = LD->getMemoryVT();
1769 EVT DstVT = N->getValueType(0);
1770 ISD::MemIndexedMode AM = LD->getAddressingMode();
1771 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
1772 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
1773 int OffsetVal = (int)OffsetOp->getZExtValue();
1774
1775 // We're not doing validity checking here. That was done when checking
1776 // if we should mark the load as indexed or not. We're just selecting
1777 // the right instruction.
1778 unsigned Opcode = 0;
1779
1780 ISD::LoadExtType ExtType = LD->getExtensionType();
1781 bool InsertTo64 = false;
1782 if (VT == MVT::i64)
1783 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
1784 else if (VT == MVT::i32) {
1785 if (ExtType == ISD::NON_EXTLOAD)
1786 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1787 else if (ExtType == ISD::SEXTLOAD)
1788 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
1789 else {
1790 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
1791 InsertTo64 = true;
1792 // The result of the load is only i32. It's the subreg_to_reg that makes
1793 // it into an i64.
1794 DstVT = MVT::i32;
1795 }
1796 } else if (VT == MVT::i16) {
1797 if (ExtType == ISD::SEXTLOAD) {
1798 if (DstVT == MVT::i64)
1799 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
1800 else
1801 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
1802 } else {
1803 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
1804 InsertTo64 = DstVT == MVT::i64;
1805 // The result of the load is only i32. It's the subreg_to_reg that makes
1806 // it into an i64.
1807 DstVT = MVT::i32;
1808 }
1809 } else if (VT == MVT::i8) {
1810 if (ExtType == ISD::SEXTLOAD) {
1811 if (DstVT == MVT::i64)
1812 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
1813 else
1814 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
1815 } else {
1816 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
1817 InsertTo64 = DstVT == MVT::i64;
1818 // The result of the load is only i32. It's the subreg_to_reg that makes
1819 // it into an i64.
1820 DstVT = MVT::i32;
1821 }
1822 } else if (VT == MVT::f16) {
1823 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1824 } else if (VT == MVT::bf16) {
1825 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
1826 } else if (VT == MVT::f32) {
1827 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
1828 } else if (VT == MVT::f64 ||
1829 (VT.is64BitVector() && Subtarget->isLittleEndian())) {
1830 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
1831 } else if (VT.is128BitVector() && Subtarget->isLittleEndian()) {
1832 Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
1833 } else if (VT.is64BitVector()) {
1834 if (IsPre || OffsetVal != 8)
1835 return false;
1836 switch (VT.getScalarSizeInBits()) {
1837 case 8:
1838 Opcode = AArch64::LD1Onev8b_POST;
1839 break;
1840 case 16:
1841 Opcode = AArch64::LD1Onev4h_POST;
1842 break;
1843 case 32:
1844 Opcode = AArch64::LD1Onev2s_POST;
1845 break;
1846 case 64:
1847 Opcode = AArch64::LD1Onev1d_POST;
1848 break;
1849 default:
1850 llvm_unreachable("Expected vector element to be a power of 2");
1851 }
1852 } else if (VT.is128BitVector()) {
1853 if (IsPre || OffsetVal != 16)
1854 return false;
1855 switch (VT.getScalarSizeInBits()) {
1856 case 8:
1857 Opcode = AArch64::LD1Onev16b_POST;
1858 break;
1859 case 16:
1860 Opcode = AArch64::LD1Onev8h_POST;
1861 break;
1862 case 32:
1863 Opcode = AArch64::LD1Onev4s_POST;
1864 break;
1865 case 64:
1866 Opcode = AArch64::LD1Onev2d_POST;
1867 break;
1868 default:
1869 llvm_unreachable("Expected vector element to be a power of 2");
1870 }
1871 } else
1872 return false;
1873 SDValue Chain = LD->getChain();
1874 SDValue Base = LD->getBasePtr();
1875 SDLoc dl(N);
1876 // LD1 encodes an immediate offset by using XZR as the offset register.
1877 SDValue Offset = (VT.isVector() && !Subtarget->isLittleEndian())
1878 ? CurDAG->getRegister(AArch64::XZR, MVT::i64)
1879 : CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
1880 SDValue Ops[] = { Base, Offset, Chain };
1881 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
1882 MVT::Other, Ops);
1883
1884 // Transfer memoperands.
1885 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1886 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
1887
1888 // Either way, we're replacing the node, so tell the caller that.
1889 SDValue LoadedVal = SDValue(Res, 1);
1890 if (InsertTo64) {
1891 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
1892 LoadedVal = SDValue(CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, dl,
1893 MVT::i64, LoadedVal, SubReg),
1894 0);
1895 }
1896
1897 ReplaceUses(SDValue(N, 0), LoadedVal);
1898 ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
1899 ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
1900 CurDAG->RemoveDeadNode(N);
1901 return true;
1902}
1903
1904void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
1905 unsigned SubRegIdx) {
1906 SDLoc dl(N);
1907 EVT VT = N->getValueType(0);
1908 SDValue Chain = N->getOperand(0);
1909
1910 SDValue Ops[] = {N->getOperand(2), // Mem operand;
1911 Chain};
1912
1913 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
1914
1915 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1916 SDValue SuperReg = SDValue(Ld, 0);
1917 for (unsigned i = 0; i < NumVecs; ++i)
1918 ReplaceUses(SDValue(N, i),
1919 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1920
1921 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
1922
1923 // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
1924 // because it's too simple to have needed special treatment during lowering.
1925 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
1926 MachineMemOperand *MemOp = MemIntr->getMemOperand();
1927 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1928 }
1929
1930 CurDAG->RemoveDeadNode(N);
1931}
1932
1933void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
1934 unsigned Opc, unsigned SubRegIdx) {
1935 SDLoc dl(N);
1936 EVT VT = N->getValueType(0);
1937 SDValue Chain = N->getOperand(0);
1938
1939 SDValue Ops[] = {N->getOperand(1), // Mem operand
1940 N->getOperand(2), // Incremental
1941 Chain};
1942
1943 const EVT ResTys[] = {MVT::i64, // Type of the write back register
1944 MVT::Untyped, MVT::Other};
1945
1946 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
1947
1948 // Update uses of write back register
1949 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
1950
1951 // Update uses of vector list
1952 SDValue SuperReg = SDValue(Ld, 1);
1953 if (NumVecs == 1)
1954 ReplaceUses(SDValue(N, 0), SuperReg);
1955 else
1956 for (unsigned i = 0; i < NumVecs; ++i)
1957 ReplaceUses(SDValue(N, i),
1958 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
1959
1960 // Transfer memoperands.
1961 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
1962 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
1963
1964 // Update the chain
1965 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
1966 CurDAG->RemoveDeadNode(N);
1967}
1968
1969/// Optimize \param OldBase and \param OldOffset selecting the best addressing
1970/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
1971/// new Base and an SDValue representing the new offset.
1972std::tuple<unsigned, SDValue, SDValue>
1973AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
1974 unsigned Opc_ri,
1975 const SDValue &OldBase,
1976 const SDValue &OldOffset,
1977 unsigned Scale) {
1978 SDValue NewBase = OldBase;
1979 SDValue NewOffset = OldOffset;
1980 // Detect a possible Reg+Imm addressing mode.
1981 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
1982 N, OldBase, NewBase, NewOffset);
1983
1984 // Detect a possible reg+reg addressing mode, but only if we haven't already
1985 // detected a Reg+Imm one.
1986 const bool IsRegReg =
1987 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
1988
1989 // Select the instruction.
1990 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
1991}
1992
1993enum class SelectTypeKind {
1994 Int1 = 0,
1995 Int = 1,
1996 FP = 2,
1998};
1999
2000/// This function selects an opcode from a list of opcodes, which is
2001/// expected to be the opcode for { 8-bit, 16-bit, 32-bit, 64-bit }
2002/// element types, in this order.
2003template <SelectTypeKind Kind>
2004static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
2005 // Only match scalable vector VTs
2006 if (!VT.isScalableVector())
2007 return 0;
2008
2009 EVT EltVT = VT.getVectorElementType();
2010 unsigned Key = VT.getVectorMinNumElements();
2011 switch (Kind) {
2013 break;
2015 if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
2016 EltVT != MVT::i64)
2017 return 0;
2018 break;
2020 if (EltVT != MVT::i1)
2021 return 0;
2022 break;
2023 case SelectTypeKind::FP:
2024 if (EltVT == MVT::bf16)
2025 Key = 16;
2026 else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
2027 EltVT != MVT::f64)
2028 return 0;
2029 break;
2030 }
2031
2032 unsigned Offset;
2033 switch (Key) {
2034 case 16: // 8-bit or bf16
2035 Offset = 0;
2036 break;
2037 case 8: // 16-bit
2038 Offset = 1;
2039 break;
2040 case 4: // 32-bit
2041 Offset = 2;
2042 break;
2043 case 2: // 64-bit
2044 Offset = 3;
2045 break;
2046 default:
2047 return 0;
2048 }
2049
2050 return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
2051}
2052
2053// This function is almost identical to SelectWhilePair, but has an
2054// extra check on the range of the immediate operand.
2055// TODO: Merge these two functions together at some point?
2056void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
2057 // Immediate can be either 0 or 1.
2058 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
2059 if (Imm->getZExtValue() > 1)
2060 return;
2061
2062 SDLoc DL(N);
2063 EVT VT = N->getValueType(0);
2064 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2065 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2066 SDValue SuperReg = SDValue(WhilePair, 0);
2067
2068 for (unsigned I = 0; I < 2; ++I)
2069 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2070 AArch64::psub0 + I, DL, VT, SuperReg));
2071
2072 CurDAG->RemoveDeadNode(N);
2073}
2074
2075void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
2076 SDLoc DL(N);
2077 EVT VT = N->getValueType(0);
2078
2079 SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
2080
2081 SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2082 SDValue SuperReg = SDValue(WhilePair, 0);
2083
2084 for (unsigned I = 0; I < 2; ++I)
2085 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2086 AArch64::psub0 + I, DL, VT, SuperReg));
2087
2088 CurDAG->RemoveDeadNode(N);
2089}
2090
2091void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
2092 unsigned Opcode) {
2093 EVT VT = N->getValueType(0);
2094 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2095 SDValue Ops = createZTuple(Regs);
2096 SDLoc DL(N);
2097 SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2098 SDValue SuperReg = SDValue(Intrinsic, 0);
2099 for (unsigned i = 0; i < NumVecs; ++i)
2100 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2101 AArch64::zsub0 + i, DL, VT, SuperReg));
2102
2103 CurDAG->RemoveDeadNode(N);
2104}
2105
2106void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
2107 unsigned Opcode) {
2108 SDLoc DL(N);
2109 EVT VT = N->getValueType(0);
2110 SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
2111 Ops.push_back(/*Chain*/ N->getOperand(0));
2112
2113 SDNode *Instruction =
2114 CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
2115 SDValue SuperReg = SDValue(Instruction, 0);
2116
2117 for (unsigned i = 0; i < NumVecs; ++i)
2118 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2119 AArch64::zsub0 + i, DL, VT, SuperReg));
2120
2121 // Copy chain
2122 unsigned ChainIdx = NumVecs;
2123 ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
2124 CurDAG->RemoveDeadNode(N);
2125}
2126
2127void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
2128 unsigned NumVecs,
2129 bool IsZmMulti,
2130 unsigned Opcode,
2131 bool HasPred) {
2132 assert(Opcode != 0 && "Unexpected opcode");
2133
2134 SDLoc DL(N);
2135 EVT VT = N->getValueType(0);
2136 SDUse *OpsIter = N->op_begin() + 1; // Skip intrinsic ID
2138
2139 auto GetMultiVecOperand = [&]() {
2140 SmallVector<SDValue, 4> Regs(OpsIter, OpsIter + NumVecs);
2141 OpsIter += NumVecs;
2142 return createZMulTuple(Regs);
2143 };
2144
2145 if (HasPred)
2146 Ops.push_back(*OpsIter++);
2147
2148 Ops.push_back(GetMultiVecOperand());
2149 if (IsZmMulti)
2150 Ops.push_back(GetMultiVecOperand());
2151 else
2152 Ops.push_back(*OpsIter++);
2153
2154 // Append any remaining operands.
2155 Ops.append(OpsIter, N->op_end());
2156 SDNode *Intrinsic;
2157 Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
2158 SDValue SuperReg = SDValue(Intrinsic, 0);
2159 for (unsigned i = 0; i < NumVecs; ++i)
2160 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2161 AArch64::zsub0 + i, DL, VT, SuperReg));
2162
2163 CurDAG->RemoveDeadNode(N);
2164}
2165
2166void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
2167 unsigned Scale, unsigned Opc_ri,
2168 unsigned Opc_rr, bool IsIntr) {
2169 assert(Scale < 5 && "Invalid scaling value.");
2170 SDLoc DL(N);
2171 EVT VT = N->getValueType(0);
2172 SDValue Chain = N->getOperand(0);
2173
2174 // Optimize addressing mode.
2176 unsigned Opc;
2177 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2178 N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
2179 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
2180
2181 SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
2182 Base, // Memory operand
2183 Offset, Chain};
2184
2185 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2186
2187 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2188 SDValue SuperReg = SDValue(Load, 0);
2189 for (unsigned i = 0; i < NumVecs; ++i)
2190 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2191 AArch64::zsub0 + i, DL, VT, SuperReg));
2192
2193 // Copy chain
2194 unsigned ChainIdx = NumVecs;
2195 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2196 CurDAG->RemoveDeadNode(N);
2197}
2198
2199void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
2200 unsigned NumVecs,
2201 unsigned Scale,
2202 unsigned Opc_ri,
2203 unsigned Opc_rr) {
2204 assert(Scale < 4 && "Invalid scaling value.");
2205 SDLoc DL(N);
2206 EVT VT = N->getValueType(0);
2207 SDValue Chain = N->getOperand(0);
2208
2209 SDValue PNg = N->getOperand(2);
2210 SDValue Base = N->getOperand(3);
2211 SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
2212 unsigned Opc;
2213 std::tie(Opc, Base, Offset) =
2214 findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
2215
2216 SDValue Ops[] = {PNg, // Predicate-as-counter
2217 Base, // Memory operand
2218 Offset, Chain};
2219
2220 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2221
2222 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
2223 SDValue SuperReg = SDValue(Load, 0);
2224 for (unsigned i = 0; i < NumVecs; ++i)
2225 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2226 AArch64::zsub0 + i, DL, VT, SuperReg));
2227
2228 // Copy chain
2229 unsigned ChainIdx = NumVecs;
2230 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
2231 CurDAG->RemoveDeadNode(N);
2232}
2233
2234void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
2235 unsigned Opcode) {
2236 if (N->getValueType(0) != MVT::nxv4f32)
2237 return;
2238 SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
2239}
2240
2241void AArch64DAGToDAGISel::SelectMultiVectorLutiLane(SDNode *Node,
2242 unsigned NumOutVecs,
2243 unsigned Opc,
2244 uint32_t MaxImm) {
2245 if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
2246 if (Imm->getZExtValue() > MaxImm)
2247 return;
2248
2249 SDValue ZtValue;
2250 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2251 return;
2252
2253 SDValue Chain = Node->getOperand(0);
2254 SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4), Chain};
2255 SDLoc DL(Node);
2256 EVT VT = Node->getValueType(0);
2257
2258 SDNode *Instruction =
2259 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2260 SDValue SuperReg = SDValue(Instruction, 0);
2261
2262 for (unsigned I = 0; I < NumOutVecs; ++I)
2263 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2264 AArch64::zsub0 + I, DL, VT, SuperReg));
2265
2266 // Copy chain
2267 unsigned ChainIdx = NumOutVecs;
2268 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2269 CurDAG->RemoveDeadNode(Node);
2270}
2271
2272void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
2273 unsigned NumOutVecs,
2274 unsigned Opc) {
2275 SDValue ZtValue;
2276 if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
2277 return;
2278
2279 SDValue Chain = Node->getOperand(0);
2280 SDValue Ops[] = {ZtValue,
2281 createZMulTuple({Node->getOperand(3), Node->getOperand(4)}),
2282 Chain};
2283
2284 SDLoc DL(Node);
2285 EVT VT = Node->getValueType(0);
2286
2287 SDNode *Instruction =
2288 CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
2289 SDValue SuperReg = SDValue(Instruction, 0);
2290
2291 for (unsigned I = 0; I < NumOutVecs; ++I)
2292 ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
2293 AArch64::zsub0 + I, DL, VT, SuperReg));
2294
2295 // Copy chain
2296 unsigned ChainIdx = NumOutVecs;
2297 ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
2298 CurDAG->RemoveDeadNode(Node);
2299}
2300
2301void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
2302 unsigned Op) {
2303 SDLoc DL(N);
2304 EVT VT = N->getValueType(0);
2305
2306 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2307 SDValue Zd = createZMulTuple(Regs);
2308 SDValue Zn = N->getOperand(1 + NumVecs);
2309 SDValue Zm = N->getOperand(2 + NumVecs);
2310
2311 SDValue Ops[] = {Zd, Zn, Zm};
2312
2313 SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
2314 SDValue SuperReg = SDValue(Intrinsic, 0);
2315 for (unsigned i = 0; i < NumVecs; ++i)
2316 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
2317 AArch64::zsub0 + i, DL, VT, SuperReg));
2318
2319 CurDAG->RemoveDeadNode(N);
2320}
2321
2322bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
2323 switch (BaseReg) {
2324 default:
2325 return false;
2326 case AArch64::ZA:
2327 case AArch64::ZAB0:
2328 if (TileNum == 0)
2329 break;
2330 return false;
2331 case AArch64::ZAH0:
2332 if (TileNum <= 1)
2333 break;
2334 return false;
2335 case AArch64::ZAS0:
2336 if (TileNum <= 3)
2337 break;
2338 return false;
2339 case AArch64::ZAD0:
2340 if (TileNum <= 7)
2341 break;
2342 return false;
2343 }
2344
2345 BaseReg += TileNum;
2346 return true;
2347}
2348
2349template <unsigned MaxIdx, unsigned Scale>
2350void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
2351 unsigned BaseReg, unsigned Op) {
2352 unsigned TileNum = 0;
2353 if (BaseReg != AArch64::ZA)
2354 TileNum = N->getConstantOperandVal(2);
2355
2356 if (!SelectSMETile(BaseReg, TileNum))
2357 return;
2358
2359 SDValue SliceBase, Base, Offset;
2360 if (BaseReg == AArch64::ZA)
2361 SliceBase = N->getOperand(2);
2362 else
2363 SliceBase = N->getOperand(3);
2364
2365 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2366 return;
2367
2368 SDLoc DL(N);
2369 SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
2370 SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
2371 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2372
2373 EVT VT = N->getValueType(0);
2374 for (unsigned I = 0; I < NumVecs; ++I)
2375 ReplaceUses(SDValue(N, I),
2376 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2377 SDValue(Mov, 0)));
2378 // Copy chain
2379 unsigned ChainIdx = NumVecs;
2380 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2381 CurDAG->RemoveDeadNode(N);
2382}
2383
2384void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
2385 unsigned Op, unsigned MaxIdx,
2386 unsigned Scale, unsigned BaseReg) {
2387 // Slice can be in different positions
2388 // The array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
2389 // The tile to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
2390 SDValue SliceBase = N->getOperand(2);
2391 if (BaseReg != AArch64::ZA)
2392 SliceBase = N->getOperand(3);
2393
2395 if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
2396 return;
2397 // The correct Za tile number is computed in Machine Instruction
2398 // See EmitZAInstr
2399 // DAG cannot select Za tile as an output register with ZReg
2400 SDLoc DL(N);
2402 if (BaseReg != AArch64::ZA )
2403 Ops.push_back(N->getOperand(2));
2404 Ops.push_back(Base);
2405 Ops.push_back(Offset);
2406 Ops.push_back(N->getOperand(0)); //Chain
2407 SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
2408
2409 EVT VT = N->getValueType(0);
2410 for (unsigned I = 0; I < NumVecs; ++I)
2411 ReplaceUses(SDValue(N, I),
2412 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
2413 SDValue(Mov, 0)));
2414
2415 // Copy chain
2416 unsigned ChainIdx = NumVecs;
2417 ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
2418 CurDAG->RemoveDeadNode(N);
2419}
2420
2421void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
2422 unsigned NumOutVecs,
2423 bool IsTupleInput,
2424 unsigned Opc) {
2425 SDLoc DL(N);
2426 EVT VT = N->getValueType(0);
2427 unsigned NumInVecs = N->getNumOperands() - 1;
2428
2430 if (IsTupleInput) {
2431 assert((NumInVecs == 2 || NumInVecs == 4) &&
2432 "Don't know how to handle multi-register input!");
2433 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumInVecs));
2434 Ops.push_back(createZMulTuple(Regs));
2435 } else {
2436 // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
2437 for (unsigned I = 0; I < NumInVecs; I++)
2438 Ops.push_back(N->getOperand(1 + I));
2439 }
2440
2441 SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
2442 SDValue SuperReg = SDValue(Res, 0);
2443
2444 for (unsigned I = 0; I < NumOutVecs; I++)
2445 ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
2446 AArch64::zsub0 + I, DL, VT, SuperReg));
2447 CurDAG->RemoveDeadNode(N);
2448}
2449
2450void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
2451 unsigned Opc) {
2452 SDLoc dl(N);
2453 EVT VT = N->getOperand(2)->getValueType(0);
2454
2455 // Form a REG_SEQUENCE to force register allocation.
2456 bool Is128Bit = VT.getSizeInBits() == 128;
2457 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2458 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2459
2460 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
2461 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2462
2463 // Transfer memoperands.
2464 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2465 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2466
2467 ReplaceNode(N, St);
2468}
2469
2470void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
2471 unsigned Scale, unsigned Opc_rr,
2472 unsigned Opc_ri) {
2473 SDLoc dl(N);
2474
2475 // Form a REG_SEQUENCE to force register allocation.
2476 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2477 SDValue RegSeq = createZTuple(Regs);
2478
2479 // Optimize addressing mode.
2480 unsigned Opc;
2482 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
2483 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
2484 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
2485
2486 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
2487 Base, // address
2488 Offset, // offset
2489 N->getOperand(0)}; // chain
2490 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
2491
2492 ReplaceNode(N, St);
2493}
2494
2495bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
2496 SDValue &OffImm) {
2497 SDLoc dl(N);
2498 const DataLayout &DL = CurDAG->getDataLayout();
2499 const TargetLowering *TLI = getTargetLowering();
2500
2501 // Try to match it for the frame address
2502 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
2503 int FI = FINode->getIndex();
2504 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
2505 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
2506 return true;
2507 }
2508
2509 return false;
2510}
2511
2512void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
2513 unsigned Opc) {
2514 SDLoc dl(N);
2515 EVT VT = N->getOperand(2)->getValueType(0);
2516 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2517 MVT::Other}; // Type for the Chain
2518
2519 // Form a REG_SEQUENCE to force register allocation.
2520 bool Is128Bit = VT.getSizeInBits() == 128;
2521 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2522 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
2523
2524 SDValue Ops[] = {RegSeq,
2525 N->getOperand(NumVecs + 1), // base register
2526 N->getOperand(NumVecs + 2), // Incremental
2527 N->getOperand(0)}; // Chain
2528 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2529
2530 // Transfer memoperands.
2531 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2532 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2533
2534 ReplaceNode(N, St);
2535}
2536
2537namespace {
2538/// WidenVector - Given a value in the V64 register class, produce the
2539/// equivalent value in the V128 register class.
2540class WidenVector {
2541 SelectionDAG &DAG;
2542
2543public:
2544 WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
2545
2546 SDValue operator()(SDValue V64Reg) {
2547 EVT VT = V64Reg.getValueType();
2548 unsigned NarrowSize = VT.getVectorNumElements();
2549 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2550 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
2551 SDLoc DL(V64Reg);
2552
2553 SDValue Undef =
2554 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
2555 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
2556 }
2557};
2558} // namespace
2559
2560/// NarrowVector - Given a value in the V128 register class, produce the
2561/// equivalent value in the V64 register class.
2563 EVT VT = V128Reg.getValueType();
2564 unsigned WideSize = VT.getVectorNumElements();
2565 MVT EltTy = VT.getVectorElementType().getSimpleVT();
2566 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
2567
2568 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
2569 V128Reg);
2570}
2571
2572void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
2573 unsigned Opc) {
2574 SDLoc dl(N);
2575 EVT VT = N->getValueType(0);
2576 bool Narrow = VT.getSizeInBits() == 64;
2577
2578 // Form a REG_SEQUENCE to force register allocation.
2579 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2580
2581 if (Narrow)
2582 transform(Regs, Regs.begin(),
2583 WidenVector(*CurDAG));
2584
2585 SDValue RegSeq = createQTuple(Regs);
2586
2587 const EVT ResTys[] = {MVT::Untyped, MVT::Other};
2588
2589 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2590
2591 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2592 N->getOperand(NumVecs + 3), N->getOperand(0)};
2593 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2594 SDValue SuperReg = SDValue(Ld, 0);
2595
2596 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2597 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2598 AArch64::qsub2, AArch64::qsub3 };
2599 for (unsigned i = 0; i < NumVecs; ++i) {
2600 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
2601 if (Narrow)
2602 NV = NarrowVector(NV, *CurDAG);
2603 ReplaceUses(SDValue(N, i), NV);
2604 }
2605
2606 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
2607 CurDAG->RemoveDeadNode(N);
2608}
2609
2610void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
2611 unsigned Opc) {
2612 SDLoc dl(N);
2613 EVT VT = N->getValueType(0);
2614 bool Narrow = VT.getSizeInBits() == 64;
2615
2616 // Form a REG_SEQUENCE to force register allocation.
2617 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2618
2619 if (Narrow)
2620 transform(Regs, Regs.begin(),
2621 WidenVector(*CurDAG));
2622
2623 SDValue RegSeq = createQTuple(Regs);
2624
2625 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2626 RegSeq->getValueType(0), MVT::Other};
2627
2628 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2629
2630 SDValue Ops[] = {RegSeq,
2631 CurDAG->getTargetConstant(LaneNo, dl,
2632 MVT::i64), // Lane Number
2633 N->getOperand(NumVecs + 2), // Base register
2634 N->getOperand(NumVecs + 3), // Incremental
2635 N->getOperand(0)};
2636 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2637
2638 // Update uses of the write back register
2639 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
2640
2641 // Update uses of the vector list
2642 SDValue SuperReg = SDValue(Ld, 1);
2643 if (NumVecs == 1) {
2644 ReplaceUses(SDValue(N, 0),
2645 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
2646 } else {
2647 EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
2648 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
2649 AArch64::qsub2, AArch64::qsub3 };
2650 for (unsigned i = 0; i < NumVecs; ++i) {
2651 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
2652 SuperReg);
2653 if (Narrow)
2654 NV = NarrowVector(NV, *CurDAG);
2655 ReplaceUses(SDValue(N, i), NV);
2656 }
2657 }
2658
2659 // Update the Chain
2660 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
2661 CurDAG->RemoveDeadNode(N);
2662}
2663
2664void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
2665 unsigned Opc) {
2666 SDLoc dl(N);
2667 EVT VT = N->getOperand(2)->getValueType(0);
2668 bool Narrow = VT.getSizeInBits() == 64;
2669
2670 // Form a REG_SEQUENCE to force register allocation.
2671 SmallVector<SDValue, 4> Regs(N->ops().slice(2, NumVecs));
2672
2673 if (Narrow)
2674 transform(Regs, Regs.begin(),
2675 WidenVector(*CurDAG));
2676
2677 SDValue RegSeq = createQTuple(Regs);
2678
2679 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);
2680
2681 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2682 N->getOperand(NumVecs + 3), N->getOperand(0)};
2683 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
2684
2685 // Transfer memoperands.
2686 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2687 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2688
2689 ReplaceNode(N, St);
2690}
2691
2692void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
2693 unsigned Opc) {
2694 SDLoc dl(N);
2695 EVT VT = N->getOperand(2)->getValueType(0);
2696 bool Narrow = VT.getSizeInBits() == 64;
2697
2698 // Form a REG_SEQUENCE to force register allocation.
2699 SmallVector<SDValue, 4> Regs(N->ops().slice(1, NumVecs));
2700
2701 if (Narrow)
2702 transform(Regs, Regs.begin(),
2703 WidenVector(*CurDAG));
2704
2705 SDValue RegSeq = createQTuple(Regs);
2706
2707 const EVT ResTys[] = {MVT::i64, // Type of the write back register
2708 MVT::Other};
2709
2710 unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);
2711
2712 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
2713 N->getOperand(NumVecs + 2), // Base Register
2714 N->getOperand(NumVecs + 3), // Incremental
2715 N->getOperand(0)};
2716 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
2717
2718 // Transfer memoperands.
2719 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2720 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
2721
2722 ReplaceNode(N, St);
2723}
2724
2726 unsigned &Opc, SDValue &Opd0,
2727 unsigned &LSB, unsigned &MSB,
2728 unsigned NumberOfIgnoredLowBits,
2729 bool BiggerPattern) {
2730 assert(N->getOpcode() == ISD::AND &&
2731 "N must be a AND operation to call this function");
2732
2733 EVT VT = N->getValueType(0);
2734
2735 // Here we can test the type of VT and return false when the type does not
2736 // match, but since it is done prior to that call in the current context
2737 // we turned that into an assert to avoid redundant code.
2738 assert((VT == MVT::i32 || VT == MVT::i64) &&
2739 "Type checking must have been done before calling this function");
2740
2741 // FIXME: simplify-demanded-bits in DAGCombine will probably have
2742 // changed the AND node to a 32-bit mask operation. We'll have to
2743 // undo that as part of the transform here if we want to catch all
2744 // the opportunities.
2745 // Currently the NumberOfIgnoredLowBits argument helps to recover
2746 // from these situations when matching bigger pattern (bitfield insert).
2747
2748 // For unsigned extracts, check for a shift right and mask
2749 uint64_t AndImm = 0;
2750 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
2751 return false;
2752
2753 const SDNode *Op0 = N->getOperand(0).getNode();
2754
2755 // Because of simplify-demanded-bits in DAGCombine, the mask may have been
2756 // simplified. Try to undo that
2757 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
2758
2759 // The immediate is a mask of the low bits iff imm & (imm+1) == 0
2760 if (AndImm & (AndImm + 1))
2761 return false;
2762
2763 bool ClampMSB = false;
2764 uint64_t SrlImm = 0;
2765 // Handle the SRL + ANY_EXTEND case.
2766 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
2767 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
2768 // Extend the incoming operand of the SRL to 64-bit.
2769 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
2770 // Make sure to clamp the MSB so that we preserve the semantics of the
2771 // original operations.
2772 ClampMSB = true;
2773 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
2775 SrlImm)) {
2776 // If the shift result was truncated, we can still combine them.
2777 Opd0 = Op0->getOperand(0).getOperand(0);
2778
2779 // Use the type of SRL node.
2780 VT = Opd0->getValueType(0);
2781 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
2782 Opd0 = Op0->getOperand(0);
2783 ClampMSB = (VT == MVT::i32);
2784 } else if (BiggerPattern) {
2785 // Let's pretend a 0 shift right has been performed.
2786 // The resulting code will be at least as good as the original one
2787 // plus it may expose more opportunities for bitfield insert pattern.
2788 // FIXME: Currently we limit this to the bigger pattern, because
2789 // some optimizations expect AND and not UBFM.
2790 Opd0 = N->getOperand(0);
2791 } else
2792 return false;
2793
2794 // Bail out on large immediates. This happens when no proper
2795 // combining/constant folding was performed.
2796 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
2797 LLVM_DEBUG(
2798 (dbgs() << N
2799 << ": Found large shift immediate, this should not happen\n"));
2800 return false;
2801 }
2802
2803 LSB = SrlImm;
2804 MSB = SrlImm +
2805 (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
2806 : llvm::countr_one<uint64_t>(AndImm)) -
2807 1;
2808 if (ClampMSB)
2809 // Since we're moving the extend before the right shift operation, we need
2810 // to clamp the MSB to make sure we don't shift in undefined bits instead of
2811 // the zeros which would get shifted in with the original right shift
2812 // operation.
2813 MSB = MSB > 31 ? 31 : MSB;
2814
2815 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2816 return true;
2817}
2818
2820 SDValue &Opd0, unsigned &Immr,
2821 unsigned &Imms) {
2822 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
2823
2824 EVT VT = N->getValueType(0);
2825 unsigned BitWidth = VT.getSizeInBits();
2826 assert((VT == MVT::i32 || VT == MVT::i64) &&
2827 "Type checking must have been done before calling this function");
2828
2829 SDValue Op = N->getOperand(0);
2830 if (Op->getOpcode() == ISD::TRUNCATE) {
2831 Op = Op->getOperand(0);
2832 VT = Op->getValueType(0);
2833 BitWidth = VT.getSizeInBits();
2834 }
2835
2836 uint64_t ShiftImm;
2837 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
2838 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2839 return false;
2840
2841 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
2842 if (ShiftImm + Width > BitWidth)
2843 return false;
2844
2845 Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
2846 Opd0 = Op.getOperand(0);
2847 Immr = ShiftImm;
2848 Imms = ShiftImm + Width - 1;
2849 return true;
2850}
2851
2853 SDValue &Opd0, unsigned &LSB,
2854 unsigned &MSB) {
2855 // We are looking for the following pattern which basically extracts several
2856 // continuous bits from the source value and places it from the LSB of the
2857 // destination value, all other bits of the destination value or set to zero:
2858 //
2859 // Value2 = AND Value, MaskImm
2860 // SRL Value2, ShiftImm
2861 //
2862 // with MaskImm >> ShiftImm to search for the bit width.
2863 //
2864 // This gets selected into a single UBFM:
2865 //
2866 // UBFM Value, ShiftImm, Log2_64(MaskImm)
2867 //
2868
2869 if (N->getOpcode() != ISD::SRL)
2870 return false;
2871
2872 uint64_t AndMask = 0;
2873 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
2874 return false;
2875
2876 Opd0 = N->getOperand(0).getOperand(0);
2877
2878 uint64_t SrlImm = 0;
2879 if (!isIntImmediate(N->getOperand(1), SrlImm))
2880 return false;
2881
2882 // Check whether we really have several bits extract here.
2883 if (!isMask_64(AndMask >> SrlImm))
2884 return false;
2885
2886 Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
2887 LSB = SrlImm;
2888 MSB = llvm::Log2_64(AndMask);
2889 return true;
2890}
2891
2892static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
2893 unsigned &Immr, unsigned &Imms,
2894 bool BiggerPattern) {
2895 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
2896 "N must be a SHR/SRA operation to call this function");
2897
2898 EVT VT = N->getValueType(0);
2899
2900 // Here we can test the type of VT and return false when the type does not
2901 // match, but since it is done prior to that call in the current context
2902 // we turned that into an assert to avoid redundant code.
2903 assert((VT == MVT::i32 || VT == MVT::i64) &&
2904 "Type checking must have been done before calling this function");
2905
2906 // Check for AND + SRL doing several bits extract.
2907 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
2908 return true;
2909
2910 // We're looking for a shift of a shift.
2911 uint64_t ShlImm = 0;
2912 uint64_t TruncBits = 0;
2913 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
2914 Opd0 = N->getOperand(0).getOperand(0);
2915 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
2916 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
2917 // We are looking for a shift of truncate. Truncate from i64 to i32 could
2918 // be considered as setting high 32 bits as zero. Our strategy here is to
2919 // always generate 64bit UBFM. This consistency will help the CSE pass
2920 // later find more redundancy.
2921 Opd0 = N->getOperand(0).getOperand(0);
2922 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
2923 VT = Opd0.getValueType();
2924 assert(VT == MVT::i64 && "the promoted type should be i64");
2925 } else if (BiggerPattern) {
2926 // Let's pretend a 0 shift left has been performed.
2927 // FIXME: Currently we limit this to the bigger pattern case,
2928 // because some optimizations expect AND and not UBFM
2929 Opd0 = N->getOperand(0);
2930 } else
2931 return false;
2932
2933 // Missing combines/constant folding may have left us with strange
2934 // constants.
2935 if (ShlImm >= VT.getSizeInBits()) {
2936 LLVM_DEBUG(
2937 (dbgs() << N
2938 << ": Found large shift immediate, this should not happen\n"));
2939 return false;
2940 }
2941
2942 uint64_t SrlImm = 0;
2943 if (!isIntImmediate(N->getOperand(1), SrlImm))
2944 return false;
2945
2946 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
2947 "bad amount in shift node!");
2948 int immr = SrlImm - ShlImm;
2949 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
2950 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
2951 // SRA requires a signed extraction
2952 if (VT == MVT::i32)
2953 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
2954 else
2955 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
2956 return true;
2957}
2958
2959bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
2960 assert(N->getOpcode() == ISD::SIGN_EXTEND);
2961
2962 EVT VT = N->getValueType(0);
2963 EVT NarrowVT = N->getOperand(0)->getValueType(0);
2964 if (VT != MVT::i64 || NarrowVT != MVT::i32)
2965 return false;
2966
2967 uint64_t ShiftImm;
2968 SDValue Op = N->getOperand(0);
2969 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
2970 return false;
2971
2972 SDLoc dl(N);
2973 // Extend the incoming operand of the shift to 64-bits.
2974 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
2975 unsigned Immr = ShiftImm;
2976 unsigned Imms = NarrowVT.getSizeInBits() - 1;
2977 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
2978 CurDAG->getTargetConstant(Imms, dl, VT)};
2979 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
2980 return true;
2981}
2982
2983static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
2984 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
2985 unsigned NumberOfIgnoredLowBits = 0,
2986 bool BiggerPattern = false) {
2987 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
2988 return false;
2989
2990 switch (N->getOpcode()) {
2991 default:
2992 if (!N->isMachineOpcode())
2993 return false;
2994 break;
2995 case ISD::AND:
2996 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
2997 NumberOfIgnoredLowBits, BiggerPattern);
2998 case ISD::SRL:
2999 case ISD::SRA:
3000 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
3001
3003 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
3004 }
3005
3006 unsigned NOpc = N->getMachineOpcode();
3007 switch (NOpc) {
3008 default:
3009 return false;
3010 case AArch64::SBFMWri:
3011 case AArch64::UBFMWri:
3012 case AArch64::SBFMXri:
3013 case AArch64::UBFMXri:
3014 Opc = NOpc;
3015 Opd0 = N->getOperand(0);
3016 Immr = N->getConstantOperandVal(1);
3017 Imms = N->getConstantOperandVal(2);
3018 return true;
3019 }
3020 // Unreachable
3021 return false;
3022}
3023
3024bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
3025 unsigned Opc, Immr, Imms;
3026 SDValue Opd0;
3027 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
3028 return false;
3029
3030 EVT VT = N->getValueType(0);
3031 SDLoc dl(N);
3032
3033 // If the bit extract operation is 64bit but the original type is 32bit, we
3034 // need to add one EXTRACT_SUBREG.
3035 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
3036 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
3037 CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
3038
3039 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
3040 SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
3041 MVT::i32, SDValue(BFM, 0));
3042 ReplaceNode(N, Inner.getNode());
3043 return true;
3044 }
3045
3046 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
3047 CurDAG->getTargetConstant(Imms, dl, VT)};
3048 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3049 return true;
3050}
3051
3052/// Does DstMask form a complementary pair with the mask provided by
3053/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
3054/// this asks whether DstMask zeroes precisely those bits that will be set by
3055/// the other half.
3056static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
3057 unsigned NumberOfIgnoredHighBits, EVT VT) {
3058 assert((VT == MVT::i32 || VT == MVT::i64) &&
3059 "i32 or i64 mask type expected!");
3060 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
3061
3062 // Enable implicitTrunc as we're intentionally ignoring high bits.
3063 APInt SignificantDstMask =
3064 APInt(BitWidth, DstMask, /*isSigned=*/false, /*implicitTrunc=*/true);
3065 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
3066
3067 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
3068 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
3069}
3070
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped, provided it was
// never used before it was dropped.
// E.g., looking for the useful bits of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; those bits then live on through
// y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
3086static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
3087
3089 unsigned Depth) {
3090 uint64_t Imm =
3091 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3092 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
3093 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
3094 getUsefulBits(Op, UsefulBits, Depth + 1);
3095}
3096
3098 uint64_t Imm, uint64_t MSB,
3099 unsigned Depth) {
3100 // inherit the bitwidth value
3101 APInt OpUsefulBits(UsefulBits);
3102 OpUsefulBits = 1;
3103
3104 if (MSB >= Imm) {
3105 OpUsefulBits <<= MSB - Imm + 1;
3106 --OpUsefulBits;
3107 // The interesting part will be in the lower part of the result
3108 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3109 // The interesting part was starting at Imm in the argument
3110 OpUsefulBits <<= Imm;
3111 } else {
3112 OpUsefulBits <<= MSB + 1;
3113 --OpUsefulBits;
3114 // The interesting part will be shifted in the result
3115 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
3116 getUsefulBits(Op, OpUsefulBits, Depth + 1);
3117 // The interesting part was at zero in the argument
3118 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
3119 }
3120
3121 UsefulBits &= OpUsefulBits;
3122}
3123
3124static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
3125 unsigned Depth) {
3126 uint64_t Imm =
3127 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
3128 uint64_t MSB =
3129 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3130
3131 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
3132}
3133
3135 unsigned Depth) {
3136 uint64_t ShiftTypeAndValue =
3137 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3138 APInt Mask(UsefulBits);
3139 Mask.clearAllBits();
3140 Mask.flipAllBits();
3141
3142 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
3143 // Shift Left
3144 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3145 Mask <<= ShiftAmt;
3146 getUsefulBits(Op, Mask, Depth + 1);
3147 Mask.lshrInPlace(ShiftAmt);
3148 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
3149 // Shift Right
3150 // We do not handle AArch64_AM::ASR, because the sign will change the
3151 // number of useful bits
3152 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
3153 Mask.lshrInPlace(ShiftAmt);
3154 getUsefulBits(Op, Mask, Depth + 1);
3155 Mask <<= ShiftAmt;
3156 } else
3157 return;
3158
3159 UsefulBits &= Mask;
3160}
3161
3162static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
3163 unsigned Depth) {
3164 uint64_t Imm =
3165 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
3166 uint64_t MSB =
3167 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
3168
3169 APInt OpUsefulBits(UsefulBits);
3170 OpUsefulBits = 1;
3171
3172 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
3173 ResultUsefulBits.flipAllBits();
3174 APInt Mask(UsefulBits.getBitWidth(), 0);
3175
3176 getUsefulBits(Op, ResultUsefulBits, Depth + 1);
3177
3178 if (MSB >= Imm) {
3179 // The instruction is a BFXIL.
3180 uint64_t Width = MSB - Imm + 1;
3181 uint64_t LSB = Imm;
3182
3183 OpUsefulBits <<= Width;
3184 --OpUsefulBits;
3185
3186 if (Op.getOperand(1) == Orig) {
3187 // Copy the low bits from the result to bits starting from LSB.
3188 Mask = ResultUsefulBits & OpUsefulBits;
3189 Mask <<= LSB;
3190 }
3191
3192 if (Op.getOperand(0) == Orig)
3193 // Bits starting from LSB in the input contribute to the result.
3194 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3195 } else {
3196 // The instruction is a BFI.
3197 uint64_t Width = MSB + 1;
3198 uint64_t LSB = UsefulBits.getBitWidth() - Imm;
3199
3200 OpUsefulBits <<= Width;
3201 --OpUsefulBits;
3202 OpUsefulBits <<= LSB;
3203
3204 if (Op.getOperand(1) == Orig) {
3205 // Copy the bits from the result to the zero bits.
3206 Mask = ResultUsefulBits & OpUsefulBits;
3207 Mask.lshrInPlace(LSB);
3208 }
3209
3210 if (Op.getOperand(0) == Orig)
3211 Mask |= (ResultUsefulBits & ~OpUsefulBits);
3212 }
3213
3214 UsefulBits &= Mask;
3215}
3216
3217static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
3218 SDValue Orig, unsigned Depth) {
3219
3220 // Users of this node should have already been instruction selected
3221 // FIXME: Can we turn that into an assert?
3222 if (!UserNode->isMachineOpcode())
3223 return;
3224
3225 switch (UserNode->getMachineOpcode()) {
3226 default:
3227 return;
3228 case AArch64::ANDSWri:
3229 case AArch64::ANDSXri:
3230 case AArch64::ANDWri:
3231 case AArch64::ANDXri:
3232 // We increment Depth only when we call the getUsefulBits
3233 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
3234 Depth);
3235 case AArch64::UBFMWri:
3236 case AArch64::UBFMXri:
3237 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
3238
3239 case AArch64::ORRWrs:
3240 case AArch64::ORRXrs:
3241 if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
3242 getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
3243 Depth);
3244 return;
3245 case AArch64::BFMWri:
3246 case AArch64::BFMXri:
3247 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
3248
3249 case AArch64::STRBBui:
3250 case AArch64::STURBBi:
3251 if (UserNode->getOperand(0) != Orig)
3252 return;
3253 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
3254 return;
3255
3256 case AArch64::STRHHui:
3257 case AArch64::STURHHi:
3258 if (UserNode->getOperand(0) != Orig)
3259 return;
3260 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
3261 return;
3262 }
3263}
3264
3265static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
3267 return;
3268 // Initialize UsefulBits
3269 if (!Depth) {
3270 unsigned Bitwidth = Op.getScalarValueSizeInBits();
3271 // At the beginning, assume every produced bits is useful
3272 UsefulBits = APInt(Bitwidth, 0);
3273 UsefulBits.flipAllBits();
3274 }
3275 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
3276
3277 for (SDNode *Node : Op.getNode()->users()) {
3278 // A use cannot produce useful bits
3279 APInt UsefulBitsForUse = APInt(UsefulBits);
3280 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
3281 UsersUsefulBits |= UsefulBitsForUse;
3282 }
3283 // UsefulBits contains the produced bits that are meaningful for the
3284 // current definition, thus a user cannot make a bit meaningful at
3285 // this point
3286 UsefulBits &= UsersUsefulBits;
3287}
3288
3289/// Create a machine node performing a notional SHL of Op by ShlAmount. If
3290/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
3291/// 0, return Op unchanged.
3292static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
3293 if (ShlAmount == 0)
3294 return Op;
3295
3296 EVT VT = Op.getValueType();
3297 SDLoc dl(Op);
3298 unsigned BitWidth = VT.getSizeInBits();
3299 unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
3300
3301 SDNode *ShiftNode;
3302 if (ShlAmount > 0) {
3303 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
3304 ShiftNode = CurDAG->getMachineNode(
3305 UBFMOpc, dl, VT, Op,
3306 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
3307 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
3308 } else {
3309 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
3310 assert(ShlAmount < 0 && "expected right shift");
3311 int ShrAmount = -ShlAmount;
3312 ShiftNode = CurDAG->getMachineNode(
3313 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
3314 CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
3315 }
3316
3317 return SDValue(ShiftNode, 0);
3318}
3319
3320// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
3321static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
3322 bool BiggerPattern,
3323 const uint64_t NonZeroBits,
3324 SDValue &Src, int &DstLSB,
3325 int &Width);
3326
// For bit-field-positioning pattern "(shl VAL, N)".
3328static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
3329 bool BiggerPattern,
3330 const uint64_t NonZeroBits,
3331 SDValue &Src, int &DstLSB,
3332 int &Width);
3333
3334/// Does this tree qualify as an attempt to move a bitfield into position,
3335/// essentially "(and (shl VAL, N), Mask)" or (shl VAL, N).
3337 bool BiggerPattern, SDValue &Src,
3338 int &DstLSB, int &Width) {
3339 EVT VT = Op.getValueType();
3340 unsigned BitWidth = VT.getSizeInBits();
3341 (void)BitWidth;
3342 assert(BitWidth == 32 || BitWidth == 64);
3343
3344 KnownBits Known = CurDAG->computeKnownBits(Op);
3345
3346 // Non-zero in the sense that they're not provably zero, which is the key
3347 // point if we want to use this value
3348 const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
3349 if (!isShiftedMask_64(NonZeroBits))
3350 return false;
3351
3352 switch (Op.getOpcode()) {
3353 default:
3354 break;
3355 case ISD::AND:
3356 return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
3357 NonZeroBits, Src, DstLSB, Width);
3358 case ISD::SHL:
3359 return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
3360 NonZeroBits, Src, DstLSB, Width);
3361 }
3362
3363 return false;
3364}
3365
3367 bool BiggerPattern,
3368 const uint64_t NonZeroBits,
3369 SDValue &Src, int &DstLSB,
3370 int &Width) {
3371 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3372
3373 EVT VT = Op.getValueType();
3374 assert((VT == MVT::i32 || VT == MVT::i64) &&
3375 "Caller guarantees VT is one of i32 or i64");
3376 (void)VT;
3377
3378 uint64_t AndImm;
3379 if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
3380 return false;
3381
3382 // If (~AndImm & NonZeroBits) is not zero at POS, we know that
3383 // 1) (AndImm & (1 << POS) == 0)
3384 // 2) the result of AND is not zero at POS bit (according to NonZeroBits)
3385 //
3386 // 1) and 2) don't agree so something must be wrong (e.g., in
3387 // 'SelectionDAG::computeKnownBits')
3388 assert((~AndImm & NonZeroBits) == 0 &&
3389 "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");
3390
3391 SDValue AndOp0 = Op.getOperand(0);
3392
3393 uint64_t ShlImm;
3394 SDValue ShlOp0;
3395 if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
3396 // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
3397 ShlOp0 = AndOp0.getOperand(0);
3398 } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
3400 ShlImm)) {
3401 // For pattern "and(any_extend(shl(val, N)), shifted-mask)"
3402
3403 // ShlVal == shl(val, N), which is a left shift on a smaller type.
3404 SDValue ShlVal = AndOp0.getOperand(0);
3405
3406 // Since this is after type legalization and ShlVal is extended to MVT::i64,
3407 // expect VT to be MVT::i32.
3408 assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");
3409
3410 // Widens 'val' to MVT::i64 as the source of bit field positioning.
3411 ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
3412 } else
3413 return false;
3414
3415 // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
3416 // then we'll end up generating AndOp0+UBFIZ instead of just keeping
3417 // AndOp0+AND.
3418 if (!BiggerPattern && !AndOp0.hasOneUse())
3419 return false;
3420
3421 DstLSB = llvm::countr_zero(NonZeroBits);
3422 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3423
3424 // Bail out on large Width. This happens when no proper combining / constant
3425 // folding was performed.
3426 if (Width >= (int)VT.getSizeInBits()) {
3427 // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t, and
3428 // Width == 64 indicates a missed dag-combine from "(and val, AllOnes)" to
3429 // "val".
3430 // If VT is i32, what Width >= 32 means:
3431 // - For "(and (any_extend(shl val, N)), shifted-mask)", the`and` Op
3432 // demands at least 'Width' bits (after dag-combiner). This together with
3433 // `any_extend` Op (undefined higher bits) indicates missed combination
3434 // when lowering the 'and' IR instruction to an machine IR instruction.
3435 LLVM_DEBUG(
3436 dbgs()
3437 << "Found large Width in bit-field-positioning -- this indicates no "
3438 "proper combining / constant folding was performed\n");
3439 return false;
3440 }
3441
3442 // BFI encompasses sufficiently many nodes that it's worth inserting an extra
3443 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
3444 // amount. BiggerPattern is true when this pattern is being matched for BFI,
3445 // BiggerPattern is false when this pattern is being matched for UBFIZ, in
3446 // which case it is not profitable to insert an extra shift.
3447 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3448 return false;
3449
3450 Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
3451 return true;
3452}
3453
3454// For node (shl (and val, mask), N)), returns true if the node is equivalent to
3455// UBFIZ.
3457 SDValue &Src, int &DstLSB,
3458 int &Width) {
3459 // Caller should have verified that N is a left shift with constant shift
3460 // amount; asserts that.
3461 assert(Op.getOpcode() == ISD::SHL &&
3462 "Op.getNode() should be a SHL node to call this function");
3463 assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
3464 "Op.getNode() should shift ShlImm to call this function");
3465
3466 uint64_t AndImm = 0;
3467 SDValue Op0 = Op.getOperand(0);
3468 if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
3469 return false;
3470
3471 const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
3472 if (isMask_64(ShiftedAndImm)) {
3473 // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
3474 // should end with Mask, and could be prefixed with random bits if those
3475 // bits are shifted out.
3476 //
3477 // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
3478 // the AND result corresponding to those bits are shifted out, so it's fine
3479 // to not extract them.
3480 Width = llvm::countr_one(ShiftedAndImm);
3481 DstLSB = ShlImm;
3482 Src = Op0.getOperand(0);
3483 return true;
3484 }
3485 return false;
3486}
3487
3489 bool BiggerPattern,
3490 const uint64_t NonZeroBits,
3491 SDValue &Src, int &DstLSB,
3492 int &Width) {
3493 assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");
3494
3495 EVT VT = Op.getValueType();
3496 assert((VT == MVT::i32 || VT == MVT::i64) &&
3497 "Caller guarantees that type is i32 or i64");
3498 (void)VT;
3499
3500 uint64_t ShlImm;
3501 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
3502 return false;
3503
3504 if (!BiggerPattern && !Op.hasOneUse())
3505 return false;
3506
3507 if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
3508 return true;
3509
3510 DstLSB = llvm::countr_zero(NonZeroBits);
3511 Width = llvm::countr_one(NonZeroBits >> DstLSB);
3512
3513 if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
3514 return false;
3515
3516 Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
3517 return true;
3518}
3519
3520static bool isShiftedMask(uint64_t Mask, EVT VT) {
3521 assert(VT == MVT::i32 || VT == MVT::i64);
3522 if (VT == MVT::i32)
3523 return isShiftedMask_32(Mask);
3524 return isShiftedMask_64(Mask);
3525}
3526
3527// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
3528// inserted only sets known zero bits.
3530 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3531
3532 EVT VT = N->getValueType(0);
3533 if (VT != MVT::i32 && VT != MVT::i64)
3534 return false;
3535
3536 unsigned BitWidth = VT.getSizeInBits();
3537
3538 uint64_t OrImm;
3539 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
3540 return false;
3541
3542 // Skip this transformation if the ORR immediate can be encoded in the ORR.
3543 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
3544 // performance neutral.
3546 return false;
3547
3548 uint64_t MaskImm;
3549 SDValue And = N->getOperand(0);
3550 // Must be a single use AND with an immediate operand.
3551 if (!And.hasOneUse() ||
3552 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
3553 return false;
3554
3555 // Compute the Known Zero for the AND as this allows us to catch more general
3556 // cases than just looking for AND with imm.
3557 KnownBits Known = CurDAG->computeKnownBits(And);
3558
3559 // Non-zero in the sense that they're not provably zero, which is the key
3560 // point if we want to use this value.
3561 uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
3562
3563 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
3564 if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
3565 return false;
3566
3567 // The bits being inserted must only set those bits that are known to be zero.
3568 if ((OrImm & NotKnownZero) != 0) {
3569 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
3570 // currently handle this case.
3571 return false;
3572 }
3573
3574 // BFI/BFXIL dst, src, #lsb, #width.
3575 int LSB = llvm::countr_one(NotKnownZero);
3576 int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
3577
3578 // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
3579 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3580 unsigned ImmS = Width - 1;
3581
3582 // If we're creating a BFI instruction avoid cases where we need more
3583 // instructions to materialize the BFI constant as compared to the original
3584 // ORR. A BFXIL will use the same constant as the original ORR, so the code
3585 // should be no worse in this case.
3586 bool IsBFI = LSB != 0;
3587 uint64_t BFIImm = OrImm >> LSB;
3588 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
3589 // We have a BFI instruction and we know the constant can't be materialized
3590 // with a ORR-immediate with the zero register.
3591 unsigned OrChunks = 0, BFIChunks = 0;
3592 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
3593 if (((OrImm >> Shift) & 0xFFFF) != 0)
3594 ++OrChunks;
3595 if (((BFIImm >> Shift) & 0xFFFF) != 0)
3596 ++BFIChunks;
3597 }
3598 if (BFIChunks > OrChunks)
3599 return false;
3600 }
3601
3602 // Materialize the constant to be inserted.
3603 SDLoc DL(N);
3604 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
3605 SDNode *MOVI = CurDAG->getMachineNode(
3606 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
3607
3608 // Create the BFI/BFXIL instruction.
3609 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
3610 CurDAG->getTargetConstant(ImmR, DL, VT),
3611 CurDAG->getTargetConstant(ImmS, DL, VT)};
3612 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3613 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3614 return true;
3615}
3616
// NOTE(review): the line that opens this definition (return type, name and
// leading parameters) is absent from this listing; the name
// isWorthFoldingIntoOrrWithShift is taken from the call in tryOrrWithShift,
// which passes (Dst, CurDAG, ShiftedOperand, EncodedShiftImm).
//
// Decides whether Dst can be expressed as the shifted-register operand of an
// ORR. On success the function returns true and fills in:
//   ShiftedOperand  - the value the ORR should shift, and
//   EncodedShiftImm - the shifter immediate from AArch64_AM::getShifterImm.
// Accepted shapes of Dst (Dst must have exactly one use):
//   * (and (srl X, C), ShiftedMask) where the srl has one use: a UBFM machine
//     node extracting the field from X is created and used with an LSL by the
//     number of trailing zeros of the mask;
//   * (shl X, C): X with LSL #C;
//   * (srl X, C): X with LSR #C.
// Returns false in every other case; an AND-with-shifted-mask that does not
// fit the first shape is rejected without trying the shl/srl shapes.
3618 SDValue &ShiftedOperand,
3619 uint64_t &EncodedShiftImm) {
3620 // Avoid folding Dst into ORR-with-shift if Dst has other uses than ORR.
3621 if (!Dst.hasOneUse())
3622 return false;
3623
3624 EVT VT = Dst.getValueType();
3625 assert((VT == MVT::i32 || VT == MVT::i64) &&
3626 "Caller should guarantee that VT is one of i32 or i64");
3627 const unsigned SizeInBits = VT.getSizeInBits();
3628
3629 SDLoc DL(Dst.getNode());
3630 uint64_t AndImm, ShlImm;
3631 if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
3632 isShiftedMask_64(AndImm)) {
3633 // Avoid transforming 'DstOp0' if it has other uses than the AND node.
3634 SDValue DstOp0 = Dst.getOperand(0);
3635 if (!DstOp0.hasOneUse())
3636 return false;
3637
3638 // An example to illustrate the transformation
3639 // From:
3640 // lsr x8, x1, #1
3641 // and x8, x8, #0x3f80
3642 // bfxil x8, x1, #0, #7
3643 // To:
3644 // and x8, x23, #0x7f
3645 // ubfx x9, x23, #8, #7
3646 // orr x23, x8, x9, lsl #7
3647 //
3648 // The number of instructions remains the same, but ORR is faster than BFXIL
3649 // on many AArch64 processors (or as good as BFXIL if not faster). Besides,
3650 // the dependency chain is improved after the transformation.
3651 uint64_t SrlImm;
3652 if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
3653 uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
// Only proceed when the first extracted bit still lies inside the register.
3654 if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
// Width of the contiguous run of ones in the mask.
3655 unsigned MaskWidth =
3656 llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
3657 unsigned UBFMOpc =
3658 (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
// UBFM operands: source of the srl, first bit, last bit of the field.
3659 SDNode *UBFMNode = CurDAG->getMachineNode(
3660 UBFMOpc, DL, VT, DstOp0.getOperand(0),
3661 CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
3662 VT),
3663 CurDAG->getTargetConstant(
3664 SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
3665 ShiftedOperand = SDValue(UBFMNode, 0);
3666 EncodedShiftImm = AArch64_AM::getShifterImm(
3667 AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
3668 return true;
3669 }
3670 }
3671 return false;
3672 }
3673
// Plain left shift by a constant: fold it as "LSL #ShlImm".
3674 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
3675 ShiftedOperand = Dst.getOperand(0);
3676 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
3677 return true;
3678 }
3679
// Plain logical right shift by a constant: fold it as "LSR #SrlImm".
3680 uint64_t SrlImm;
3681 if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
3682 ShiftedOperand = Dst.getOperand(0);
3683 EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
3684 return true;
3685 }
3686 return false;
3687}
3688
3689// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
3690// the operands and select it to AArch64::ORR with shifted registers if
3691// that's more efficient. Returns true iff selection to AArch64::ORR happens.
//
// Parameters, as used below:
//   N             - the OR node; rewritten in place with SelectNodeTo on
//                   success.
//   OrOpd0/OrOpd1 - the two operands of N in the order chosen by the caller
//                   (asserted to be N's operands in either order).
//   Src/Dst       - the BFM source and destination the caller derived.
//   BiggerPattern - selects which of the two strategies below is attempted.
//
// NOTE(review): three lines are missing from this listing (the tail of each
// getTargetConstant(...) call that builds the shift operand), so the three
// 'SDValue Ops[]' initializers below are incomplete as shown.
3692static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
3693 SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
3694 const bool BiggerPattern) {
3695 EVT VT = N->getValueType(0);
3696 assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
3697 assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
3698 (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
3699 "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
3700 assert((VT == MVT::i32 || VT == MVT::i64) &&
3701 "Expect result type to be i32 or i64 since N is combinable to BFM");
3702 SDLoc DL(N);
3703
3704 // Bail out if BFM simplifies away one node in BFM Dst.
3705 if (OrOpd1 != Dst)
3706 return false;
3707
3708 const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
3709 // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
3710 // nodes from Rn (or inserts additional shift node) if BiggerPattern is true.
3711 if (BiggerPattern) {
3712 uint64_t SrcAndImm;
3713 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
3714 isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
3715 // OrOpd0 = AND Src, #Mask
3716 // So BFM simplifies away one AND node from Src and doesn't simplify away
3717 // nodes from Dst. If ORR with left-shifted operand also simplifies away
3718 // one node (from Rd), ORR is better since it has higher throughput and
3719 // smaller latency than BFM on many AArch64 processors (and for the rest
3720 // ORR is at least as good as BFM).
3721 SDValue ShiftedOperand;
3722 uint64_t EncodedShiftImm;
3723 if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
3724 EncodedShiftImm)) {
// ORR operands: the masked source, the value to shift, the shift encoding.
3725 SDValue Ops[] = {OrOpd0, ShiftedOperand,
3726 CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
3727 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3728 return true;
3729 }
3730 }
// With BiggerPattern set, nothing else is attempted.
3731 return false;
3732 }
3733
3734 assert((!BiggerPattern) && "BiggerPattern should be handled above");
3735
3736 uint64_t ShlImm;
3737 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
// OrOpd0 is a single-use 'shl Src, #imm': OR Dst with the shifted Src.
3738 if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
3739 SDValue Ops[] = {
3740 Dst, Src,
// NOTE(review): listing line 3742 (rest of this call) is missing here.
3741 CurDAG->getTargetConstant(
3743 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3744 return true;
3745 }
3746
3747 // Select the following pattern to left-shifted operand rather than BFI.
3748 // %val1 = op ..
3749 // %val2 = shl %val1, #imm
3750 // %res = or %val1, %val2
3751 //
3752 // If N is selected to be BFI, we know that
3753 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3754 // BFI) 2) OrOpd1 would be the destination operand (i.e., preserved)
3755 //
3756 // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
3757 if (OrOpd0.getOperand(0) == OrOpd1) {
3758 SDValue Ops[] = {
3759 OrOpd1, OrOpd1,
// NOTE(review): listing line 3761 (rest of this call) is missing here.
3760 CurDAG->getTargetConstant(
3762 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3763 return true;
3764 }
3765 }
3766
3767 uint64_t SrlImm;
3768 if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
3769 // Select the following pattern to right-shifted operand rather than BFXIL.
3770 // %val1 = op ..
3771 // %val2 = lshr %val1, #imm
3772 // %res = or %val1, %val2
3773 //
3774 // If N is selected to be BFXIL, we know that
3775 // 1) OrOpd0 would be the operand from which extract bits (i.e., folded into
3776 // BFXIL) 2) OrOpd1 would be the destination operand (i.e., preserved)
3777 //
3778 // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
3779 if (OrOpd0.getOperand(0) == OrOpd1) {
3780 SDValue Ops[] = {
3781 OrOpd1, OrOpd1,
// NOTE(review): listing line 3783 (rest of this call) is missing here.
3782 CurDAG->getTargetConstant(
3784 CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
3785 return true;
3786 }
3787 }
3788
// No ORR form applies; the caller goes on to emit the BFM.
3789 return false;
3790}
3791
3792static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
3793 SelectionDAG *CurDAG) {
3794 assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
3795
3796 EVT VT = N->getValueType(0);
3797 if (VT != MVT::i32 && VT != MVT::i64)
3798 return false;
3799
3800 unsigned BitWidth = VT.getSizeInBits();
3801
3802 // Because of simplify-demanded-bits in DAGCombine, involved masks may not
3803 // have the expected shape. Try to undo that.
3804
3805 unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
3806 unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
3807
3808 // Given a OR operation, check if we have the following pattern
3809 // ubfm c, b, imm, imm2 (or something that does the same jobs, see
3810 // isBitfieldExtractOp)
3811 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
3812 // countTrailingZeros(mask2) == imm2 - imm + 1
3813 // f = d | c
3814 // if yes, replace the OR instruction with:
3815 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
3816
3817 // OR is commutative, check all combinations of operand order and values of
3818 // BiggerPattern, i.e.
3819 // Opd0, Opd1, BiggerPattern=false
3820 // Opd1, Opd0, BiggerPattern=false
3821 // Opd0, Opd1, BiggerPattern=true
3822 // Opd1, Opd0, BiggerPattern=true
3823 // Several of these combinations may match, so check with BiggerPattern=false
3824 // first since that will produce better results by matching more instructions
3825 // and/or inserting fewer extra instructions.
3826 for (int I = 0; I < 4; ++I) {
3827
3828 SDValue Dst, Src;
3829 unsigned ImmR, ImmS;
3830 bool BiggerPattern = I / 2;
3831 SDValue OrOpd0Val = N->getOperand(I % 2);
3832 SDNode *OrOpd0 = OrOpd0Val.getNode();
3833 SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
3834 SDNode *OrOpd1 = OrOpd1Val.getNode();
3835
3836 unsigned BFXOpc;
3837 int DstLSB, Width;
3838 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
3839 NumberOfIgnoredLowBits, BiggerPattern)) {
3840 // Check that the returned opcode is compatible with the pattern,
3841 // i.e., same type and zero extended (U and not S)
3842 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
3843 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
3844 continue;
3845
3846 // Compute the width of the bitfield insertion
3847 DstLSB = 0;
3848 Width = ImmS - ImmR + 1;
3849 // FIXME: This constraint is to catch bitfield insertion we may
3850 // want to widen the pattern if we want to grab general bitfield
3851 // move case
3852 if (Width <= 0)
3853 continue;
3854
3855 // If the mask on the insertee is correct, we have a BFXIL operation. We
3856 // can share the ImmR and ImmS values from the already-computed UBFM.
3857 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
3858 BiggerPattern,
3859 Src, DstLSB, Width)) {
3860 ImmR = (BitWidth - DstLSB) % BitWidth;
3861 ImmS = Width - 1;
3862 } else
3863 continue;
3864
3865 // Check the second part of the pattern
3866 EVT VT = OrOpd1Val.getValueType();
3867 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
3868
3869 // Compute the Known Zero for the candidate of the first operand.
3870 // This allows to catch more general case than just looking for
3871 // AND with imm. Indeed, simplify-demanded-bits may have removed
3872 // the AND instruction because it proves it was useless.
3873 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
3874
3875 // Check if there is enough room for the second operand to appear
3876 // in the first one
3877 APInt BitsToBeInserted =
3878 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
3879
3880 if ((BitsToBeInserted & ~Known.Zero) != 0)
3881 continue;
3882
3883 // Set the first operand
3884 uint64_t Imm;
3885 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
3886 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
3887 // In that case, we can eliminate the AND
3888 Dst = OrOpd1->getOperand(0);
3889 else
3890 // Maybe the AND has been removed by simplify-demanded-bits
3891 // or is useful because it discards more bits
3892 Dst = OrOpd1Val;
3893
3894 // Before selecting ISD::OR node to AArch64::BFM, see if an AArch64::ORR
3895 // with shifted operand is more efficient.
3896 if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
3897 BiggerPattern))
3898 return true;
3899
3900 // both parts match
3901 SDLoc DL(N);
3902 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
3903 CurDAG->getTargetConstant(ImmS, DL, VT)};
3904 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3905 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3906 return true;
3907 }
3908
3909 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
3910 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
3911 // mask (e.g., 0x000ffff0).
3912 uint64_t Mask0Imm, Mask1Imm;
3913 SDValue And0 = N->getOperand(0);
3914 SDValue And1 = N->getOperand(1);
3915 if (And0.hasOneUse() && And1.hasOneUse() &&
3916 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
3917 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
3918 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
3919 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
3920
3921 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
3922 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
3923 // bits to be inserted.
3924 if (isShiftedMask(Mask0Imm, VT)) {
3925 std::swap(And0, And1);
3926 std::swap(Mask0Imm, Mask1Imm);
3927 }
3928
3929 SDValue Src = And1->getOperand(0);
3930 SDValue Dst = And0->getOperand(0);
3931 unsigned LSB = llvm::countr_zero(Mask1Imm);
3932 int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
3933
3934 // The BFXIL inserts the low-order bits from a source register, so right
3935 // shift the needed bits into place.
3936 SDLoc DL(N);
3937 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
3938 uint64_t LsrImm = LSB;
3939 if (Src->hasOneUse() &&
3940 isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
3941 (LsrImm + LSB) < BitWidth) {
3942 Src = Src->getOperand(0);
3943 LsrImm += LSB;
3944 }
3945
3946 SDNode *LSR = CurDAG->getMachineNode(
3947 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
3948 CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
3949
3950 // BFXIL is an alias of BFM, so translate to BFM operands.
3951 unsigned ImmR = (BitWidth - LSB) % BitWidth;
3952 unsigned ImmS = Width - 1;
3953
3954 // Create the BFXIL instruction.
3955 SDValue Ops[] = {Dst, SDValue(LSR, 0),
3956 CurDAG->getTargetConstant(ImmR, DL, VT),
3957 CurDAG->getTargetConstant(ImmS, DL, VT)};
3958 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
3959 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
3960 return true;
3961 }
3962
3963 return false;
3964}
3965
3966bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
3967 if (N->getOpcode() != ISD::OR)
3968 return false;
3969
3970 APInt NUsefulBits;
3971 getUsefulBits(SDValue(N, 0), NUsefulBits);
3972
3973 // If all bits are not useful, just return UNDEF.
3974 if (!NUsefulBits) {
3975 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
3976 return true;
3977 }
3978
3979 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
3980 return true;
3981
3982 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
3983}
3984
3985/// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
3986/// equivalent of a left shift by a constant amount followed by an and masking
3987/// out a contiguous set of bits.
3988bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
3989 if (N->getOpcode() != ISD::AND)
3990 return false;
3991
3992 EVT VT = N->getValueType(0);
3993 if (VT != MVT::i32 && VT != MVT::i64)
3994 return false;
3995
3996 SDValue Op0;
3997 int DstLSB, Width;
3998 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
3999 Op0, DstLSB, Width))
4000 return false;
4001
4002 // ImmR is the rotate right amount.
4003 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
4004 // ImmS is the most significant bit of the source to be moved.
4005 unsigned ImmS = Width - 1;
4006
4007 SDLoc DL(N);
4008 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
4009 CurDAG->getTargetConstant(ImmS, DL, VT)};
4010 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
4011 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4012 return true;
4013}
4014
4015/// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
4016/// variable shift/rotate instructions.
4017bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
4018 EVT VT = N->getValueType(0);
4019
4020 unsigned Opc;
4021 switch (N->getOpcode()) {
4022 case ISD::ROTR:
4023 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
4024 break;
4025 case ISD::SHL:
4026 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
4027 break;
4028 case ISD::SRL:
4029 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
4030 break;
4031 case ISD::SRA:
4032 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
4033 break;
4034 default:
4035 return false;
4036 }
4037
4038 uint64_t Size;
4039 uint64_t Bits;
4040 if (VT == MVT::i32) {
4041 Bits = 5;
4042 Size = 32;
4043 } else if (VT == MVT::i64) {
4044 Bits = 6;
4045 Size = 64;
4046 } else
4047 return false;
4048
4049 SDValue ShiftAmt = N->getOperand(1);
4050 SDLoc DL(N);
4051 SDValue NewShiftAmt;
4052
4053 // Skip over an extend of the shift amount.
4054 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
4055 ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
4056 ShiftAmt = ShiftAmt->getOperand(0);
4057
4058 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
4059 SDValue Add0 = ShiftAmt->getOperand(0);
4060 SDValue Add1 = ShiftAmt->getOperand(1);
4061 uint64_t Add0Imm;
4062 uint64_t Add1Imm;
4063 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
4064 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
4065 // to avoid the ADD/SUB.
4066 NewShiftAmt = Add0;
4067 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4068 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
4069 (Add0Imm % Size == 0)) {
4070 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X
4071 // to generate a NEG instead of a SUB from a constant.
4072 unsigned NegOpc;
4073 unsigned ZeroReg;
4074 EVT SubVT = ShiftAmt->getValueType(0);
4075 if (SubVT == MVT::i32) {
4076 NegOpc = AArch64::SUBWrr;
4077 ZeroReg = AArch64::WZR;
4078 } else {
4079 assert(SubVT == MVT::i64);
4080 NegOpc = AArch64::SUBXrr;
4081 ZeroReg = AArch64::XZR;
4082 }
4083 SDValue Zero =
4084 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4085 MachineSDNode *Neg =
4086 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
4087 NewShiftAmt = SDValue(Neg, 0);
4088 } else if (ShiftAmt->getOpcode() == ISD::SUB &&
4089 isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
4090 // If we are shifting by N-X where N == -1 mod Size, then just shift by ~X
4091 // to generate a NOT instead of a SUB from a constant.
4092 unsigned NotOpc;
4093 unsigned ZeroReg;
4094 EVT SubVT = ShiftAmt->getValueType(0);
4095 if (SubVT == MVT::i32) {
4096 NotOpc = AArch64::ORNWrr;
4097 ZeroReg = AArch64::WZR;
4098 } else {
4099 assert(SubVT == MVT::i64);
4100 NotOpc = AArch64::ORNXrr;
4101 ZeroReg = AArch64::XZR;
4102 }
4103 SDValue Zero =
4104 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
4105 MachineSDNode *Not =
4106 CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
4107 NewShiftAmt = SDValue(Not, 0);
4108 } else
4109 return false;
4110 } else {
4111 // If the shift amount is masked with an AND, check that the mask covers the
4112 // bits that are implicitly ANDed off by the above opcodes and if so, skip
4113 // the AND.
4114 uint64_t MaskImm;
4115 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
4116 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
4117 return false;
4118
4119 if ((unsigned)llvm::countr_one(MaskImm) < Bits)
4120 return false;
4121
4122 NewShiftAmt = ShiftAmt->getOperand(0);
4123 }
4124
4125 // Narrow/widen the shift amount to match the size of the shift operation.
4126 if (VT == MVT::i32)
4127 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
4128 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
4129 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
4130 MachineSDNode *Ext = CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT,
4131 NewShiftAmt, SubReg);
4132 NewShiftAmt = SDValue(Ext, 0);
4133 }
4134
4135 SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
4136 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4137 return true;
4138}
4139
// NOTE(review): two lines of this definition are absent from this listing:
// the opening line (return type, name, leading parameters) and the first
// branch of the if/else chain that binds CN. Presumably this is
// checkCVTFixedPointOperandWithFBits, which the scalar SelectCVTFixedPos*
// selectors call with (CurDAG, N, FixedPos, RegWidth, isReciprocal) — confirm
// against the full source.
//
// Obtains a floating-point constant from N, either directly (the elided first
// branch) or from a load whose address operand is an AArch64ISD::ADDlow of a
// constant-pool entry, and passes it to CheckFixedPointOperandConstant. A
// non-zero result is stored in FixedPos as an i32 target constant and the
// function returns true; otherwise it returns false.
4141 SDValue &FixedPos,
4142 unsigned RegWidth,
4143 bool isReciprocal) {
4144 APFloat FVal(0.0);
4146 FVal = CN->getValueAPF();
4147 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
4148 // Some otherwise illegal constants are allowed in this case.
4149 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
4150 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
4151 return false;
4152
// The isa<> check above has already established the node kind.
4153 ConstantPoolSDNode *CN =
4154 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
4155 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
4156 } else
4157 return false;
4158
// A zero FBits is treated as "no match".
4159 if (unsigned FBits =
4160 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4161 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4162 return true;
4163 }
4164
4165 return false;
4166}
4167
// NOTE(review): the opening line of this definition (return type, name and
// first parameter) is absent from this listing; presumably it is the helper
// the *Vec fixed-point selectors below call — confirm against the full source.
//
// Reconstructs a floating-point constant from N when N is an
// AArch64ISD::MOVIshift, AArch64ISD::FMOV, or an AArch64ISD::DUP of a
// constant, looking through one NVCAST/BITCAST that keeps the scalar size.
// The constant is handed to CheckFixedPointOperandConstant; a non-zero result
// is stored in FixedPos as an i32 target constant and true is returned. Any
// other node kind, or a zero result, yields false.
4169 SDValue N,
4170 SDValue &FixedPos,
4171 unsigned RegWidth,
4172 bool isReciprocal) {
4173 if ((N.getOpcode() == AArch64ISD::NVCAST || N.getOpcode() == ISD::BITCAST) &&
4174 N.getValueType().getScalarSizeInBits() ==
4175 N.getOperand(0).getValueType().getScalarSizeInBits())
4176 N = N.getOperand(0);
4177
// Builds an APFloat from raw bits, choosing the IEEE format by RegWidth
// (16 -> half, 32 -> single, 64 -> double).
4178 auto ImmToFloat = [RegWidth](APInt Imm) {
4179 switch (RegWidth) {
4180 case 16:
4181 return APFloat(APFloat::IEEEhalf(), Imm);
4182 case 32:
4183 return APFloat(APFloat::IEEEsingle(), Imm);
4184 case 64:
4185 return APFloat(APFloat::IEEEdouble(), Imm);
4186 default:
4187 llvm_unreachable("Unexpected RegWidth!");
4188 };
4189 };
4190
4191 APFloat FVal(0.0);
4192 switch (N->getOpcode()) {
4193 case AArch64ISD::MOVIshift:
// The bit pattern is operand 0 shifted left by operand 1.
4194 FVal = ImmToFloat(APInt(RegWidth, N.getConstantOperandVal(0)
4195 << N.getConstantOperandVal(1)));
4196 break;
4197 case AArch64ISD::FMOV:
4198 FVal = ImmToFloat(DecodeFMOVImm(N.getConstantOperandVal(0), RegWidth));
4199 break;
4200 case AArch64ISD::DUP:
// Only a DUP of a constant is usable; its bits are truncated to RegWidth.
4201 if (isa<ConstantSDNode>(N.getOperand(0)))
4202 FVal = ImmToFloat(N.getConstantOperandAPInt(0).trunc(RegWidth));
4203 else
4204 return false;
4205 break;
4206 default:
4207 return false;
4208 }
4209
// A zero FBits is treated as "no match".
4210 if (unsigned FBits =
4211 CheckFixedPointOperandConstant(FVal, RegWidth, isReciprocal)) {
4212 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
4213 return true;
4214 }
4215
4216 return false;
4217}
4218
4219bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
4220 unsigned RegWidth) {
4221 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4222 /*isReciprocal*/ false);
4223}
4224
// Vector fixed-point operand selector; passes isReciprocal = false to its
// helper.
// NOTE(review): the line carrying 'return <helper>(' is absent from this
// listing, so the callee's name is not visible here.
4225bool AArch64DAGToDAGISel::SelectCVTFixedPointVec(SDValue N, SDValue &FixedPos,
4226 unsigned RegWidth) {
4228 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ false);
4229}
4230
// Vector fixed-point operand selector for the reciprocal form; passes
// isReciprocal = true to its helper.
// NOTE(review): the line carrying 'return <helper>(' is absent from this
// listing, so the callee's name is not visible here.
4231bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperandVec(SDValue N,
4232 SDValue &FixedPos,
4233 unsigned RegWidth) {
4235 CurDAG, N, FixedPos, RegWidth, /*isReciprocal*/ true);
4236}
4237
4238bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,
4239 SDValue &FixedPos,
4240 unsigned RegWidth) {
4241 return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,
4242 /*isReciprocal*/ true);
4243}
4244
4245// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
4246// of the string and obtains the integer values from them and combines these
4247// into a single value to be used in the MRS/MSR instruction.
//
// Returns -1 when the string contains no ':' separator (a single field), so
// callers can fall back to name-based lookup. Otherwise exactly five fields
// are expected (asserted) and packed as
//   (f0 << 14) | (f1 << 11) | (f2 << 7) | (f3 << 3) | f4.
//
// NOTE(review): the signature lines and the declarations of 'Fields' and
// 'Ops' are absent from this listing; the name
// getIntOperandFromRegisterString is taken from the callers below.
4250 RegString.split(Fields, ':');
4251
4252 if (Fields.size() == 1)
4253 return -1;
4254
4255 assert(Fields.size() == 5
4256 && "Invalid number of fields in read register string");
4257
4259 bool AllIntFields = true;
4260
4261 for (StringRef Field : Fields) {
// NOTE(review): IntField has no initializer; presumably getAsInteger leaves
// it untouched on failure, in which case an indeterminate value is pushed
// when asserts are disabled — confirm.
4262 unsigned IntField;
4263 AllIntFields &= !Field.getAsInteger(10, IntField);
4264 Ops.push_back(IntField);
4265 }
4266
4267 assert(AllIntFields &&
4268 "Unexpected non-integer value in special register string.");
// Keeps AllIntFields "used" when the assert above compiles away.
4269 (void)AllIntFields;
4270
4271 // Need to combine the integer fields of the string into a single value
4272 // based on the bit encoding of MRS/MSR instruction.
4273 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | (Ops[3] << 3) |
4274 (Ops[4]);
4275}
4276
4277// Lower the read_register intrinsic to an MRS instruction node if the special
4278// register string argument is either of the form detailed in the ALCE (the
4279// form described in getIntOperandsFromRegisterString) or is a named register
4280// known by the MRS SysReg mapper.
4281bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
4282 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4283 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4284 SDLoc DL(N);
4285
4286 bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;
4287
4288 unsigned Opcode64Bit = AArch64::MRS;
4289 int Imm = getIntOperandFromRegisterString(RegString->getString());
4290 if (Imm == -1) {
4291 // No match, Use the sysreg mapper to map the remaining possible strings to
4292 // the value for the register to be used for the instruction operand.
4293 const auto *TheReg =
4294 AArch64SysReg::lookupSysRegByName(RegString->getString());
4295 if (TheReg && TheReg->Readable &&
4296 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4297 Imm = TheReg->Encoding;
4298 else
4299 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4300
4301 if (Imm == -1) {
4302 // Still no match, see if this is "pc" or give up.
4303 if (!ReadIs128Bit && RegString->getString() == "pc") {
4304 Opcode64Bit = AArch64::ADR;
4305 Imm = 0;
4306 } else {
4307 // Not a system register. It may name an allocatable 64-bit GPR/FPR read
4308 // by the MSVC __getReg/__getRegFp intrinsics. Emit a pseudo that
4309 // carries the source register as an immediate so the read does not
4310 // reference an undefined physical register (which the machine verifier
4311 // rejects); the AsmPrinter materializes the real mov/fmov.
4312 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4313 RegString->getString());
4314 unsigned PseudoOp = 0;
4315 if (AArch64::GPR64RegClass.contains(PReg))
4316 PseudoOp = AArch64::READ_REGISTER_GPR64;
4317 else if (AArch64::FPR64RegClass.contains(PReg))
4318 PseudoOp = AArch64::READ_REGISTER_FPR64;
4319 if (!ReadIs128Bit && PseudoOp && N->getValueType(0) == MVT::i64) {
4320 CurDAG->SelectNodeTo(N, PseudoOp, MVT::i64, MVT::Other,
4321 {CurDAG->getTargetConstant(PReg, DL, MVT::i32),
4322 N->getOperand(0)});
4323 return true;
4324 }
4325 return false;
4326 }
4327 }
4328 }
4329
4330 SDValue InChain = N->getOperand(0);
4331 SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
4332 if (!ReadIs128Bit) {
4333 CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
4334 {SysRegImm, InChain});
4335 } else {
4336 SDNode *MRRS = CurDAG->getMachineNode(
4337 AArch64::MRRS, DL,
4338 {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
4339 {SysRegImm, InChain});
4340
4341 // Sysregs are not endian. The even register always contains the low half
4342 // of the register.
4343 SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
4344 SDValue(MRRS, 0));
4345 SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
4346 SDValue(MRRS, 0));
4347 SDValue OutChain = SDValue(MRRS, 1);
4348
4349 ReplaceUses(SDValue(N, 0), Lo);
4350 ReplaceUses(SDValue(N, 1), Hi);
4351 ReplaceUses(SDValue(N, 2), OutChain);
4352 };
4353 return true;
4354}
4355
4356// Lower the write_register intrinsic to an MSR instruction node if the special
4357// register string argument is either of the form detailed in the ALCE (the
4358// form described in getIntOperandsFromRegisterString) or is a named register
4359// known by the MSR SysReg mapper.
4360bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
4361 const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
4362 const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
4363 SDLoc DL(N);
4364
4365 bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;
4366
4367 if (!WriteIs128Bit) {
4368 // Check if the register was one of those allowed as the pstatefield value
4369 // in the MSR (immediate) instruction. To accept the values allowed in the
4370 // pstatefield for the MSR (immediate) instruction, we also require that an
4371 // immediate value has been provided as an argument, we know that this is
4372 // the case as it has been ensured by semantic checking.
4373 auto trySelectPState = [&](auto PMapper, unsigned State) {
4374 if (PMapper) {
4375 assert(isa<ConstantSDNode>(N->getOperand(2)) &&
4376 "Expected a constant integer expression.");
4377 unsigned Reg = PMapper->Encoding;
4378 uint64_t Immed = N->getConstantOperandVal(2);
4379 CurDAG->SelectNodeTo(
4380 N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
4381 CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
4382 return true;
4383 }
4384 return false;
4385 };
4386
4387 if (trySelectPState(
4388 AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
4389 AArch64::MSRpstateImm4))
4390 return true;
4391 if (trySelectPState(
4392 AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
4393 AArch64::MSRpstateImm1))
4394 return true;
4395 }
4396
4397 int Imm = getIntOperandFromRegisterString(RegString->getString());
4398 if (Imm == -1) {
4399 // Use the sysreg mapper to attempt to map the remaining possible strings
4400 // to the value for the register to be used for the MSR (register)
4401 // instruction operand.
4402 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
4403 if (TheReg && TheReg->Writeable &&
4404 TheReg->haveFeatures(Subtarget->getFeatureBits()))
4405 Imm = TheReg->Encoding;
4406 else
4407 Imm = AArch64SysReg::parseGenericRegister(RegString->getString());
4408
4409 if (Imm == -1) {
4410 // Used by the MSVC __setReg/__setRegFp intrinsics. Copy the value into
4411 // the physical register and keep it live with a FAKE_USE so the write is
4412 // not dead-eliminated. (getRegisterByName rejects allocatable registers,
4413 // so the generic write path cannot handle these.)
4414 Register PReg = Subtarget->getTargetLowering()->matchRegisterName(
4415 RegString->getString());
4416 bool IsGPR = AArch64::GPR64RegClass.contains(PReg);
4417 bool IsFPR = AArch64::FPR64RegClass.contains(PReg);
4418 if (!WriteIs128Bit && (IsGPR || IsFPR) &&
4419 N->getOperand(2).getValueType() == MVT::i64) {
4420 SDValue Copy =
4421 CurDAG->getCopyToReg(N->getOperand(0), DL, PReg, N->getOperand(2));
4422 SDValue RegOp = CurDAG->getRegister(PReg, MVT::i64);
4423 SDNode *FakeUse = CurDAG->getMachineNode(TargetOpcode::FAKE_USE, DL,
4424 MVT::Other, {RegOp, Copy});
4425 ReplaceUses(SDValue(N, 0), SDValue(FakeUse, 0));
4426 CurDAG->RemoveDeadNode(N);
4427 return true;
4428 }
4429 return false;
4430 }
4431 }
4432
4433 SDValue InChain = N->getOperand(0);
4434 if (!WriteIs128Bit) {
4435 CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
4436 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4437 N->getOperand(2), InChain);
4438 } else {
4439 // No endian swap. The lower half always goes into the even subreg, and the
4440 // higher half always into the odd supreg.
4441 SDNode *Pair = CurDAG->getMachineNode(
4442 TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
4443 {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
4444 MVT::i32),
4445 N->getOperand(2),
4446 CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
4447 N->getOperand(3),
4448 CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});
4449
4450 CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
4451 CurDAG->getTargetConstant(Imm, DL, MVT::i32),
4452 SDValue(Pair, 0), InChain);
4453 }
4454
4455 return true;
4456}
4457
4458/// We've got special pseudo-instructions for these
4459bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
4460 unsigned Opcode;
4461 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
4462
4463 // Leave IR for LSE if subtarget supports it.
4464 if (Subtarget->hasLSE()) return false;
4465
4466 if (MemTy == MVT::i8)
4467 Opcode = AArch64::CMP_SWAP_8;
4468 else if (MemTy == MVT::i16)
4469 Opcode = AArch64::CMP_SWAP_16;
4470 else if (MemTy == MVT::i32)
4471 Opcode = AArch64::CMP_SWAP_32;
4472 else if (MemTy == MVT::i64)
4473 Opcode = AArch64::CMP_SWAP_64;
4474 else
4475 llvm_unreachable("Unknown AtomicCmpSwap type");
4476
4477 MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
4478 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
4479 N->getOperand(0)};
4480 SDNode *CmpSwap = CurDAG->getMachineNode(
4481 Opcode, SDLoc(N),
4482 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
4483
4484 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4485 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
4486
4487 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
4488 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
4489 CurDAG->RemoveDeadNode(N);
4490
4491 return true;
4492}
4493
4494bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
4495 SDValue &Shift, bool Negate) {
4496 if (!isa<ConstantSDNode>(N))
4497 return false;
4498
4499 APInt Val =
4500 cast<ConstantSDNode>(N)->getAPIntValue().trunc(VT.getFixedSizeInBits());
4501
4502 return SelectSVEAddSubImm(SDLoc(N), Val, VT, Imm, Shift, Negate);
4503}
4504
4505bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDLoc DL, APInt Val, MVT VT,
4506 SDValue &Imm, SDValue &Shift,
4507 bool Negate) {
4508 if (Negate)
4509 Val = -Val;
4510
4511 switch (VT.SimpleTy) {
4512 case MVT::i8:
4513 // All immediates are supported.
4514 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4515 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4516 return true;
4517 case MVT::i16:
4518 case MVT::i32:
4519 case MVT::i64:
4520 // Support 8bit unsigned immediates.
4521 if ((Val & ~0xff) == 0) {
4522 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4523 Imm = CurDAG->getTargetConstant(Val.getZExtValue(), DL, MVT::i32);
4524 return true;
4525 }
4526 // Support 16bit unsigned immediates that are a multiple of 256.
4527 if ((Val & ~0xff00) == 0) {
4528 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4529 Imm = CurDAG->getTargetConstant(Val.lshr(8).getZExtValue(), DL, MVT::i32);
4530 return true;
4531 }
4532 break;
4533 default:
4534 break;
4535 }
4536
4537 return false;
4538}
4539
4540bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
4541 SDValue &Imm, SDValue &Shift,
4542 bool Negate) {
4543 if (!isa<ConstantSDNode>(N))
4544 return false;
4545
4546 SDLoc DL(N);
4547 int64_t Val = cast<ConstantSDNode>(N)
4548 ->getAPIntValue()
4550 .getSExtValue();
4551
4552 if (Negate)
4553 Val = -Val;
4554
4555 // Signed saturating instructions treat their immediate operand as unsigned,
4556 // whereas the related intrinsics define their operands to be signed. This
4557 // means we can only use the immediate form when the operand is non-negative.
4558 if (Val < 0)
4559 return false;
4560
4561 switch (VT.SimpleTy) {
4562 case MVT::i8:
4563 // All positive immediates are supported.
4564 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4565 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4566 return true;
4567 case MVT::i16:
4568 case MVT::i32:
4569 case MVT::i64:
4570 // Support 8bit positive immediates.
4571 if (Val <= 255) {
4572 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
4573 Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
4574 return true;
4575 }
4576 // Support 16bit positive immediates that are a multiple of 256.
4577 if (Val <= 65280 && Val % 256 == 0) {
4578 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
4579 Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
4580 return true;
4581 }
4582 break;
4583 default:
4584 break;
4585 }
4586
4587 return false;
4588}
4589
4590bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
4591 SDValue &Shift) {
4592 if (!isa<ConstantSDNode>(N))
4593 return false;
4594
4595 SDLoc DL(N);
4596 int64_t Val = cast<ConstantSDNode>(N)
4597 ->getAPIntValue()
4598 .trunc(VT.getFixedSizeInBits())
4599 .getSExtValue();
4600 int32_t ImmVal, ShiftVal;
4601 if (!AArch64_AM::isSVECpyDupImm(VT.getScalarSizeInBits(), Val, ImmVal,
4602 ShiftVal))
4603 return false;
4604
4605 Shift = CurDAG->getTargetConstant(ShiftVal, DL, MVT::i32);
4606 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
4607 return true;
4608}
4609
4610bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
4611 if (auto CNode = dyn_cast<ConstantSDNode>(N))
4612 return SelectSVESignedArithImm(SDLoc(N), CNode->getAPIntValue(), Imm);
4613 return false;
4614}
4615
4616bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDLoc DL, APInt Val,
4617 SDValue &Imm) {
4618 int64_t ImmVal = Val.getSExtValue();
4619 if (ImmVal >= -128 && ImmVal < 128) {
4620 Imm = CurDAG->getSignedTargetConstant(ImmVal, DL, MVT::i32);
4621 return true;
4622 }
4623 return false;
4624}
4625
4626bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
4627 if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
4628 uint64_t ImmVal = CNode->getZExtValue();
4629
4630 switch (VT.SimpleTy) {
4631 case MVT::i8:
4632 ImmVal &= 0xFF;
4633 break;
4634 case MVT::i16:
4635 ImmVal &= 0xFFFF;
4636 break;
4637 case MVT::i32:
4638 ImmVal &= 0xFFFFFFFF;
4639 break;
4640 case MVT::i64:
4641 break;
4642 default:
4643 llvm_unreachable("Unexpected type");
4644 }
4645
4646 if (ImmVal < 256) {
4647 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4648 return true;
4649 }
4650 }
4651 return false;
4652}
4653
4654bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
4655 bool Invert) {
4656 uint64_t ImmVal;
4657 if (auto CI = dyn_cast<ConstantSDNode>(N))
4658 ImmVal = CI->getZExtValue();
4659 else if (auto CFP = dyn_cast<ConstantFPSDNode>(N))
4660 ImmVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
4661 else
4662 return false;
4663
4664 if (Invert)
4665 ImmVal = ~ImmVal;
4666
4667 uint64_t encoding;
4668 if (!AArch64_AM::isSVELogicalImm(VT.getScalarSizeInBits(), ImmVal, encoding))
4669 return false;
4670
4671 Imm = CurDAG->getTargetConstant(encoding, SDLoc(N), MVT::i64);
4672 return true;
4673}
4674
4675// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
4676// Rather than attempt to normalise everything we can sometimes saturate the
4677// shift amount during selection. This function also allows for consistent
4678// isel patterns by ensuring the resulting "Imm" node is of the i32 type
4679// required by the instructions.
4680bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
4681 uint64_t High, bool AllowSaturation,
4682 SDValue &Imm) {
4683 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
4684 uint64_t ImmVal = CN->getZExtValue();
4685
4686 // Reject shift amounts that are too small.
4687 if (ImmVal < Low)
4688 return false;
4689
4690 // Reject or saturate shift amounts that are too big.
4691 if (ImmVal > High) {
4692 if (!AllowSaturation)
4693 return false;
4694 ImmVal = High;
4695 }
4696
4697 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
4698 return true;
4699 }
4700
4701 return false;
4702}
4703
4704bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
4705 // tagp(FrameIndex, IRGstack, tag_offset):
4706 // since the offset between FrameIndex and IRGstack is a compile-time
4707 // constant, this can be lowered to a single ADDG instruction.
4708 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
4709 return false;
4710 }
4711
4712 SDValue IRG_SP = N->getOperand(2);
4713 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4714 IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
4715 return false;
4716 }
4717
4718 const TargetLowering *TLI = getTargetLowering();
4719 SDLoc DL(N);
4720 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
4721 SDValue FiOp = CurDAG->getTargetFrameIndex(
4722 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
4723 int TagOffset = N->getConstantOperandVal(3);
4724
4725 SDNode *Out = CurDAG->getMachineNode(
4726 AArch64::TAGPstack, DL, MVT::i64,
4727 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
4728 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4729 ReplaceNode(N, Out);
4730 return true;
4731}
4732
4733void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
4734 assert(isa<ConstantSDNode>(N->getOperand(3)) &&
4735 "llvm.aarch64.tagp third argument must be an immediate");
4736 if (trySelectStackSlotTagP(N))
4737 return;
4738 // FIXME: above applies in any case when offset between Op1 and Op2 is a
4739 // compile-time constant, not just for stack allocations.
4740
4741 // General case for unrelated pointers in Op1 and Op2.
4742 SDLoc DL(N);
4743 int TagOffset = N->getConstantOperandVal(3);
4744 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
4745 {N->getOperand(1), N->getOperand(2)});
4746 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
4747 {SDValue(N1, 0), N->getOperand(2)});
4748 SDNode *N3 = CurDAG->getMachineNode(
4749 AArch64::ADDG, DL, MVT::i64,
4750 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
4751 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
4752 ReplaceNode(N, N3);
4753}
4754
4755bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
4756 assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
4757
4758 // Bail when not a "cast" like insert_subvector.
4759 if (N->getConstantOperandVal(2) != 0)
4760 return false;
4761 if (!N->getOperand(0).isUndef())
4762 return false;
4763
4764 // Bail when normal isel should do the job.
4765 EVT VT = N->getValueType(0);
4766 EVT InVT = N->getOperand(1).getValueType();
4767 if (VT.isFixedLengthVector() || InVT.isScalableVector())
4768 return false;
4769 if (InVT.getSizeInBits() <= 128)
4770 return false;
4771
4772 // NOTE: We can only get here when doing fixed length SVE code generation.
4773 // We do manual selection because the types involved are not linked to real
4774 // registers (despite being legal) and must be coerced into SVE registers.
4775
4777 "Expected to insert into a packed scalable vector!");
4778
4779 SDLoc DL(N);
4780 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4781 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4782 N->getOperand(1), RC));
4783 return true;
4784}
4785
4786bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
4787 assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
4788
4789 // Bail when not a "cast" like extract_subvector.
4790 if (N->getConstantOperandVal(1) != 0)
4791 return false;
4792
4793 // Bail when normal isel can do the job.
4794 EVT VT = N->getValueType(0);
4795 EVT InVT = N->getOperand(0).getValueType();
4796 if (VT.isScalableVector() || InVT.isFixedLengthVector())
4797 return false;
4798 if (VT.getSizeInBits() <= 128)
4799 return false;
4800
4801 // NOTE: We can only get here when doing fixed length SVE code generation.
4802 // We do manual selection because the types involved are not linked to real
4803 // registers (despite being legal) and must be coerced into SVE registers.
4804
4806 "Expected to extract from a packed scalable vector!");
4807
4808 SDLoc DL(N);
4809 auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
4810 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
4811 N->getOperand(0), RC));
4812 return true;
4813}
4814
4815bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
4816 assert(N->getOpcode() == ISD::OR && "Expected OR instruction");
4817
4818 SDValue N0 = N->getOperand(0);
4819 SDValue N1 = N->getOperand(1);
4820
4821 EVT VT = N->getValueType(0);
4822 SDLoc DL(N);
4823
4824 // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
4825 // Rotate by a constant is a funnel shift in IR which is expanded to
4826 // an OR with shifted operands.
4827 // We do the following transform:
4828 // OR N0, N1 -> xar (x, y, imm)
4829 // Where:
4830 // N1 = SRL_PRED true, V, splat(imm) --> rotr amount
4831 // N0 = SHL_PRED true, V, splat(bits-imm)
4832 // V = (xor x, y)
4833 if (VT.isScalableVector() &&
4834 (Subtarget->hasSVE2() ||
4835 (Subtarget->hasSME() && Subtarget->isStreaming()))) {
4836 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4837 N1.getOpcode() != AArch64ISD::SRL_PRED)
4838 std::swap(N0, N1);
4839 if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
4840 N1.getOpcode() != AArch64ISD::SRL_PRED)
4841 return false;
4842
4843 auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
4844 if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
4845 !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
4846 return false;
4847
4848 if (N0.getOperand(1) != N1.getOperand(1))
4849 return false;
4850
4851 SDValue R1, R2;
4852 bool IsXOROperand = true;
4853 if (N0.getOperand(1).getOpcode() != ISD::XOR) {
4854 IsXOROperand = false;
4855 } else {
4856 R1 = N0.getOperand(1).getOperand(0);
4857 R2 = N1.getOperand(1).getOperand(1);
4858 }
4859
4860 APInt ShlAmt, ShrAmt;
4861 if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
4863 return false;
4864
4865 if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
4866 return false;
4867
4868 if (!IsXOROperand) {
4869 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4870 SDNode *MOV = CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, VT, Zero);
4871 SDValue MOVIV = SDValue(MOV, 0);
4872
4873 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
4874 SDNode *SubRegToReg =
4875 CurDAG->getMachineNode(AArch64::SUBREG_TO_REG, DL, VT, MOVIV, ZSub);
4876
4877 R1 = N1->getOperand(1);
4878 R2 = SDValue(SubRegToReg, 0);
4879 }
4880
4881 SDValue Imm =
4882 CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);
4883
4884 SDValue Ops[] = {R1, R2, Imm};
4886 VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4887 AArch64::XAR_ZZZI_D})) {
4888 CurDAG->SelectNodeTo(N, Opc, VT, Ops);
4889 return true;
4890 }
4891 return false;
4892 }
4893
4894 // We have Neon SHA3 XAR operation for v2i64 but for types
4895 // v4i32, v8i16, v16i8 we can use SVE operations when SVE2-SHA3
4896 // is available.
4897 EVT SVT;
4898 switch (VT.getSimpleVT().SimpleTy) {
4899 case MVT::v4i32:
4900 case MVT::v2i32:
4901 SVT = MVT::nxv4i32;
4902 break;
4903 case MVT::v8i16:
4904 case MVT::v4i16:
4905 SVT = MVT::nxv8i16;
4906 break;
4907 case MVT::v16i8:
4908 case MVT::v8i8:
4909 SVT = MVT::nxv16i8;
4910 break;
4911 case MVT::v2i64:
4912 case MVT::v1i64:
4913 SVT = Subtarget->hasSHA3() ? MVT::v2i64 : MVT::nxv2i64;
4914 break;
4915 default:
4916 return false;
4917 }
4918
4919 if ((!SVT.isScalableVector() && !Subtarget->hasSHA3()) ||
4920 (SVT.isScalableVector() && !Subtarget->hasSVE2()))
4921 return false;
4922
4923 if (N0->getOpcode() != AArch64ISD::VSHL ||
4924 N1->getOpcode() != AArch64ISD::VLSHR)
4925 return false;
4926
4927 if (N0->getOperand(0) != N1->getOperand(0))
4928 return false;
4929
4930 SDValue R1, R2;
4931 bool IsXOROperand = true;
4932 if (N1->getOperand(0)->getOpcode() != ISD::XOR) {
4933 IsXOROperand = false;
4934 } else {
4935 SDValue XOR = N0.getOperand(0);
4936 R1 = XOR.getOperand(0);
4937 R2 = XOR.getOperand(1);
4938 }
4939
4940 unsigned HsAmt = N0.getConstantOperandVal(1);
4941 unsigned ShAmt = N1.getConstantOperandVal(1);
4942
4943 SDValue Imm = CurDAG->getTargetConstant(
4944 ShAmt, DL, N0.getOperand(1).getValueType(), false);
4945
4946 unsigned VTSizeInBits = VT.getScalarSizeInBits();
4947 if (ShAmt + HsAmt != VTSizeInBits)
4948 return false;
4949
4950 if (!IsXOROperand) {
4951 SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
4952 SDNode *MOV =
4953 CurDAG->getMachineNode(AArch64::MOVIv2d_ns, DL, MVT::v2i64, Zero);
4954 SDValue MOVIV = SDValue(MOV, 0);
4955
4956 R1 = N1->getOperand(0);
4957 R2 = MOVIV;
4958 }
4959
4960 if (SVT != VT) {
4961 SDValue Undef =
4962 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, SVT), 0);
4963
4964 if (SVT.isScalableVector() && VT.is64BitVector()) {
4965 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
4966
4967 SDValue UndefQ = SDValue(
4968 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, QVT), 0);
4969 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
4970
4971 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4972 UndefQ, R1, DSub),
4973 0);
4974 if (R2.getValueType() == VT)
4975 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, QVT,
4976 UndefQ, R2, DSub),
4977 0);
4978 }
4979
4980 SDValue SubReg = CurDAG->getTargetConstant(
4981 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL, MVT::i32);
4982
4983 R1 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT, Undef,
4984 R1, SubReg),
4985 0);
4986
4987 if (SVT.isScalableVector() || R2.getValueType() != SVT)
4988 R2 = SDValue(CurDAG->getMachineNode(AArch64::INSERT_SUBREG, DL, SVT,
4989 Undef, R2, SubReg),
4990 0);
4991 }
4992
4993 SDValue Ops[] = {R1, R2, Imm};
4994 SDNode *XAR = nullptr;
4995
4996 if (SVT.isScalableVector()) {
4998 SVT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
4999 AArch64::XAR_ZZZI_D}))
5000 XAR = CurDAG->getMachineNode(Opc, DL, SVT, Ops);
5001 } else {
5002 XAR = CurDAG->getMachineNode(AArch64::XAR, DL, SVT, Ops);
5003 }
5004
5005 assert(XAR && "Unexpected NULL value for XAR instruction in DAG");
5006
5007 if (SVT != VT) {
5008 if (VT.is64BitVector() && SVT.isScalableVector()) {
5009 EVT QVT = VT.getDoubleNumVectorElementsVT(*CurDAG->getContext());
5010
5011 SDValue ZSub = CurDAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
5012 SDNode *Q = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, QVT,
5013 SDValue(XAR, 0), ZSub);
5014
5015 SDValue DSub = CurDAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
5016 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5017 SDValue(Q, 0), DSub);
5018 } else {
5019 SDValue SubReg = CurDAG->getTargetConstant(
5020 (SVT.isScalableVector() ? AArch64::zsub : AArch64::dsub), DL,
5021 MVT::i32);
5022 XAR = CurDAG->getMachineNode(AArch64::EXTRACT_SUBREG, DL, VT,
5023 SDValue(XAR, 0), SubReg);
5024 }
5025 }
5026 ReplaceNode(N, XAR);
5027 return true;
5028}
5029
5030void AArch64DAGToDAGISel::Select(SDNode *Node) {
5031 // If we have a custom node, we already have selected!
5032 if (Node->isMachineOpcode()) {
5033 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
5034 Node->setNodeId(-1);
5035 return;
5036 }
5037
5038 // Few custom selection stuff.
5039 EVT VT = Node->getValueType(0);
5040
5041 switch (Node->getOpcode()) {
5042 default:
5043 break;
5044
5046 if (SelectCMP_SWAP(Node))
5047 return;
5048 break;
5049
5050 case ISD::READ_REGISTER:
5051 case AArch64ISD::MRRS:
5052 if (tryReadRegister(Node))
5053 return;
5054 break;
5055
5057 case AArch64ISD::MSRR:
5058 if (tryWriteRegister(Node))
5059 return;
5060 break;
5061
5062 case ISD::LOAD: {
5063 // Try to select as an indexed load. Fall through to normal processing
5064 // if we can't.
5065 if (tryIndexedLoad(Node))
5066 return;
5067 break;
5068 }
5069
5070 case ISD::SRL:
5071 case ISD::AND:
5072 case ISD::SRA:
5074 if (tryBitfieldExtractOp(Node))
5075 return;
5076 if (tryBitfieldInsertInZeroOp(Node))
5077 return;
5078 [[fallthrough]];
5079 case ISD::ROTR:
5080 case ISD::SHL:
5081 if (tryShiftAmountMod(Node))
5082 return;
5083 break;
5084
5085 case ISD::SIGN_EXTEND:
5086 if (tryBitfieldExtractOpFromSExt(Node))
5087 return;
5088 break;
5089
5090 case ISD::OR:
5091 if (tryBitfieldInsertOp(Node))
5092 return;
5093 if (trySelectXAR(Node))
5094 return;
5095 break;
5096
5098 if (trySelectCastScalableToFixedLengthVector(Node))
5099 return;
5100 break;
5101 }
5102
5103 case ISD::INSERT_SUBVECTOR: {
5104 if (trySelectCastFixedLengthToScalableVector(Node))
5105 return;
5106 break;
5107 }
5108
5109 case ISD::Constant: {
5110 // Materialize zero constants as copies from WZR/XZR. This allows
5111 // the coalescer to propagate these into other instructions.
5112 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
5113 if (ConstNode->isZero()) {
5114 if (VT == MVT::i32) {
5115 SDValue New = CurDAG->getCopyFromReg(
5116 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
5117 ReplaceNode(Node, New.getNode());
5118 return;
5119 } else if (VT == MVT::i64) {
5120 SDValue New = CurDAG->getCopyFromReg(
5121 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
5122 ReplaceNode(Node, New.getNode());
5123 return;
5124 }
5125 }
5126 break;
5127 }
5128
5129 case ISD::FrameIndex: {
5130 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
5131 int FI = cast<FrameIndexSDNode>(Node)->getIndex();
5132 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
5133 const TargetLowering *TLI = getTargetLowering();
5134 SDValue TFI = CurDAG->getTargetFrameIndex(
5135 FI, TLI->getPointerTy(CurDAG->getDataLayout()));
5136 SDLoc DL(Node);
5137 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
5138 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
5139 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
5140 return;
5141 }
5143 unsigned IntNo = Node->getConstantOperandVal(1);
5144 switch (IntNo) {
5145 default:
5146 break;
5147 case Intrinsic::aarch64_gcsss: {
5148 SDLoc DL(Node);
5149 SDValue Chain = Node->getOperand(0);
5150 SDValue Val = Node->getOperand(2);
5151 SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
5152 SDNode *SS1 =
5153 CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
5154 SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
5155 MVT::Other, Zero, SDValue(SS1, 0));
5156 ReplaceNode(Node, SS2);
5157 return;
5158 }
5159 case Intrinsic::aarch64_ldaxp:
5160 case Intrinsic::aarch64_ldxp: {
5161 unsigned Op =
5162 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
5163 SDValue MemAddr = Node->getOperand(2);
5164 SDLoc DL(Node);
5165 SDValue Chain = Node->getOperand(0);
5166
5167 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
5168 MVT::Other, MemAddr, Chain);
5169
5170 // Transfer memoperands.
5171 MachineMemOperand *MemOp =
5172 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5173 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
5174 ReplaceNode(Node, Ld);
5175 return;
5176 }
5177 case Intrinsic::aarch64_stlxp:
5178 case Intrinsic::aarch64_stxp: {
5179 unsigned Op =
5180 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
5181 SDLoc DL(Node);
5182 SDValue Chain = Node->getOperand(0);
5183 SDValue ValLo = Node->getOperand(2);
5184 SDValue ValHi = Node->getOperand(3);
5185 SDValue MemAddr = Node->getOperand(4);
5186
5187 // Place arguments in the right order.
5188 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
5189
5190 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
5191 // Transfer memoperands.
5192 MachineMemOperand *MemOp =
5193 cast<MemIntrinsicSDNode>(Node)->getMemOperand();
5194 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
5195
5196 ReplaceNode(Node, St);
5197 return;
5198 }
5199 case Intrinsic::aarch64_neon_ld1x2:
5200 if (VT == MVT::v8i8) {
5201 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
5202 return;
5203 } else if (VT == MVT::v16i8) {
5204 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
5205 return;
5206 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5207 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
5208 return;
5209 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5210 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
5211 return;
5212 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5213 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
5214 return;
5215 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5216 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
5217 return;
5218 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5219 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5220 return;
5221 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5222 SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
5223 return;
5224 }
5225 break;
5226 case Intrinsic::aarch64_neon_ld1x3:
5227 if (VT == MVT::v8i8) {
5228 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
5229 return;
5230 } else if (VT == MVT::v16i8) {
5231 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
5232 return;
5233 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5234 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
5235 return;
5236 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5237 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
5238 return;
5239 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5240 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
5241 return;
5242 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5243 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
5244 return;
5245 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5246 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5247 return;
5248 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5249 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
5250 return;
5251 }
5252 break;
5253 case Intrinsic::aarch64_neon_ld1x4:
5254 if (VT == MVT::v8i8) {
5255 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
5256 return;
5257 } else if (VT == MVT::v16i8) {
5258 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
5259 return;
5260 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5261 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
5262 return;
5263 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5264 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
5265 return;
5266 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5267 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
5268 return;
5269 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5270 SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
5271 return;
5272 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5273 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5274 return;
5275 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5276 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
5277 return;
5278 }
5279 break;
5280 case Intrinsic::aarch64_neon_ld2:
5281 if (VT == MVT::v8i8) {
5282 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
5283 return;
5284 } else if (VT == MVT::v16i8) {
5285 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
5286 return;
5287 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5288 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
5289 return;
5290 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5291 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
5292 return;
5293 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5294 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
5295 return;
5296 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5297 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
5298 return;
5299 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5300 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
5301 return;
5302 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5303 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
5304 return;
5305 }
5306 break;
5307 case Intrinsic::aarch64_neon_ld3:
5308 if (VT == MVT::v8i8) {
5309 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
5310 return;
5311 } else if (VT == MVT::v16i8) {
5312 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
5313 return;
5314 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5315 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
5316 return;
5317 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5318 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
5319 return;
5320 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5321 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
5322 return;
5323 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5324 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
5325 return;
5326 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5327 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
5328 return;
5329 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5330 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
5331 return;
5332 }
5333 break;
5334 case Intrinsic::aarch64_neon_ld4:
5335 if (VT == MVT::v8i8) {
5336 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
5337 return;
5338 } else if (VT == MVT::v16i8) {
5339 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
5340 return;
5341 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5342 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
5343 return;
5344 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5345 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
5346 return;
5347 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5348 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
5349 return;
5350 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5351 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
5352 return;
5353 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5354 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
5355 return;
5356 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5357 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
5358 return;
5359 }
5360 break;
5361 case Intrinsic::aarch64_neon_ld2r:
5362 if (VT == MVT::v8i8) {
5363 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
5364 return;
5365 } else if (VT == MVT::v16i8) {
5366 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
5367 return;
5368 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5369 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
5370 return;
5371 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5372 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
5373 return;
5374 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5375 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
5376 return;
5377 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5378 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
5379 return;
5380 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5381 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
5382 return;
5383 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5384 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
5385 return;
5386 }
5387 break;
5388 case Intrinsic::aarch64_neon_ld3r:
5389 if (VT == MVT::v8i8) {
5390 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
5391 return;
5392 } else if (VT == MVT::v16i8) {
5393 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
5394 return;
5395 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5396 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
5397 return;
5398 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5399 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
5400 return;
5401 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5402 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
5403 return;
5404 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5405 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
5406 return;
5407 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5408 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
5409 return;
5410 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5411 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
5412 return;
5413 }
5414 break;
5415 case Intrinsic::aarch64_neon_ld4r:
5416 if (VT == MVT::v8i8) {
5417 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
5418 return;
5419 } else if (VT == MVT::v16i8) {
5420 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
5421 return;
5422 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
5423 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
5424 return;
5425 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
5426 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
5427 return;
5428 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
5429 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
5430 return;
5431 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
5432 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
5433 return;
5434 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
5435 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
5436 return;
5437 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
5438 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
5439 return;
5440 }
5441 break;
5442 case Intrinsic::aarch64_neon_ld2lane:
5443 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5444 SelectLoadLane(Node, 2, AArch64::LD2i8);
5445 return;
5446 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5447 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5448 SelectLoadLane(Node, 2, AArch64::LD2i16);
5449 return;
5450 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5451 VT == MVT::v2f32) {
5452 SelectLoadLane(Node, 2, AArch64::LD2i32);
5453 return;
5454 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5455 VT == MVT::v1f64) {
5456 SelectLoadLane(Node, 2, AArch64::LD2i64);
5457 return;
5458 }
5459 break;
5460 case Intrinsic::aarch64_neon_ld3lane:
5461 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5462 SelectLoadLane(Node, 3, AArch64::LD3i8);
5463 return;
5464 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5465 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5466 SelectLoadLane(Node, 3, AArch64::LD3i16);
5467 return;
5468 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5469 VT == MVT::v2f32) {
5470 SelectLoadLane(Node, 3, AArch64::LD3i32);
5471 return;
5472 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5473 VT == MVT::v1f64) {
5474 SelectLoadLane(Node, 3, AArch64::LD3i64);
5475 return;
5476 }
5477 break;
5478 case Intrinsic::aarch64_neon_ld4lane:
5479 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
5480 SelectLoadLane(Node, 4, AArch64::LD4i8);
5481 return;
5482 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
5483 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
5484 SelectLoadLane(Node, 4, AArch64::LD4i16);
5485 return;
5486 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
5487 VT == MVT::v2f32) {
5488 SelectLoadLane(Node, 4, AArch64::LD4i32);
5489 return;
5490 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
5491 VT == MVT::v1f64) {
5492 SelectLoadLane(Node, 4, AArch64::LD4i64);
5493 return;
5494 }
5495 break;
5496 case Intrinsic::aarch64_ld64b:
5497 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
5498 return;
5499 case Intrinsic::aarch64_sve_ld2q_sret: {
5500 SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
5501 return;
5502 }
5503 case Intrinsic::aarch64_sve_ld3q_sret: {
5504 SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
5505 return;
5506 }
5507 case Intrinsic::aarch64_sve_ld4q_sret: {
5508 SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
5509 return;
5510 }
5511 case Intrinsic::aarch64_sve_ld2_sret: {
5512 if (VT == MVT::nxv16i8) {
5513 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B,
5514 true);
5515 return;
5516 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5517 VT == MVT::nxv8bf16) {
5518 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H,
5519 true);
5520 return;
5521 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5522 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W,
5523 true);
5524 return;
5525 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5526 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D,
5527 true);
5528 return;
5529 }
5530 break;
5531 }
5532 case Intrinsic::aarch64_sve_ld1_pn_x2: {
5533 if (VT == MVT::nxv16i8) {
5534 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5535 SelectContiguousMultiVectorLoad(
5536 Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
5537 else if (Subtarget->hasSVE2p1())
5538 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM,
5539 AArch64::LD1B_2Z);
5540 else
5541 break;
5542 return;
5543 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5544 VT == MVT::nxv8bf16) {
5545 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5546 SelectContiguousMultiVectorLoad(
5547 Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
5548 else if (Subtarget->hasSVE2p1())
5549 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM,
5550 AArch64::LD1H_2Z);
5551 else
5552 break;
5553 return;
5554 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5555 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5556 SelectContiguousMultiVectorLoad(
5557 Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
5558 else if (Subtarget->hasSVE2p1())
5559 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM,
5560 AArch64::LD1W_2Z);
5561 else
5562 break;
5563 return;
5564 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5565 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5566 SelectContiguousMultiVectorLoad(
5567 Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
5568 else if (Subtarget->hasSVE2p1())
5569 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM,
5570 AArch64::LD1D_2Z);
5571 else
5572 break;
5573 return;
5574 }
5575 break;
5576 }
5577 case Intrinsic::aarch64_sve_ld1_pn_x4: {
5578 if (VT == MVT::nxv16i8) {
5579 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5580 SelectContiguousMultiVectorLoad(
5581 Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
5582 else if (Subtarget->hasSVE2p1())
5583 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM,
5584 AArch64::LD1B_4Z);
5585 else
5586 break;
5587 return;
5588 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5589 VT == MVT::nxv8bf16) {
5590 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5591 SelectContiguousMultiVectorLoad(
5592 Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
5593 else if (Subtarget->hasSVE2p1())
5594 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM,
5595 AArch64::LD1H_4Z);
5596 else
5597 break;
5598 return;
5599 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5600 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5601 SelectContiguousMultiVectorLoad(
5602 Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
5603 else if (Subtarget->hasSVE2p1())
5604 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM,
5605 AArch64::LD1W_4Z);
5606 else
5607 break;
5608 return;
5609 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5610 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5611 SelectContiguousMultiVectorLoad(
5612 Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
5613 else if (Subtarget->hasSVE2p1())
5614 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM,
5615 AArch64::LD1D_4Z);
5616 else
5617 break;
5618 return;
5619 }
5620 break;
5621 }
5622 case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
5623 if (VT == MVT::nxv16i8) {
5624 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5625 SelectContiguousMultiVectorLoad(Node, 2, 0,
5626 AArch64::LDNT1B_2Z_IMM_PSEUDO,
5627 AArch64::LDNT1B_2Z_PSEUDO);
5628 else if (Subtarget->hasSVE2p1())
5629 SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM,
5630 AArch64::LDNT1B_2Z);
5631 else
5632 break;
5633 return;
5634 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5635 VT == MVT::nxv8bf16) {
5636 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5637 SelectContiguousMultiVectorLoad(Node, 2, 1,
5638 AArch64::LDNT1H_2Z_IMM_PSEUDO,
5639 AArch64::LDNT1H_2Z_PSEUDO);
5640 else if (Subtarget->hasSVE2p1())
5641 SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM,
5642 AArch64::LDNT1H_2Z);
5643 else
5644 break;
5645 return;
5646 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5647 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5648 SelectContiguousMultiVectorLoad(Node, 2, 2,
5649 AArch64::LDNT1W_2Z_IMM_PSEUDO,
5650 AArch64::LDNT1W_2Z_PSEUDO);
5651 else if (Subtarget->hasSVE2p1())
5652 SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM,
5653 AArch64::LDNT1W_2Z);
5654 else
5655 break;
5656 return;
5657 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5658 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5659 SelectContiguousMultiVectorLoad(Node, 2, 3,
5660 AArch64::LDNT1D_2Z_IMM_PSEUDO,
5661 AArch64::LDNT1D_2Z_PSEUDO);
5662 else if (Subtarget->hasSVE2p1())
5663 SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM,
5664 AArch64::LDNT1D_2Z);
5665 else
5666 break;
5667 return;
5668 }
5669 break;
5670 }
5671 case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
5672 if (VT == MVT::nxv16i8) {
5673 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5674 SelectContiguousMultiVectorLoad(Node, 4, 0,
5675 AArch64::LDNT1B_4Z_IMM_PSEUDO,
5676 AArch64::LDNT1B_4Z_PSEUDO);
5677 else if (Subtarget->hasSVE2p1())
5678 SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM,
5679 AArch64::LDNT1B_4Z);
5680 else
5681 break;
5682 return;
5683 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5684 VT == MVT::nxv8bf16) {
5685 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5686 SelectContiguousMultiVectorLoad(Node, 4, 1,
5687 AArch64::LDNT1H_4Z_IMM_PSEUDO,
5688 AArch64::LDNT1H_4Z_PSEUDO);
5689 else if (Subtarget->hasSVE2p1())
5690 SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM,
5691 AArch64::LDNT1H_4Z);
5692 else
5693 break;
5694 return;
5695 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5696 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5697 SelectContiguousMultiVectorLoad(Node, 4, 2,
5698 AArch64::LDNT1W_4Z_IMM_PSEUDO,
5699 AArch64::LDNT1W_4Z_PSEUDO);
5700 else if (Subtarget->hasSVE2p1())
5701 SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM,
5702 AArch64::LDNT1W_4Z);
5703 else
5704 break;
5705 return;
5706 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5707 if (Subtarget->hasSME2() && Subtarget->isStreaming())
5708 SelectContiguousMultiVectorLoad(Node, 4, 3,
5709 AArch64::LDNT1D_4Z_IMM_PSEUDO,
5710 AArch64::LDNT1D_4Z_PSEUDO);
5711 else if (Subtarget->hasSVE2p1())
5712 SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM,
5713 AArch64::LDNT1D_4Z);
5714 else
5715 break;
5716 return;
5717 }
5718 break;
5719 }
5720 case Intrinsic::aarch64_sve_ld3_sret: {
5721 if (VT == MVT::nxv16i8) {
5722 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
5723 true);
5724 return;
5725 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5726 VT == MVT::nxv8bf16) {
5727 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H,
5728 true);
5729 return;
5730 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5731 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W,
5732 true);
5733 return;
5734 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5735 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D,
5736 true);
5737 return;
5738 }
5739 break;
5740 }
5741 case Intrinsic::aarch64_sve_ld4_sret: {
5742 if (VT == MVT::nxv16i8) {
5743 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B,
5744 true);
5745 return;
5746 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5747 VT == MVT::nxv8bf16) {
5748 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H,
5749 true);
5750 return;
5751 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5752 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W,
5753 true);
5754 return;
5755 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5756 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D,
5757 true);
5758 return;
5759 }
5760 break;
5761 }
5762 case Intrinsic::aarch64_sme_read_hor_vg2: {
5763 if (VT == MVT::nxv16i8) {
5764 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5765 AArch64::MOVA_2ZMXI_H_B);
5766 return;
5767 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5768 VT == MVT::nxv8bf16) {
5769 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5770 AArch64::MOVA_2ZMXI_H_H);
5771 return;
5772 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5773 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5774 AArch64::MOVA_2ZMXI_H_S);
5775 return;
5776 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5777 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5778 AArch64::MOVA_2ZMXI_H_D);
5779 return;
5780 }
5781 break;
5782 }
5783 case Intrinsic::aarch64_sme_read_ver_vg2: {
5784 if (VT == MVT::nxv16i8) {
5785 SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
5786 AArch64::MOVA_2ZMXI_V_B);
5787 return;
5788 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5789 VT == MVT::nxv8bf16) {
5790 SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
5791 AArch64::MOVA_2ZMXI_V_H);
5792 return;
5793 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5794 SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
5795 AArch64::MOVA_2ZMXI_V_S);
5796 return;
5797 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5798 SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
5799 AArch64::MOVA_2ZMXI_V_D);
5800 return;
5801 }
5802 break;
5803 }
5804 case Intrinsic::aarch64_sme_read_hor_vg4: {
5805 if (VT == MVT::nxv16i8) {
5806 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5807 AArch64::MOVA_4ZMXI_H_B);
5808 return;
5809 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5810 VT == MVT::nxv8bf16) {
5811 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5812 AArch64::MOVA_4ZMXI_H_H);
5813 return;
5814 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5815 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
5816 AArch64::MOVA_4ZMXI_H_S);
5817 return;
5818 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5819 SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
5820 AArch64::MOVA_4ZMXI_H_D);
5821 return;
5822 }
5823 break;
5824 }
5825 case Intrinsic::aarch64_sme_read_ver_vg4: {
5826 if (VT == MVT::nxv16i8) {
5827 SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
5828 AArch64::MOVA_4ZMXI_V_B);
5829 return;
5830 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5831 VT == MVT::nxv8bf16) {
5832 SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
5833 AArch64::MOVA_4ZMXI_V_H);
5834 return;
5835 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5836 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
5837 AArch64::MOVA_4ZMXI_V_S);
5838 return;
5839 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5840 SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
5841 AArch64::MOVA_4ZMXI_V_D);
5842 return;
5843 }
5844 break;
5845 }
5846 case Intrinsic::aarch64_sme_read_vg1x2: {
5847 SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
5848 AArch64::MOVA_VG2_2ZMXI);
5849 return;
5850 }
5851 case Intrinsic::aarch64_sme_read_vg1x4: {
5852 SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
5853 AArch64::MOVA_VG4_4ZMXI);
5854 return;
5855 }
5856 case Intrinsic::aarch64_sme_readz_horiz_x2: {
5857 if (VT == MVT::nxv16i8) {
5858 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
5859 return;
5860 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5861 VT == MVT::nxv8bf16) {
5862 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
5863 return;
5864 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5865 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
5866 return;
5867 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5868 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
5869 return;
5870 }
5871 break;
5872 }
5873 case Intrinsic::aarch64_sme_readz_vert_x2: {
5874 if (VT == MVT::nxv16i8) {
5875 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
5876 return;
5877 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5878 VT == MVT::nxv8bf16) {
5879 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
5880 return;
5881 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5882 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
5883 return;
5884 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5885 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
5886 return;
5887 }
5888 break;
5889 }
5890 case Intrinsic::aarch64_sme_readz_horiz_x4: {
5891 if (VT == MVT::nxv16i8) {
5892 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
5893 return;
5894 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5895 VT == MVT::nxv8bf16) {
5896 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
5897 return;
5898 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5899 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
5900 return;
5901 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5902 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
5903 return;
5904 }
5905 break;
5906 }
5907 case Intrinsic::aarch64_sme_readz_vert_x4: {
5908 if (VT == MVT::nxv16i8) {
5909 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
5910 return;
5911 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
5912 VT == MVT::nxv8bf16) {
5913 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
5914 return;
5915 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
5916 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
5917 return;
5918 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
5919 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
5920 return;
5921 }
5922 break;
5923 }
5924 case Intrinsic::aarch64_sme_readz_x2: {
5925 SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
5926 AArch64::ZA);
5927 return;
5928 }
5929 case Intrinsic::aarch64_sme_readz_x4: {
5930 SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
5931 AArch64::ZA);
5932 return;
5933 }
5934 case Intrinsic::swift_async_context_addr: {
5935 SDLoc DL(Node);
5936 SDValue Chain = Node->getOperand(0);
5937 SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
5938 SDValue Res = SDValue(
5939 CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
5940 CurDAG->getTargetConstant(8, DL, MVT::i32),
5941 CurDAG->getTargetConstant(0, DL, MVT::i32)),
5942 0);
5943 ReplaceUses(SDValue(Node, 0), Res);
5944 ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
5945 CurDAG->RemoveDeadNode(Node);
5946
5947 auto &MF = CurDAG->getMachineFunction();
5948 MF.getFrameInfo().setFrameAddressIsTaken(true);
5949 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5950 return;
5951 }
5952 case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
5954 Node->getValueType(0),
5955 {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
5956 AArch64::LUTI2_4ZTZI_S}))
5957 // Second Immediate must be <= 3:
5958 SelectMultiVectorLutiLane(Node, 4, Opc, 3);
5959 return;
5960 }
5961 case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
5963 Node->getValueType(0),
5964 {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
5965 // Second Immediate must be <= 1:
5966 SelectMultiVectorLutiLane(Node, 4, Opc, 1);
5967 return;
5968 }
5969 case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
5971 Node->getValueType(0),
5972 {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
5973 AArch64::LUTI2_2ZTZI_S}))
5974 // Second Immediate must be <= 7:
5975 SelectMultiVectorLutiLane(Node, 2, Opc, 7);
5976 return;
5977 }
5978 case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
5980 Node->getValueType(0),
5981 {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
5982 AArch64::LUTI4_2ZTZI_S}))
5983 // Second Immediate must be <= 3:
5984 SelectMultiVectorLutiLane(Node, 2, Opc, 3);
5985 return;
5986 }
5987 case Intrinsic::aarch64_sme_luti4_zt_x4: {
5988 SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
5989 return;
5990 }
5991 case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
5993 Node->getValueType(0),
5994 {AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
5995 SelectCVTIntrinsicFP8(Node, 2, Opc);
5996 return;
5997 case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
5999 Node->getValueType(0),
6000 {AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
6001 SelectCVTIntrinsicFP8(Node, 2, Opc);
6002 return;
6003 case Intrinsic::aarch64_sve_fp8_cvt1_x2:
6005 Node->getValueType(0),
6006 {AArch64::BF1CVT_2ZZ_BtoH, AArch64::F1CVT_2ZZ_BtoH}))
6007 SelectCVTIntrinsicFP8(Node, 2, Opc);
6008 return;
6009 case Intrinsic::aarch64_sve_fp8_cvt2_x2:
6011 Node->getValueType(0),
6012 {AArch64::BF2CVT_2ZZ_BtoH, AArch64::F2CVT_2ZZ_BtoH}))
6013 SelectCVTIntrinsicFP8(Node, 2, Opc);
6014 return;
6015 case Intrinsic::ptrauth_resign_load_relative:
6016 SelectPtrauthResign(Node);
6017 return;
6018 }
6019 } break;
6021 unsigned IntNo = Node->getConstantOperandVal(0);
6022 switch (IntNo) {
6023 default:
6024 break;
6025 case Intrinsic::aarch64_tagp:
6026 SelectTagP(Node);
6027 return;
6028
6029 case Intrinsic::ptrauth_auth:
6030 SelectPtrauthAuth(Node);
6031 return;
6032
6033 case Intrinsic::ptrauth_resign:
6034 SelectPtrauthResign(Node);
6035 return;
6036
6037 case Intrinsic::aarch64_neon_tbl2:
6038 SelectTable(Node, 2,
6039 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
6040 false);
6041 return;
6042 case Intrinsic::aarch64_neon_tbl3:
6043 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
6044 : AArch64::TBLv16i8Three,
6045 false);
6046 return;
6047 case Intrinsic::aarch64_neon_tbl4:
6048 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
6049 : AArch64::TBLv16i8Four,
6050 false);
6051 return;
6052 case Intrinsic::aarch64_neon_tbx2:
6053 SelectTable(Node, 2,
6054 VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
6055 true);
6056 return;
6057 case Intrinsic::aarch64_neon_tbx3:
6058 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
6059 : AArch64::TBXv16i8Three,
6060 true);
6061 return;
6062 case Intrinsic::aarch64_neon_tbx4:
6063 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
6064 : AArch64::TBXv16i8Four,
6065 true);
6066 return;
6067 case Intrinsic::aarch64_sve_srshl_single_x2:
6069 Node->getValueType(0),
6070 {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
6071 AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
6072 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6073 return;
6074 case Intrinsic::aarch64_sve_srshl_single_x4:
6076 Node->getValueType(0),
6077 {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
6078 AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
6079 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6080 return;
6081 case Intrinsic::aarch64_sve_urshl_single_x2:
6083 Node->getValueType(0),
6084 {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
6085 AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
6086 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6087 return;
6088 case Intrinsic::aarch64_sve_urshl_single_x4:
6090 Node->getValueType(0),
6091 {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
6092 AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
6093 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6094 return;
6095 case Intrinsic::aarch64_sve_srshl_x2:
6097 Node->getValueType(0),
6098 {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
6099 AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
6100 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6101 return;
6102 case Intrinsic::aarch64_sve_srshl_x4:
6104 Node->getValueType(0),
6105 {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
6106 AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
6107 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6108 return;
6109 case Intrinsic::aarch64_sve_urshl_x2:
6111 Node->getValueType(0),
6112 {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
6113 AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
6114 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6115 return;
6116 case Intrinsic::aarch64_sve_urshl_x4:
6118 Node->getValueType(0),
6119 {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
6120 AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
6121 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6122 return;
6123 case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
6125 Node->getValueType(0),
6126 {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
6127 AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
6128 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6129 return;
6130 case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
6132 Node->getValueType(0),
6133 {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
6134 AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
6135 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6136 return;
6137 case Intrinsic::aarch64_sve_sqdmulh_vgx2:
6139 Node->getValueType(0),
6140 {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
6141 AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
6142 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6143 return;
6144 case Intrinsic::aarch64_sve_sqdmulh_vgx4:
6146 Node->getValueType(0),
6147 {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
6148 AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
6149 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6150 return;
6151 case Intrinsic::aarch64_sme_fp8_scale_single_x2:
6153 Node->getValueType(0),
6154 {0, AArch64::FSCALE_2ZZ_H, AArch64::FSCALE_2ZZ_S,
6155 AArch64::FSCALE_2ZZ_D}))
6156 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6157 return;
6158 case Intrinsic::aarch64_sme_fp8_scale_single_x4:
6160 Node->getValueType(0),
6161 {0, AArch64::FSCALE_4ZZ_H, AArch64::FSCALE_4ZZ_S,
6162 AArch64::FSCALE_4ZZ_D}))
6163 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6164 return;
6165 case Intrinsic::aarch64_sme_fp8_scale_x2:
6167 Node->getValueType(0),
6168 {0, AArch64::FSCALE_2Z2Z_H, AArch64::FSCALE_2Z2Z_S,
6169 AArch64::FSCALE_2Z2Z_D}))
6170 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6171 return;
6172 case Intrinsic::aarch64_sme_fp8_scale_x4:
6174 Node->getValueType(0),
6175 {0, AArch64::FSCALE_4Z4Z_H, AArch64::FSCALE_4Z4Z_S,
6176 AArch64::FSCALE_4Z4Z_D}))
6177 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6178 return;
6179 case Intrinsic::aarch64_sve_whilege_x2:
6181 Node->getValueType(0),
6182 {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
6183 AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
6184 SelectWhilePair(Node, Op);
6185 return;
6186 case Intrinsic::aarch64_sve_whilegt_x2:
6188 Node->getValueType(0),
6189 {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
6190 AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
6191 SelectWhilePair(Node, Op);
6192 return;
6193 case Intrinsic::aarch64_sve_whilehi_x2:
6195 Node->getValueType(0),
6196 {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
6197 AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
6198 SelectWhilePair(Node, Op);
6199 return;
6200 case Intrinsic::aarch64_sve_whilehs_x2:
6202 Node->getValueType(0),
6203 {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
6204 AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
6205 SelectWhilePair(Node, Op);
6206 return;
6207 case Intrinsic::aarch64_sve_whilele_x2:
6209 Node->getValueType(0),
6210 {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
6211 AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
6212 SelectWhilePair(Node, Op);
6213 return;
6214 case Intrinsic::aarch64_sve_whilelo_x2:
6216 Node->getValueType(0),
6217 {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
6218 AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
6219 SelectWhilePair(Node, Op);
6220 return;
6221 case Intrinsic::aarch64_sve_whilels_x2:
6223 Node->getValueType(0),
6224 {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
6225 AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
6226 SelectWhilePair(Node, Op);
6227 return;
6228 case Intrinsic::aarch64_sve_whilelt_x2:
6230 Node->getValueType(0),
6231 {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
6232 AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
6233 SelectWhilePair(Node, Op);
6234 return;
6235 case Intrinsic::aarch64_sve_smax_single_x2:
6237 Node->getValueType(0),
6238 {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
6239 AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
6240 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6241 return;
6242 case Intrinsic::aarch64_sve_umax_single_x2:
6244 Node->getValueType(0),
6245 {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
6246 AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
6247 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6248 return;
6249 case Intrinsic::aarch64_sve_fmax_single_x2:
6251 Node->getValueType(0),
6252 {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
6253 AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
6254 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6255 return;
6256 case Intrinsic::aarch64_sve_smax_single_x4:
6258 Node->getValueType(0),
6259 {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
6260 AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
6261 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6262 return;
6263 case Intrinsic::aarch64_sve_umax_single_x4:
6265 Node->getValueType(0),
6266 {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
6267 AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
6268 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6269 return;
6270 case Intrinsic::aarch64_sve_fmax_single_x4:
6272 Node->getValueType(0),
6273 {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
6274 AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
6275 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6276 return;
6277 case Intrinsic::aarch64_sve_smin_single_x2:
6279 Node->getValueType(0),
6280 {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
6281 AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
6282 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6283 return;
6284 case Intrinsic::aarch64_sve_umin_single_x2:
6286 Node->getValueType(0),
6287 {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
6288 AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
6289 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6290 return;
6291 case Intrinsic::aarch64_sve_fmin_single_x2:
6293 Node->getValueType(0),
6294 {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
6295 AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
6296 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6297 return;
6298 case Intrinsic::aarch64_sve_smin_single_x4:
6300 Node->getValueType(0),
6301 {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
6302 AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
6303 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6304 return;
6305 case Intrinsic::aarch64_sve_umin_single_x4:
6307 Node->getValueType(0),
6308 {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
6309 AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
6310 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6311 return;
6312 case Intrinsic::aarch64_sve_fmin_single_x4:
6314 Node->getValueType(0),
6315 {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
6316 AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
6317 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6318 return;
6319 case Intrinsic::aarch64_sve_smax_x2:
6321 Node->getValueType(0),
6322 {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
6323 AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
6324 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6325 return;
6326 case Intrinsic::aarch64_sve_umax_x2:
6328 Node->getValueType(0),
6329 {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
6330 AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
6331 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6332 return;
6333 case Intrinsic::aarch64_sve_fmax_x2:
6335 Node->getValueType(0),
6336 {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
6337 AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
6338 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6339 return;
6340 case Intrinsic::aarch64_sve_smax_x4:
6342 Node->getValueType(0),
6343 {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
6344 AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
6345 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6346 return;
6347 case Intrinsic::aarch64_sve_umax_x4:
6349 Node->getValueType(0),
6350 {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
6351 AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
6352 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6353 return;
6354 case Intrinsic::aarch64_sve_fmax_x4:
6356 Node->getValueType(0),
6357 {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
6358 AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
6359 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6360 return;
6361 case Intrinsic::aarch64_sme_famax_x2:
6363 Node->getValueType(0),
6364 {0, AArch64::FAMAX_2Z2Z_H, AArch64::FAMAX_2Z2Z_S,
6365 AArch64::FAMAX_2Z2Z_D}))
6366 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6367 return;
6368 case Intrinsic::aarch64_sme_famax_x4:
6370 Node->getValueType(0),
6371 {0, AArch64::FAMAX_4Z4Z_H, AArch64::FAMAX_4Z4Z_S,
6372 AArch64::FAMAX_4Z4Z_D}))
6373 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6374 return;
6375 case Intrinsic::aarch64_sme_famin_x2:
6377 Node->getValueType(0),
6378 {0, AArch64::FAMIN_2Z2Z_H, AArch64::FAMIN_2Z2Z_S,
6379 AArch64::FAMIN_2Z2Z_D}))
6380 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6381 return;
6382 case Intrinsic::aarch64_sme_famin_x4:
6384 Node->getValueType(0),
6385 {0, AArch64::FAMIN_4Z4Z_H, AArch64::FAMIN_4Z4Z_S,
6386 AArch64::FAMIN_4Z4Z_D}))
6387 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6388 return;
6389 case Intrinsic::aarch64_sve_smin_x2:
6391 Node->getValueType(0),
6392 {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
6393 AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
6394 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6395 return;
6396 case Intrinsic::aarch64_sve_umin_x2:
6398 Node->getValueType(0),
6399 {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
6400 AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
6401 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6402 return;
6403 case Intrinsic::aarch64_sve_fmin_x2:
6405 Node->getValueType(0),
6406 {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
6407 AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
6408 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6409 return;
6410 case Intrinsic::aarch64_sve_smin_x4:
6412 Node->getValueType(0),
6413 {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
6414 AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
6415 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6416 return;
6417 case Intrinsic::aarch64_sve_umin_x4:
6419 Node->getValueType(0),
6420 {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
6421 AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
6422 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6423 return;
6424 case Intrinsic::aarch64_sve_fmin_x4:
6426 Node->getValueType(0),
6427 {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
6428 AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
6429 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6430 return;
6431 case Intrinsic::aarch64_sve_fmaxnm_single_x2 :
6433 Node->getValueType(0),
6434 {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
6435 AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
6436 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6437 return;
6438 case Intrinsic::aarch64_sve_fmaxnm_single_x4 :
6440 Node->getValueType(0),
6441 {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
6442 AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
6443 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6444 return;
6445 case Intrinsic::aarch64_sve_fminnm_single_x2:
6447 Node->getValueType(0),
6448 {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
6449 AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
6450 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6451 return;
6452 case Intrinsic::aarch64_sve_fminnm_single_x4:
6454 Node->getValueType(0),
6455 {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
6456 AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
6457 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6458 return;
6459 case Intrinsic::aarch64_sve_fscale_single_x4:
6460 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::BFSCALE_4ZZ);
6461 return;
6462 case Intrinsic::aarch64_sve_fscale_single_x2:
6463 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::BFSCALE_2ZZ);
6464 return;
6465 case Intrinsic::aarch64_sve_fmul_single_x4:
6467 Node->getValueType(0),
6468 {AArch64::BFMUL_4ZZ, AArch64::FMUL_4ZZ_H, AArch64::FMUL_4ZZ_S,
6469 AArch64::FMUL_4ZZ_D}))
6470 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6471 return;
6472 case Intrinsic::aarch64_sve_fmul_single_x2:
6474 Node->getValueType(0),
6475 {AArch64::BFMUL_2ZZ, AArch64::FMUL_2ZZ_H, AArch64::FMUL_2ZZ_S,
6476 AArch64::FMUL_2ZZ_D}))
6477 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6478 return;
6479 case Intrinsic::aarch64_sve_fmaxnm_x2:
6481 Node->getValueType(0),
6482 {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
6483 AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
6484 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6485 return;
6486 case Intrinsic::aarch64_sve_fmaxnm_x4:
6488 Node->getValueType(0),
6489 {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
6490 AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
6491 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6492 return;
6493 case Intrinsic::aarch64_sve_fminnm_x2:
6495 Node->getValueType(0),
6496 {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
6497 AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
6498 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6499 return;
6500 case Intrinsic::aarch64_sve_fminnm_x4:
6502 Node->getValueType(0),
6503 {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
6504 AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
6505 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6506 return;
6507 case Intrinsic::aarch64_sve_aese_lane_x2:
6508 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESE_2ZZI_B);
6509 return;
6510 case Intrinsic::aarch64_sve_aesd_lane_x2:
6511 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESD_2ZZI_B);
6512 return;
6513 case Intrinsic::aarch64_sve_aesemc_lane_x2:
6514 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESEMC_2ZZI_B);
6515 return;
6516 case Intrinsic::aarch64_sve_aesdimc_lane_x2:
6517 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::AESDIMC_2ZZI_B);
6518 return;
6519 case Intrinsic::aarch64_sve_aese_lane_x4:
6520 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESE_4ZZI_B);
6521 return;
6522 case Intrinsic::aarch64_sve_aesd_lane_x4:
6523 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESD_4ZZI_B);
6524 return;
6525 case Intrinsic::aarch64_sve_aesemc_lane_x4:
6526 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESEMC_4ZZI_B);
6527 return;
6528 case Intrinsic::aarch64_sve_aesdimc_lane_x4:
6529 SelectDestructiveMultiIntrinsic(Node, 4, false, AArch64::AESDIMC_4ZZI_B);
6530 return;
6531 case Intrinsic::aarch64_sve_pmlal_pair_x2:
6532 SelectDestructiveMultiIntrinsic(Node, 2, false, AArch64::PMLAL_2ZZZ_Q);
6533 return;
6534 case Intrinsic::aarch64_sve_pmull_pair_x2: {
6535 SDLoc DL(Node);
6536 SmallVector<SDValue, 4> Regs(Node->ops().slice(1, 2));
6537 SDNode *Res =
6538 CurDAG->getMachineNode(AArch64::PMULL_2ZZZ_Q, DL, MVT::Untyped, Regs);
6539 SDValue SuperReg = SDValue(Res, 0);
6540 for (unsigned I = 0; I < 2; I++)
6541 ReplaceUses(SDValue(Node, I),
6542 CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
6543 SuperReg));
6544 CurDAG->RemoveDeadNode(Node);
6545 return;
6546 }
6547 case Intrinsic::aarch64_sve_fscale_x4:
6548 SelectDestructiveMultiIntrinsic(Node, 4, true, AArch64::BFSCALE_4Z4Z);
6549 return;
6550 case Intrinsic::aarch64_sve_fscale_x2:
6551 SelectDestructiveMultiIntrinsic(Node, 2, true, AArch64::BFSCALE_2Z2Z);
6552 return;
6553 case Intrinsic::aarch64_sve_fmul_x4:
6555 Node->getValueType(0),
6556 {AArch64::BFMUL_4Z4Z, AArch64::FMUL_4Z4Z_H, AArch64::FMUL_4Z4Z_S,
6557 AArch64::FMUL_4Z4Z_D}))
6558 SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
6559 return;
6560 case Intrinsic::aarch64_sve_fmul_x2:
6562 Node->getValueType(0),
6563 {AArch64::BFMUL_2Z2Z, AArch64::FMUL_2Z2Z_H, AArch64::FMUL_2Z2Z_S,
6564 AArch64::FMUL_2Z2Z_D}))
6565 SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
6566 return;
6567 case Intrinsic::aarch64_sve_fcvtzs_x2:
6568 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
6569 return;
6570 case Intrinsic::aarch64_sve_scvtf_x2:
6571 SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
6572 return;
6573 case Intrinsic::aarch64_sve_fcvtzu_x2:
6574 SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
6575 return;
6576 case Intrinsic::aarch64_sve_ucvtf_x2:
6577 SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
6578 return;
6579 case Intrinsic::aarch64_sve_fcvtzs_x4:
6580 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
6581 return;
6582 case Intrinsic::aarch64_sve_scvtf_x4:
6583 SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
6584 return;
6585 case Intrinsic::aarch64_sve_fcvtzu_x4:
6586 SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
6587 return;
6588 case Intrinsic::aarch64_sve_ucvtf_x4:
6589 SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
6590 return;
6591 case Intrinsic::aarch64_sve_fcvt_widen_x2:
6592 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
6593 return;
6594 case Intrinsic::aarch64_sve_fcvtl_widen_x2:
6595 SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
6596 return;
6597 case Intrinsic::aarch64_sve_sclamp_single_x2:
6599 Node->getValueType(0),
6600 {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
6601 AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
6602 SelectClamp(Node, 2, Op);
6603 return;
6604 case Intrinsic::aarch64_sve_uclamp_single_x2:
6606 Node->getValueType(0),
6607 {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
6608 AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
6609 SelectClamp(Node, 2, Op);
6610 return;
6611 case Intrinsic::aarch64_sve_fclamp_single_x2:
6613 Node->getValueType(0),
6614 {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
6615 AArch64::FCLAMP_VG2_2Z2Z_D}))
6616 SelectClamp(Node, 2, Op);
6617 return;
6618 case Intrinsic::aarch64_sve_bfclamp_single_x2:
6619 SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
6620 return;
6621 case Intrinsic::aarch64_sve_sclamp_single_x4:
6623 Node->getValueType(0),
6624 {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
6625 AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
6626 SelectClamp(Node, 4, Op);
6627 return;
6628 case Intrinsic::aarch64_sve_uclamp_single_x4:
6630 Node->getValueType(0),
6631 {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
6632 AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
6633 SelectClamp(Node, 4, Op);
6634 return;
6635 case Intrinsic::aarch64_sve_fclamp_single_x4:
6637 Node->getValueType(0),
6638 {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
6639 AArch64::FCLAMP_VG4_4Z4Z_D}))
6640 SelectClamp(Node, 4, Op);
6641 return;
6642 case Intrinsic::aarch64_sve_bfclamp_single_x4:
6643 SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
6644 return;
6645 case Intrinsic::aarch64_sve_add_single_x2:
6647 Node->getValueType(0),
6648 {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
6649 AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
6650 SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
6651 return;
6652 case Intrinsic::aarch64_sve_add_single_x4:
6654 Node->getValueType(0),
6655 {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
6656 AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
6657 SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
6658 return;
6659 case Intrinsic::aarch64_sve_zip_x2:
6661 Node->getValueType(0),
6662 {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
6663 AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
6664 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6665 return;
6666 case Intrinsic::aarch64_sve_zipq_x2:
6667 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6668 AArch64::ZIP_VG2_2ZZZ_Q);
6669 return;
6670 case Intrinsic::aarch64_sve_zip_x4:
6672 Node->getValueType(0),
6673 {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
6674 AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
6675 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6676 return;
6677 case Intrinsic::aarch64_sve_zipq_x4:
6678 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6679 AArch64::ZIP_VG4_4Z4Z_Q);
6680 return;
6681 case Intrinsic::aarch64_sve_uzp_x2:
6683 Node->getValueType(0),
6684 {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
6685 AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
6686 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6687 return;
6688 case Intrinsic::aarch64_sve_uzpq_x2:
6689 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
6690 AArch64::UZP_VG2_2ZZZ_Q);
6691 return;
6692 case Intrinsic::aarch64_sve_uzp_x4:
6694 Node->getValueType(0),
6695 {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
6696 AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
6697 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6698 return;
6699 case Intrinsic::aarch64_sve_uzpq_x4:
6700 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
6701 AArch64::UZP_VG4_4Z4Z_Q);
6702 return;
6703 case Intrinsic::aarch64_sve_sel_x2:
6705 Node->getValueType(0),
6706 {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
6707 AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
6708 SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
6709 return;
6710 case Intrinsic::aarch64_sve_sel_x4:
6712 Node->getValueType(0),
6713 {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
6714 AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
6715 SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
6716 return;
6717 case Intrinsic::aarch64_sve_frinta_x2:
6718 SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
6719 return;
6720 case Intrinsic::aarch64_sve_frinta_x4:
6721 SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
6722 return;
6723 case Intrinsic::aarch64_sve_frintm_x2:
6724 SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
6725 return;
6726 case Intrinsic::aarch64_sve_frintm_x4:
6727 SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
6728 return;
6729 case Intrinsic::aarch64_sve_frintn_x2:
6730 SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
6731 return;
6732 case Intrinsic::aarch64_sve_frintn_x4:
6733 SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
6734 return;
6735 case Intrinsic::aarch64_sve_frintp_x2:
6736 SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
6737 return;
6738 case Intrinsic::aarch64_sve_frintp_x4:
6739 SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
6740 return;
6741 case Intrinsic::aarch64_sve_sunpk_x2:
6743 Node->getValueType(0),
6744 {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
6745 AArch64::SUNPK_VG2_2ZZ_D}))
6746 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6747 return;
6748 case Intrinsic::aarch64_sve_uunpk_x2:
6750 Node->getValueType(0),
6751 {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
6752 AArch64::UUNPK_VG2_2ZZ_D}))
6753 SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
6754 return;
6755 case Intrinsic::aarch64_sve_sunpk_x4:
6757 Node->getValueType(0),
6758 {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
6759 AArch64::SUNPK_VG4_4Z2Z_D}))
6760 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6761 return;
6762 case Intrinsic::aarch64_sve_uunpk_x4:
6764 Node->getValueType(0),
6765 {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
6766 AArch64::UUNPK_VG4_4Z2Z_D}))
6767 SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
6768 return;
6769 case Intrinsic::aarch64_sve_pext_x2: {
6771 Node->getValueType(0),
6772 {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
6773 AArch64::PEXT_2PCI_D}))
6774 SelectPExtPair(Node, Op);
6775 return;
6776 }
6777 }
6778 break;
6779 }
6780 case ISD::INTRINSIC_VOID: {
6781 unsigned IntNo = Node->getConstantOperandVal(1);
6782 if (Node->getNumOperands() >= 3)
6783 VT = Node->getOperand(2)->getValueType(0);
6784 switch (IntNo) {
6785 default:
6786 break;
6787 case Intrinsic::aarch64_neon_st1x2: {
6788 if (VT == MVT::v8i8) {
6789 SelectStore(Node, 2, AArch64::ST1Twov8b);
6790 return;
6791 } else if (VT == MVT::v16i8) {
6792 SelectStore(Node, 2, AArch64::ST1Twov16b);
6793 return;
6794 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6795 VT == MVT::v4bf16) {
6796 SelectStore(Node, 2, AArch64::ST1Twov4h);
6797 return;
6798 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6799 VT == MVT::v8bf16) {
6800 SelectStore(Node, 2, AArch64::ST1Twov8h);
6801 return;
6802 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6803 SelectStore(Node, 2, AArch64::ST1Twov2s);
6804 return;
6805 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6806 SelectStore(Node, 2, AArch64::ST1Twov4s);
6807 return;
6808 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6809 SelectStore(Node, 2, AArch64::ST1Twov2d);
6810 return;
6811 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6812 SelectStore(Node, 2, AArch64::ST1Twov1d);
6813 return;
6814 }
6815 break;
6816 }
6817 case Intrinsic::aarch64_neon_st1x3: {
6818 if (VT == MVT::v8i8) {
6819 SelectStore(Node, 3, AArch64::ST1Threev8b);
6820 return;
6821 } else if (VT == MVT::v16i8) {
6822 SelectStore(Node, 3, AArch64::ST1Threev16b);
6823 return;
6824 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6825 VT == MVT::v4bf16) {
6826 SelectStore(Node, 3, AArch64::ST1Threev4h);
6827 return;
6828 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6829 VT == MVT::v8bf16) {
6830 SelectStore(Node, 3, AArch64::ST1Threev8h);
6831 return;
6832 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6833 SelectStore(Node, 3, AArch64::ST1Threev2s);
6834 return;
6835 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6836 SelectStore(Node, 3, AArch64::ST1Threev4s);
6837 return;
6838 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6839 SelectStore(Node, 3, AArch64::ST1Threev2d);
6840 return;
6841 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6842 SelectStore(Node, 3, AArch64::ST1Threev1d);
6843 return;
6844 }
6845 break;
6846 }
6847 case Intrinsic::aarch64_neon_st1x4: {
6848 if (VT == MVT::v8i8) {
6849 SelectStore(Node, 4, AArch64::ST1Fourv8b);
6850 return;
6851 } else if (VT == MVT::v16i8) {
6852 SelectStore(Node, 4, AArch64::ST1Fourv16b);
6853 return;
6854 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6855 VT == MVT::v4bf16) {
6856 SelectStore(Node, 4, AArch64::ST1Fourv4h);
6857 return;
6858 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6859 VT == MVT::v8bf16) {
6860 SelectStore(Node, 4, AArch64::ST1Fourv8h);
6861 return;
6862 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6863 SelectStore(Node, 4, AArch64::ST1Fourv2s);
6864 return;
6865 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6866 SelectStore(Node, 4, AArch64::ST1Fourv4s);
6867 return;
6868 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6869 SelectStore(Node, 4, AArch64::ST1Fourv2d);
6870 return;
6871 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6872 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6873 return;
6874 }
6875 break;
6876 }
6877 case Intrinsic::aarch64_neon_st2: {
6878 if (VT == MVT::v8i8) {
6879 SelectStore(Node, 2, AArch64::ST2Twov8b);
6880 return;
6881 } else if (VT == MVT::v16i8) {
6882 SelectStore(Node, 2, AArch64::ST2Twov16b);
6883 return;
6884 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6885 VT == MVT::v4bf16) {
6886 SelectStore(Node, 2, AArch64::ST2Twov4h);
6887 return;
6888 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6889 VT == MVT::v8bf16) {
6890 SelectStore(Node, 2, AArch64::ST2Twov8h);
6891 return;
6892 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6893 SelectStore(Node, 2, AArch64::ST2Twov2s);
6894 return;
6895 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6896 SelectStore(Node, 2, AArch64::ST2Twov4s);
6897 return;
6898 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6899 SelectStore(Node, 2, AArch64::ST2Twov2d);
6900 return;
6901 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6902 SelectStore(Node, 2, AArch64::ST1Twov1d);
6903 return;
6904 }
6905 break;
6906 }
6907 case Intrinsic::aarch64_neon_st3: {
6908 if (VT == MVT::v8i8) {
6909 SelectStore(Node, 3, AArch64::ST3Threev8b);
6910 return;
6911 } else if (VT == MVT::v16i8) {
6912 SelectStore(Node, 3, AArch64::ST3Threev16b);
6913 return;
6914 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6915 VT == MVT::v4bf16) {
6916 SelectStore(Node, 3, AArch64::ST3Threev4h);
6917 return;
6918 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6919 VT == MVT::v8bf16) {
6920 SelectStore(Node, 3, AArch64::ST3Threev8h);
6921 return;
6922 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6923 SelectStore(Node, 3, AArch64::ST3Threev2s);
6924 return;
6925 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6926 SelectStore(Node, 3, AArch64::ST3Threev4s);
6927 return;
6928 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6929 SelectStore(Node, 3, AArch64::ST3Threev2d);
6930 return;
6931 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6932 SelectStore(Node, 3, AArch64::ST1Threev1d);
6933 return;
6934 }
6935 break;
6936 }
6937 case Intrinsic::aarch64_neon_st4: {
6938 if (VT == MVT::v8i8) {
6939 SelectStore(Node, 4, AArch64::ST4Fourv8b);
6940 return;
6941 } else if (VT == MVT::v16i8) {
6942 SelectStore(Node, 4, AArch64::ST4Fourv16b);
6943 return;
6944 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
6945 VT == MVT::v4bf16) {
6946 SelectStore(Node, 4, AArch64::ST4Fourv4h);
6947 return;
6948 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
6949 VT == MVT::v8bf16) {
6950 SelectStore(Node, 4, AArch64::ST4Fourv8h);
6951 return;
6952 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
6953 SelectStore(Node, 4, AArch64::ST4Fourv2s);
6954 return;
6955 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
6956 SelectStore(Node, 4, AArch64::ST4Fourv4s);
6957 return;
6958 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
6959 SelectStore(Node, 4, AArch64::ST4Fourv2d);
6960 return;
6961 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
6962 SelectStore(Node, 4, AArch64::ST1Fourv1d);
6963 return;
6964 }
6965 break;
6966 }
6967 case Intrinsic::aarch64_neon_st2lane: {
6968 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6969 SelectStoreLane(Node, 2, AArch64::ST2i8);
6970 return;
6971 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6972 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6973 SelectStoreLane(Node, 2, AArch64::ST2i16);
6974 return;
6975 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6976 VT == MVT::v2f32) {
6977 SelectStoreLane(Node, 2, AArch64::ST2i32);
6978 return;
6979 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6980 VT == MVT::v1f64) {
6981 SelectStoreLane(Node, 2, AArch64::ST2i64);
6982 return;
6983 }
6984 break;
6985 }
6986 case Intrinsic::aarch64_neon_st3lane: {
6987 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
6988 SelectStoreLane(Node, 3, AArch64::ST3i8);
6989 return;
6990 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
6991 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
6992 SelectStoreLane(Node, 3, AArch64::ST3i16);
6993 return;
6994 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
6995 VT == MVT::v2f32) {
6996 SelectStoreLane(Node, 3, AArch64::ST3i32);
6997 return;
6998 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
6999 VT == MVT::v1f64) {
7000 SelectStoreLane(Node, 3, AArch64::ST3i64);
7001 return;
7002 }
7003 break;
7004 }
7005 case Intrinsic::aarch64_neon_st4lane: {
7006 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7007 SelectStoreLane(Node, 4, AArch64::ST4i8);
7008 return;
7009 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7010 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7011 SelectStoreLane(Node, 4, AArch64::ST4i16);
7012 return;
7013 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7014 VT == MVT::v2f32) {
7015 SelectStoreLane(Node, 4, AArch64::ST4i32);
7016 return;
7017 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7018 VT == MVT::v1f64) {
7019 SelectStoreLane(Node, 4, AArch64::ST4i64);
7020 return;
7021 }
7022 break;
7023 }
7024 case Intrinsic::aarch64_sve_st2q: {
7025 SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
7026 return;
7027 }
7028 case Intrinsic::aarch64_sve_st3q: {
7029 SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
7030 return;
7031 }
7032 case Intrinsic::aarch64_sve_st4q: {
7033 SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
7034 return;
7035 }
7036 case Intrinsic::aarch64_sve_st2: {
7037 if (VT == MVT::nxv16i8) {
7038 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
7039 return;
7040 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7041 VT == MVT::nxv8bf16) {
7042 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
7043 return;
7044 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7045 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
7046 return;
7047 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7048 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
7049 return;
7050 }
7051 break;
7052 }
7053 case Intrinsic::aarch64_sve_st3: {
7054 if (VT == MVT::nxv16i8) {
7055 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
7056 return;
7057 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7058 VT == MVT::nxv8bf16) {
7059 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
7060 return;
7061 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7062 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
7063 return;
7064 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7065 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
7066 return;
7067 }
7068 break;
7069 }
7070 case Intrinsic::aarch64_sve_st4: {
7071 if (VT == MVT::nxv16i8) {
7072 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
7073 return;
7074 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
7075 VT == MVT::nxv8bf16) {
7076 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
7077 return;
7078 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
7079 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
7080 return;
7081 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
7082 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
7083 return;
7084 }
7085 break;
7086 }
7087 }
7088 break;
7089 }
7090 case AArch64ISD::LD2post: {
7091 if (VT == MVT::v8i8) {
7092 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
7093 return;
7094 } else if (VT == MVT::v16i8) {
7095 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
7096 return;
7097 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7098 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
7099 return;
7100 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7101 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
7102 return;
7103 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7104 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
7105 return;
7106 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7107 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
7108 return;
7109 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7110 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7111 return;
7112 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7113 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
7114 return;
7115 }
7116 break;
7117 }
7118 case AArch64ISD::LD3post: {
7119 if (VT == MVT::v8i8) {
7120 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
7121 return;
7122 } else if (VT == MVT::v16i8) {
7123 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
7124 return;
7125 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7126 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
7127 return;
7128 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7129 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
7130 return;
7131 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7132 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
7133 return;
7134 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7135 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
7136 return;
7137 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7138 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7139 return;
7140 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7141 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
7142 return;
7143 }
7144 break;
7145 }
7146 case AArch64ISD::LD4post: {
7147 if (VT == MVT::v8i8) {
7148 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
7149 return;
7150 } else if (VT == MVT::v16i8) {
7151 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
7152 return;
7153 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7154 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
7155 return;
7156 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7157 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
7158 return;
7159 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7160 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
7161 return;
7162 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7163 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
7164 return;
7165 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7166 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7167 return;
7168 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7169 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
7170 return;
7171 }
7172 break;
7173 }
7174 case AArch64ISD::LD1x2post: {
7175 if (VT == MVT::v8i8) {
7176 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
7177 return;
7178 } else if (VT == MVT::v16i8) {
7179 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
7180 return;
7181 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7182 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
7183 return;
7184 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7185 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
7186 return;
7187 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7188 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
7189 return;
7190 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7191 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
7192 return;
7193 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7194 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
7195 return;
7196 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7197 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
7198 return;
7199 }
7200 break;
7201 }
7202 case AArch64ISD::LD1x3post: {
7203 if (VT == MVT::v8i8) {
7204 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
7205 return;
7206 } else if (VT == MVT::v16i8) {
7207 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
7208 return;
7209 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7210 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
7211 return;
7212 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7213 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
7214 return;
7215 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7216 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
7217 return;
7218 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7219 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
7220 return;
7221 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7222 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
7223 return;
7224 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7225 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
7226 return;
7227 }
7228 break;
7229 }
7230 case AArch64ISD::LD1x4post: {
7231 if (VT == MVT::v8i8) {
7232 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
7233 return;
7234 } else if (VT == MVT::v16i8) {
7235 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
7236 return;
7237 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7238 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
7239 return;
7240 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7241 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
7242 return;
7243 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7244 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
7245 return;
7246 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7247 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
7248 return;
7249 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7250 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
7251 return;
7252 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7253 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
7254 return;
7255 }
7256 break;
7257 }
7258 case AArch64ISD::LD1DUPpost: {
7259 if (VT == MVT::v8i8) {
7260 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
7261 return;
7262 } else if (VT == MVT::v16i8) {
7263 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
7264 return;
7265 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7266 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
7267 return;
7268 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7269 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
7270 return;
7271 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7272 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
7273 return;
7274 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7275 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
7276 return;
7277 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7278 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
7279 return;
7280 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7281 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
7282 return;
7283 }
7284 break;
7285 }
7286 case AArch64ISD::LD2DUPpost: {
7287 if (VT == MVT::v8i8) {
7288 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
7289 return;
7290 } else if (VT == MVT::v16i8) {
7291 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
7292 return;
7293 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7294 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
7295 return;
7296 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7297 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
7298 return;
7299 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7300 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
7301 return;
7302 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7303 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
7304 return;
7305 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7306 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
7307 return;
7308 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7309 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
7310 return;
7311 }
7312 break;
7313 }
7314 case AArch64ISD::LD3DUPpost: {
7315 if (VT == MVT::v8i8) {
7316 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
7317 return;
7318 } else if (VT == MVT::v16i8) {
7319 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
7320 return;
7321 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7322 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
7323 return;
7324 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7325 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
7326 return;
7327 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7328 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
7329 return;
7330 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7331 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
7332 return;
7333 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7334 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
7335 return;
7336 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7337 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
7338 return;
7339 }
7340 break;
7341 }
7342 case AArch64ISD::LD4DUPpost: {
7343 if (VT == MVT::v8i8) {
7344 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
7345 return;
7346 } else if (VT == MVT::v16i8) {
7347 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
7348 return;
7349 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7350 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
7351 return;
7352 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7353 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
7354 return;
7355 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7356 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
7357 return;
7358 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7359 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
7360 return;
7361 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7362 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
7363 return;
7364 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7365 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
7366 return;
7367 }
7368 break;
7369 }
7370 case AArch64ISD::LD1LANEpost: {
7371 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7372 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
7373 return;
7374 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7375 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7376 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
7377 return;
7378 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7379 VT == MVT::v2f32) {
7380 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
7381 return;
7382 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7383 VT == MVT::v1f64) {
7384 SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
7385 return;
7386 }
7387 break;
7388 }
7389 case AArch64ISD::LD2LANEpost: {
7390 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7391 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
7392 return;
7393 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7394 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7395 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
7396 return;
7397 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7398 VT == MVT::v2f32) {
7399 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
7400 return;
7401 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7402 VT == MVT::v1f64) {
7403 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
7404 return;
7405 }
7406 break;
7407 }
7408 case AArch64ISD::LD3LANEpost: {
7409 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7410 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
7411 return;
7412 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7413 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7414 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
7415 return;
7416 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7417 VT == MVT::v2f32) {
7418 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
7419 return;
7420 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7421 VT == MVT::v1f64) {
7422 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
7423 return;
7424 }
7425 break;
7426 }
7427 case AArch64ISD::LD4LANEpost: {
7428 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7429 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
7430 return;
7431 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7432 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7433 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
7434 return;
7435 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7436 VT == MVT::v2f32) {
7437 SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
7438 return;
7439 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7440 VT == MVT::v1f64) {
7441 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
7442 return;
7443 }
7444 break;
7445 }
7446 case AArch64ISD::ST2post: {
7447 VT = Node->getOperand(1).getValueType();
7448 if (VT == MVT::v8i8) {
7449 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
7450 return;
7451 } else if (VT == MVT::v16i8) {
7452 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
7453 return;
7454 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7455 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
7456 return;
7457 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7458 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
7459 return;
7460 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7461 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
7462 return;
7463 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7464 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
7465 return;
7466 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7467 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
7468 return;
7469 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7470 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7471 return;
7472 }
7473 break;
7474 }
7475 case AArch64ISD::ST3post: {
7476 VT = Node->getOperand(1).getValueType();
7477 if (VT == MVT::v8i8) {
7478 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
7479 return;
7480 } else if (VT == MVT::v16i8) {
7481 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
7482 return;
7483 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7484 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
7485 return;
7486 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7487 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
7488 return;
7489 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7490 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
7491 return;
7492 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7493 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
7494 return;
7495 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7496 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
7497 return;
7498 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7499 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7500 return;
7501 }
7502 break;
7503 }
7504 case AArch64ISD::ST4post: {
7505 VT = Node->getOperand(1).getValueType();
7506 if (VT == MVT::v8i8) {
7507 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
7508 return;
7509 } else if (VT == MVT::v16i8) {
7510 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
7511 return;
7512 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7513 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
7514 return;
7515 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7516 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
7517 return;
7518 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7519 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
7520 return;
7521 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7522 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
7523 return;
7524 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7525 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
7526 return;
7527 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7528 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7529 return;
7530 }
7531 break;
7532 }
7533 case AArch64ISD::ST1x2post: {
7534 VT = Node->getOperand(1).getValueType();
7535 if (VT == MVT::v8i8) {
7536 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
7537 return;
7538 } else if (VT == MVT::v16i8) {
7539 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
7540 return;
7541 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7542 SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
7543 return;
7544 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7545 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
7546 return;
7547 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7548 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
7549 return;
7550 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7551 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
7552 return;
7553 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7554 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
7555 return;
7556 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7557 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
7558 return;
7559 }
7560 break;
7561 }
7562 case AArch64ISD::ST1x3post: {
7563 VT = Node->getOperand(1).getValueType();
7564 if (VT == MVT::v8i8) {
7565 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
7566 return;
7567 } else if (VT == MVT::v16i8) {
7568 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
7569 return;
7570 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7571 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
7572 return;
7573 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
7574 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
7575 return;
7576 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7577 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
7578 return;
7579 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7580 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
7581 return;
7582 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7583 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
7584 return;
7585 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7586 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
7587 return;
7588 }
7589 break;
7590 }
7591 case AArch64ISD::ST1x4post: {
7592 VT = Node->getOperand(1).getValueType();
7593 if (VT == MVT::v8i8) {
7594 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
7595 return;
7596 } else if (VT == MVT::v16i8) {
7597 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
7598 return;
7599 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
7600 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
7601 return;
7602 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
7603 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
7604 return;
7605 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
7606 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
7607 return;
7608 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
7609 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
7610 return;
7611 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
7612 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
7613 return;
7614 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
7615 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
7616 return;
7617 }
7618 break;
7619 }
7620 case AArch64ISD::ST2LANEpost: {
7621 VT = Node->getOperand(1).getValueType();
7622 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7623 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
7624 return;
7625 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7626 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7627 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
7628 return;
7629 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7630 VT == MVT::v2f32) {
7631 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
7632 return;
7633 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7634 VT == MVT::v1f64) {
7635 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
7636 return;
7637 }
7638 break;
7639 }
7640 case AArch64ISD::ST3LANEpost: {
7641 VT = Node->getOperand(1).getValueType();
7642 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7643 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
7644 return;
7645 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7646 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7647 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
7648 return;
7649 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7650 VT == MVT::v2f32) {
7651 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
7652 return;
7653 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7654 VT == MVT::v1f64) {
7655 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
7656 return;
7657 }
7658 break;
7659 }
7660 case AArch64ISD::ST4LANEpost: {
7661 VT = Node->getOperand(1).getValueType();
7662 if (VT == MVT::v16i8 || VT == MVT::v8i8) {
7663 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
7664 return;
7665 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
7666 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
7667 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
7668 return;
7669 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
7670 VT == MVT::v2f32) {
7671 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
7672 return;
7673 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
7674 VT == MVT::v1f64) {
7675 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
7676 return;
7677 }
7678 break;
7679 }
7680 }
7681
7682 // Select the default instruction
7683 SelectCode(Node);
7684}
7685
7686/// createAArch64ISelDag - This pass converts a legalized DAG into a
7687/// AArch64-specific DAG, ready for instruction scheduling.
7689 CodeGenOptLevel OptLevel) {
7690 return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
7691}
7692
7693/// When \p PredVT is a scalable vector predicate in the form
7694/// MVT::nx<M>xi1, it builds the correspondent scalable vector of
7695/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
7696/// structured vectors (NumVec >1), the output data type is
7697/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
7698/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
7699/// EVT.
7701 unsigned NumVec) {
7702 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
7703 if (!PredVT.isScalableVectorOf(MVT::i1))
7704 return EVT();
7705
7706 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
7707 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
7708 return EVT();
7709
7710 ElementCount EC = PredVT.getVectorElementCount();
7711 EVT ScalarVT =
7712 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
7713 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
7714
7715 return MemVT;
7716}
7717
7718/// Return the EVT of the data associated to a memory operation in \p
7719/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
7721 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(Root))
7722 return MemIntr->getMemoryVT();
7723
7724 if (isa<MemSDNode>(Root)) {
7725 EVT MemVT = cast<MemSDNode>(Root)->getMemoryVT();
7726
7727 EVT DataVT;
7728 if (auto *Load = dyn_cast<LoadSDNode>(Root))
7729 DataVT = Load->getValueType(0);
7730 else if (auto *Load = dyn_cast<MaskedLoadSDNode>(Root))
7731 DataVT = Load->getValueType(0);
7732 else if (auto *Store = dyn_cast<StoreSDNode>(Root))
7733 DataVT = Store->getValue().getValueType();
7734 else if (auto *Store = dyn_cast<MaskedStoreSDNode>(Root))
7735 DataVT = Store->getValue().getValueType();
7736 else
7737 llvm_unreachable("Unexpected MemSDNode!");
7738
7739 return DataVT.changeVectorElementType(Ctx, MemVT.getVectorElementType());
7740 }
7741
7742 const unsigned Opcode = Root->getOpcode();
7743 // For custom ISD nodes, we have to look at them individually to extract the
7744 // type of the data moved to/from memory.
7745 switch (Opcode) {
7746 case AArch64ISD::LD1_MERGE_ZERO:
7747 case AArch64ISD::LD1S_MERGE_ZERO:
7748 case AArch64ISD::LDNF1_MERGE_ZERO:
7749 case AArch64ISD::LDNF1S_MERGE_ZERO:
7750 return cast<VTSDNode>(Root->getOperand(3))->getVT();
7751 case AArch64ISD::ST1_PRED:
7752 return cast<VTSDNode>(Root->getOperand(4))->getVT();
7753 default:
7754 break;
7755 }
7756
7757 if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
7758 return EVT();
7759
7760 switch (Root->getConstantOperandVal(1)) {
7761 default:
7762 return EVT();
7763 case Intrinsic::aarch64_sme_ldr:
7764 case Intrinsic::aarch64_sme_str:
7765 return MVT::nxv16i8;
7766 case Intrinsic::aarch64_sve_prf:
7767 // We are using an SVE prefetch intrinsic. Type must be inferred from the
7768 // width of the predicate.
7770 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
7771 case Intrinsic::aarch64_sve_ld2_sret:
7772 case Intrinsic::aarch64_sve_ld2q_sret:
7774 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
7775 case Intrinsic::aarch64_sve_st2q:
7777 Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
7778 case Intrinsic::aarch64_sve_ld3_sret:
7779 case Intrinsic::aarch64_sve_ld3q_sret:
7781 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
7782 case Intrinsic::aarch64_sve_st3q:
7784 Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
7785 case Intrinsic::aarch64_sve_ld4_sret:
7786 case Intrinsic::aarch64_sve_ld4q_sret:
7788 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
7789 case Intrinsic::aarch64_sve_st4q:
7791 Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
7792 case Intrinsic::aarch64_sve_ld1udq:
7793 case Intrinsic::aarch64_sve_st1dq:
7794 return EVT(MVT::nxv1i64);
7795 case Intrinsic::aarch64_sve_ld1uwq:
7796 case Intrinsic::aarch64_sve_st1wq:
7797 return EVT(MVT::nxv1i32);
7798 }
7799}
7800
7801/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
7802/// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
7803/// where Root is the memory access using N for its address.
7804template <int64_t Min, int64_t Max>
7805bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
7806 SDValue &Base,
7807 SDValue &OffImm) {
7808 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
7809 const DataLayout &DL = CurDAG->getDataLayout();
7810 const MachineFrameInfo &MFI = MF->getFrameInfo();
7811
7812 if (N.getOpcode() == ISD::FrameIndex) {
7813 int FI = cast<FrameIndexSDNode>(N)->getIndex();
7814 // We can only encode VL scaled offsets, so only fold in frame indexes
7815 // referencing SVE objects.
7816 if (MFI.hasScalableStackID(FI)) {
7817 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7818 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7819 return true;
7820 }
7821
7822 return false;
7823 }
7824
7825 if (MemVT == EVT())
7826 return false;
7827
7828 if (N.getOpcode() != ISD::ADD)
7829 return false;
7830
7831 SDValue VScale = N.getOperand(1);
7832 int64_t MulImm = std::numeric_limits<int64_t>::max();
7833 if (VScale.getOpcode() == ISD::VSCALE) {
7834 MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
7835 } else if (auto C = dyn_cast<ConstantSDNode>(VScale)) {
7836 int64_t ByteOffset = C->getSExtValue();
7837 const auto KnownVScale =
7839
7840 if (!KnownVScale || ByteOffset % KnownVScale != 0)
7841 return false;
7842
7843 MulImm = ByteOffset / KnownVScale;
7844 } else
7845 return false;
7846
7847 TypeSize TS = MemVT.getSizeInBits();
7848 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
7849
7850 if ((MulImm % MemWidthBytes) != 0)
7851 return false;
7852
7853 int64_t Offset = MulImm / MemWidthBytes;
7855 return false;
7856
7857 Base = N.getOperand(0);
7858 if (Base.getOpcode() == ISD::FrameIndex) {
7859 int FI = cast<FrameIndexSDNode>(Base)->getIndex();
7860 // We can only encode VL scaled offsets, so only fold in frame indexes
7861 // referencing SVE objects.
7862 if (MFI.hasScalableStackID(FI))
7863 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
7864 }
7865
7866 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
7867 return true;
7868}
7869
7870/// Select register plus register addressing mode for SVE, with scaled
7871/// offset.
7872bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
7873 SDValue &Base,
7874 SDValue &Offset) {
7875 if (N.getOpcode() != ISD::ADD)
7876 return false;
7877
7878 // Process an ADD node.
7879 const SDValue LHS = N.getOperand(0);
7880 const SDValue RHS = N.getOperand(1);
7881
7882 // 8 bit data does not come with the SHL node, so it is treated
7883 // separately.
7884 if (Scale == 0) {
7885 Base = LHS;
7886 Offset = RHS;
7887 return true;
7888 }
7889
7890 if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
7891 int64_t ImmOff = C->getSExtValue();
7892 unsigned Size = 1 << Scale;
7893
7894 // To use the reg+reg addressing mode, the immediate must be a multiple of
7895 // the vector element's byte size.
7896 if (ImmOff % Size)
7897 return false;
7898
7899 SDLoc DL(N);
7900 Base = LHS;
7901 Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
7902 SDValue Ops[] = {Offset};
7903 SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
7904 Offset = SDValue(MI, 0);
7905 return true;
7906 }
7907
7908 // Check if the RHS is a shift node with a constant.
7909 if (RHS.getOpcode() != ISD::SHL)
7910 return false;
7911
7912 const SDValue ShiftRHS = RHS.getOperand(1);
7913 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
7914 if (C->getZExtValue() == Scale) {
7915 Base = LHS;
7916 Offset = RHS.getOperand(0);
7917 return true;
7918 }
7919
7920 return false;
7921}
7922
7923bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
7924 const AArch64TargetLowering *TLI =
7925 static_cast<const AArch64TargetLowering *>(getTargetLowering());
7926
7927 return TLI->isAllActivePredicate(*CurDAG, N);
7928}
7929
7930bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
7931 return N.getValueType().isScalableVectorOf(MVT::i1);
7932}
7933
7934bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
7936 unsigned Scale) {
7937 auto MatchConstantOffset = [&](SDValue CN) -> SDValue {
7938 if (auto *C = dyn_cast<ConstantSDNode>(CN)) {
7939 int64_t ImmOff = C->getSExtValue();
7940 if ((ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)))
7941 return CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
7942 }
7943 return SDValue();
7944 };
7945
7946 if (SDValue C = MatchConstantOffset(N)) {
7947 Base = CurDAG->getConstant(0, SDLoc(N), MVT::i32);
7948 Offset = C;
7949 return true;
7950 }
7951
7952 // Try to untangle an ADD node into a 'reg + offset'
7953 if (CurDAG->isBaseWithConstantOffset(N)) {
7954 if (SDValue C = MatchConstantOffset(N.getOperand(1))) {
7955 Base = N.getOperand(0);
7956 Offset = C;
7957 return true;
7958 }
7959 }
7960
7961 // By default, just match reg + 0.
7962 Base = N;
7963 Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
7964 return true;
7965}
7966
7967bool AArch64DAGToDAGISel::SelectCmpBranchUImm6Operand(SDNode *P, SDValue N,
7968 SDValue &Imm) {
7970 static_cast<AArch64CC::CondCode>(P->getConstantOperandVal(1));
7971 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
7972 // Check conservatively if the immediate fits the valid range [0, 64).
7973 // Immediate variants for GE and HS definitely need to be decremented
7974 // when lowering the pseudos later, so an immediate of 1 would become 0.
7975 // For the inverse conditions LT and LO we don't know for sure if they
7976 // will need a decrement but should the decision be made to reverse the
7977 // branch condition, we again end up with the need to decrement.
7978 // The same argument holds for LE, LS, GT and HI and possibly
7979 // incremented immediates. This can lead to slightly less optimal
7980 // codegen, e.g. we never codegen the legal case
7981 // cblt w0, #63, A
7982 // because we could end up with the illegal case
7983 // cbge w0, #64, B
7984 // should the decision to reverse the branch direction be made. For the
7985 // lower bound cases this is no problem since we can express comparisons
7986 // against 0 with either tbz/tnbz or using wzr/xzr.
7987 uint64_t LowerBound = 0, UpperBound = 64;
7988 switch (CC) {
7989 case AArch64CC::GE:
7990 case AArch64CC::HS:
7991 case AArch64CC::LT:
7992 case AArch64CC::LO:
7993 LowerBound = 1;
7994 break;
7995 case AArch64CC::LE:
7996 case AArch64CC::LS:
7997 case AArch64CC::GT:
7998 case AArch64CC::HI:
7999 UpperBound = 63;
8000 break;
8001 default:
8002 break;
8003 }
8004
8005 if (CN->getAPIntValue().uge(LowerBound) &&
8006 CN->getAPIntValue().ult(UpperBound)) {
8007 SDLoc DL(N);
8008 Imm = CurDAG->getTargetConstant(CN->getZExtValue(), DL, N.getValueType());
8009 return true;
8010 }
8011 }
8012
8013 return false;
8014}
8015
8016template <bool MatchCBB>
8017bool AArch64DAGToDAGISel::SelectCmpBranchExtOperand(SDValue N, SDValue &Reg,
8018 SDValue &ExtType) {
8019
8020 // Use an invalid shift-extend value to indicate we don't need to extend later
8021 if (N.getOpcode() == ISD::AssertZext || N.getOpcode() == ISD::AssertSext) {
8022 EVT Ty = cast<VTSDNode>(N.getOperand(1))->getVT();
8023 if (Ty != (MatchCBB ? MVT::i8 : MVT::i16))
8024 return false;
8025 Reg = N.getOperand(0);
8026 ExtType = CurDAG->getSignedTargetConstant(AArch64_AM::InvalidShiftExtend,
8027 SDLoc(N), MVT::i32);
8028 return true;
8029 }
8030
8032
8033 if ((MatchCBB && (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB)) ||
8034 (!MatchCBB && (ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH))) {
8035 Reg = N.getOperand(0);
8036 ExtType =
8037 CurDAG->getTargetConstant(getExtendEncoding(ET), SDLoc(N), MVT::i32);
8038 return true;
8039 }
8040
8041 return false;
8042}
8043
8044/// Try to fold AArch64 CSEL/FCMP patterns to FMAXNM/FMINNM.
8045///
8046/// This is intentionally done in PreprocessISelDAG rather than DAGCombine:
8047/// doing this earlier based on the defining operation of X can be invalidated
8048/// by later DAG combines. At this point the DAG is being prepared for
8049/// instruction selection, so the use of isKnownNeverSNaN(X) applies to the
8050/// final SDValue being selected.
8051/// Only handles FCMP(X, C) with scalar FP types, where C is a non-NaN constant.
8052/// The nsz requirement is needed only when C is zero, to avoid signed-zero
8053/// mismatches. The never-sNaN check is required because AArch64 FMAXNM/FMINNM
8054/// differ from fcmp+fcsel for signaling NaN inputs.
8055SDValue AArch64DAGToDAGISel::tryFoldCselToFMaxMin(SDNode &N) {
8056 EVT VT = N.getValueType(0);
8057
8058 // Scalar FP only.
8059 if (!VT.isFloatingPoint() || VT.isVector())
8060 return SDValue();
8061
8062 SDValue TVal = N.getOperand(0);
8063 SDValue FVal = N.getOperand(1);
8064 SDValue CCVal = N.getOperand(2);
8065 SDValue Cmp = N.getOperand(3);
8066
8067 if (Cmp.getOpcode() != AArch64ISD::FCMP)
8068 return SDValue();
8069
8070 auto *CC = dyn_cast<ConstantSDNode>(CCVal);
8071 if (!CC)
8072 return SDValue();
8073
8074 SDValue CmpLHS = Cmp.getOperand(0);
8075 SDValue CmpRHS = Cmp.getOperand(1);
8076 unsigned CondCode = CC->getZExtValue();
8077
8078 // Map VT and operation (max/min) to machine opcode.
8079 auto getOpc = [](EVT VT, bool isMax) -> unsigned {
8080 if (VT == MVT::f16)
8081 return isMax ? AArch64::FMAXNMHrr : AArch64::FMINNMHrr;
8082 else if (VT == MVT::f32)
8083 return isMax ? AArch64::FMAXNMSrr : AArch64::FMINNMSrr;
8084 else if (VT == MVT::f64)
8085 return isMax ? AArch64::FMAXNMDrr : AArch64::FMINNMDrr;
8086 else
8087 return 0; // unsupported
8088 };
8089
8090 // Determine whether to use max or min based on condition code and operands.
8091 bool isMax;
8092 if (CondCode == AArch64CC::GT || CondCode == AArch64CC::GE) {
8093 if (TVal == CmpLHS && FVal == CmpRHS)
8094 isMax = true;
8095 else if (TVal == CmpRHS && FVal == CmpLHS)
8096 isMax = false;
8097 else
8098 return SDValue();
8099 } else if (CondCode == AArch64CC::MI || CondCode == AArch64CC::LS) {
8100 if (TVal == CmpLHS && FVal == CmpRHS)
8101 isMax = false;
8102 else if (TVal == CmpRHS && FVal == CmpLHS)
8103 isMax = true;
8104 else
8105 return SDValue();
8106 } else {
8107 return SDValue();
8108 }
8109
8110 // Get the machine opcode for this VT and operation.
8111 unsigned Opc = getOpc(VT, isMax);
8112 if (!Opc)
8113 return SDValue();
8114
8115 // Constant must be non-NaN.
8116 auto *CFP = dyn_cast<ConstantFPSDNode>(CmpRHS);
8117 if (!CFP || CFP->getValueAPF().isNaN())
8118 return SDValue();
8119
8120 // nsz flag required only when constant is zero: fmaxnm(+0,-0)=+0 differs from
8121 // fcmp+select's -0. For non-zero constants, semantics are identical.
8122 if (CFP->isZero() && !N.getFlags().hasNoSignedZeros())
8123 return SDValue();
8124
8125 // Only fold if variable operand is never sNaN.
8126 // This runs after DAG combines, so later combines cannot remove a defining
8127 // operation used by isKnownNeverSNaN().
8128 if (!CurDAG->isKnownNeverSNaN(CmpLHS))
8129 return SDValue();
8130
8131 SDLoc DL(&N);
8132
8133 // Directly emit the machine node
8134 return SDValue(CurDAG->getMachineNode(Opc, DL, VT, CmpLHS, CmpRHS), 0);
8135}
8136
8137void AArch64DAGToDAGISel::PreprocessISelDAG() {
8138 bool MadeChange = false;
8139 for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
8140 if (N.use_empty())
8141 continue;
8142
8144 switch (N.getOpcode()) {
8145 case ISD::SCALAR_TO_VECTOR: {
8146 EVT ScalarTy = N.getValueType(0).getVectorElementType();
8147 if ((ScalarTy == MVT::i32 || ScalarTy == MVT::i64) &&
8148 ScalarTy == N.getOperand(0).getValueType())
8149 Result = addBitcastHints(*CurDAG, N);
8150
8151 break;
8152 }
8153 case AArch64ISD::CSEL:
8154 Result = tryFoldCselToFMaxMin(N);
8155 break;
8156 default:
8157 break;
8158 }
8159
8160 if (Result) {
8161 LLVM_DEBUG(dbgs() << "AArch64 DAG preprocessing replacing:\nOld: ");
8162 LLVM_DEBUG(N.dump(CurDAG));
8163 LLVM_DEBUG(dbgs() << "\nNew: ");
8164 LLVM_DEBUG(Result.dump(CurDAG));
8165 LLVM_DEBUG(dbgs() << "\n");
8166
8167 CurDAG->ReplaceAllUsesOfValueWith(SDValue(&N, 0), Result);
8168 MadeChange = true;
8169 }
8170 }
8171
8172 if (MadeChange)
8173 CurDAG->RemoveDeadNodes();
8174
8176}
static SDValue Widen(SelectionDAG *CurDAG, SDValue N)
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms)
static int getIntOperandFromRegisterString(StringRef RegString)
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG)
NarrowVector - Given a value in the V128 register class, produce the equivalent value in the V64 regi...
static std::optional< APInt > GetNEONSplatValue(SDValue N)
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, unsigned NumberOfIgnoredHighBits, EVT VT)
Does DstMask form a complementary pair with the mask provided by BitsToBeInserted,...
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N)
Instructions that accept extend modifiers like UXTW expect the register being extended to be a GPR32,...
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op, SDValue &Src, int &DstLSB, int &Width)
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, SDValue &Src, int &DstLSB, int &Width)
Does this tree qualify as an attempt to move a bitfield into position, essentially "(and (shl VAL,...
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG)
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static SDValue addBitcastHints(SelectionDAG &DAG, SDNode &N)
addBitcastHints - This method adds bitcast hints to the operands of a node to help instruction select...
static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB, unsigned NumberOfIgnoredLowBits, bool BiggerPattern)
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, unsigned NumberOfIgnoredLowBits=0, bool BiggerPattern=false)
static bool isShiftedMask(uint64_t Mask, EVT VT)
bool SelectSMETile(unsigned &BaseReg, unsigned TileNum)
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root)
Return the EVT of the data associated to a memory operation in Root.
static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isWorthFoldingADDlow(SDValue N)
If there's a use of this ADDlow that's not itself a load/store then we'll need to create a real ADD i...
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N)
getShiftTypeForNode - Translate a shift node to the corresponding ShiftType value.
static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &LSB, unsigned &MSB)
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef< unsigned > Opcodes)
This function selects an opcode from a list of opcodes, which is expected to be the opcode for { 8-bi...
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, unsigned NumVec)
When PredVT is a scalable vector predicate in the form MVT::nx<M>xi1, it builds the correspondent sca...
static std::optional< APInt > DecodeNEONSplat(SDValue N)
static bool checkCVTFixedPointOperandWithFBitsForVectors(SelectionDAG *CurDAG, SDValue N, SDValue &FixedPos, unsigned RegWidth, bool isReciprocal)
static bool isPreferredADD(int64_t ImmOff)
static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, uint64_t Imm, uint64_t MSB, unsigned Depth)
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount)
Create a machine node performing a notional SHL of Op by ShlAmount.
static bool isWorthFoldingSHL(SDValue V)
Determine whether it is worth it to fold SHL into the addressing mode.
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, unsigned Depth)
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, unsigned &Immr, unsigned &Imms, bool BiggerPattern)
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1, SDValue Src, SDValue Dst, SelectionDAG *CurDAG, const bool BiggerPattern)
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, SDValue Orig, unsigned Depth)
static bool isMemOpOrPrefetch(SDNode *N)
static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, unsigned Depth)
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, SelectionDAG *CurDAG)
static APInt DecodeFMOVImm(uint64_t Imm, unsigned RegWidth)
static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, unsigned Depth)
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth=0)
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected)
static AArch64_AM::ShiftExtendType getExtendTypeForNode(SDValue N, bool IsLoadStore=false)
getExtendTypeForNode - Translate an extend node to the corresponding ExtendType value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm)
isIntImmediate - This method tests to see if the node is a constant operand.
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG, SDValue &ShiftedOperand, uint64_t &EncodedShiftImm)
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range, unsigned Size)
Check if the immediate offset is valid as a scaled immediate.
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op, bool BiggerPattern, const uint64_t NonZeroBits, SDValue &Src, int &DstLSB, int &Width)
return SDValue()
static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG)
WidenVector - Given a value in the V64 register class, produce the equivalent value in the V128 regis...
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define DEBUG_TYPE
IRTranslator LLVM IR MI
std::pair< Instruction::BinaryOps, Value * > OffsetOp
Find all possible pairs (BinOp, RHS) that BinOp V, RHS can be simplified.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t High
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define PASS_NAME
Value * RHS
Value * LHS
const AArch64RegisterInfo * getRegisterInfo() const override
const AArch64TargetLowering * getTargetLowering() const override
bool isStreaming() const
Returns true if the function has a streaming body.
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
unsigned getSVEVectorSizeInBits() const
bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const
Register matchRegisterName(StringRef RegName) const
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1076
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1621
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
void flipAllBits()
Toggle every bit to its opposite value.
Definition APInt.h:1475
bool isShiftedMask() const
Return true if this APInt value contains a non-empty sequence of ones with the remainder zero.
Definition APInt.h:511
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
iterator begin() const
Definition ArrayRef.h:129
const Constant * getConstVal() const
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
bool hasScalableStackID(int ObjectIdx) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
static constexpr unsigned MaxRecursionDepth
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
unsigned getID() const
Return the register class ID number.
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:972
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
uint32_t parseGenericRegister(StringRef Name)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the...
static uint64_t decodeAdvSIMDModImmType12(uint8_t Imm)
static uint64_t decodeAdvSIMDModImmType11(uint8_t Imm)
unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET)
Mapping from extend bits to required operation: shifter: 000 ==> uxtb 001 ==> uxth 010 ==> uxtw 011 =...
static uint64_t decodeAdvSIMDModImmType10(uint8_t Imm)
static bool isSVELogicalImm(unsigned SizeInBits, uint64_t ImmVal, uint64_t &Encoding)
static bool isSVECpyDupImm(int SizeInBits, int64_t Val, int32_t &Imm, int32_t &Shift)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static constexpr unsigned SVEBitsPerBlock
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ AssertAlign
AssertAlign - These nodes record if a register contains a value that has a known alignment and the tr...
Definition ISDOpcodes.h:69
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition ISDOpcodes.h:139
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ VSCALE
VSCALE(IMM) - Returns the runtime scaling factor used to calculate the number of elements within a sc...
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Not(const Pred &P) -> Not< Pred >
DiagnosticInfoOptimizationBase::Argument NV
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition MathExtras.h:267
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit ver...
Definition MathExtras.h:273
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2025
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAArch64ISelDag(AArch64TargetMachine &TM, CodeGenOptLevel OptLevel)
createAArch64ISelDag - This pass converts a legalized DAG into a AArch64-specific DAG,...
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isScalableVectorOf(EVT EltVT) const
Return true if this is a scalable vector with matching element type.
Definition ValueTypes.h:192
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:382
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:187
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition ValueTypes.h:225
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.