LLVM 23.0.0git
AArch64PostLegalizerLowering.cpp
Go to the documentation of this file.
//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

21
22#include "AArch64.h"
23#include "AArch64ExpandImm.h"
26#include "AArch64Subtarget.h"
47#include "llvm/IR/InstrTypes.h"
49#include <optional>
50
51#define GET_GICOMBINER_DEPS
52#include "AArch64GenPostLegalizeGILowering.inc"
53#undef GET_GICOMBINER_DEPS
54
55#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
56
57using namespace llvm;
58using namespace MIPatternMatch;
59using namespace AArch64GISelUtils;
60
61#define GET_GICOMBINER_TYPES
62#include "AArch64GenPostLegalizeGILowering.inc"
63#undef GET_GICOMBINER_TYPES
64
65namespace {
66
67/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
68///
69/// Used for matching target-supported shuffles before codegen.
70struct ShuffleVectorPseudo {
71 unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
72 Register Dst; ///< Destination register.
73 SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
74 ShuffleVectorPseudo(unsigned Opc, Register Dst,
75 std::initializer_list<SrcOp> SrcOps)
76 : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
77 ShuffleVectorPseudo() = default;
78};
79
80/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
81/// sources of the shuffle are different.
82std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
83 unsigned NumElts) {
84 // Look for the first non-undef element.
85 auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
86 if (FirstRealElt == M.end())
87 return std::nullopt;
88
89 // Use APInt to handle overflow when calculating expected element.
90 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
91 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1, false, true);
92
93 // The following shuffle indices must be the successive elements after the
94 // first real element.
95 if (any_of(
96 make_range(std::next(FirstRealElt), M.end()),
97 [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
98 return std::nullopt;
99
100 // The index of an EXT is the first element if it is not UNDEF.
101 // Watch out for the beginning UNDEFs. The EXT index should be the expected
102 // value of the first element. E.g.
103 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
104 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
105 // ExpectedElt is the last mask index plus 1.
106 uint64_t Imm = ExpectedElt.getZExtValue();
107 bool ReverseExt = false;
108
109 // There are two difference cases requiring to reverse input vectors.
110 // For example, for vector <4 x i32> we have the following cases,
111 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
112 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
113 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
114 // to reverse two input vectors.
115 if (Imm < NumElts)
116 ReverseExt = true;
117 else
118 Imm -= NumElts;
119 return std::make_pair(ReverseExt, Imm);
120}
121
122/// Helper function for matchINS.
123///
124/// \returns a value when \p M is an ins mask for \p NumInputElements.
125///
126/// First element of the returned pair is true when the produced
127/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
128///
129/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
130std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
131 int NumInputElements) {
132 if (M.size() != static_cast<size_t>(NumInputElements))
133 return std::nullopt;
134 int NumLHSMatch = 0, NumRHSMatch = 0;
135 int LastLHSMismatch = -1, LastRHSMismatch = -1;
136 for (int Idx = 0; Idx < NumInputElements; ++Idx) {
137 if (M[Idx] == -1) {
138 ++NumLHSMatch;
139 ++NumRHSMatch;
140 continue;
141 }
142 M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
143 M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
144 }
145 const int NumNeededToMatch = NumInputElements - 1;
146 if (NumLHSMatch == NumNeededToMatch)
147 return std::make_pair(true, LastLHSMismatch);
148 if (NumRHSMatch == NumNeededToMatch)
149 return std::make_pair(false, LastRHSMismatch);
150 return std::nullopt;
151}
152
153/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
154/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
155bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
156 ShuffleVectorPseudo &MatchInfo) {
157 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
158 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
159 Register Dst = MI.getOperand(0).getReg();
160 Register Src = MI.getOperand(1).getReg();
161 LLT Ty = MRI.getType(Dst);
162 unsigned EltSize = Ty.getScalarSizeInBits();
163
164 // Element size for a rev cannot be 64.
165 if (EltSize == 64)
166 return false;
167
168 unsigned NumElts = Ty.getNumElements();
169
170 // Try to produce a G_REV instruction
171 for (unsigned LaneSize : {64U, 32U, 16U}) {
172 if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
173 unsigned Opcode;
174 if (LaneSize == 64U)
175 Opcode = AArch64::G_REV64;
176 else if (LaneSize == 32U)
177 Opcode = AArch64::G_REV32;
178 else
179 Opcode = AArch64::G_BSWAP;
180
181 MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
182 return true;
183 }
184 }
185
186 return false;
187}
188
189/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
190/// a G_TRN1 or G_TRN2 instruction.
191bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
192 ShuffleVectorPseudo &MatchInfo) {
193 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
194 unsigned WhichResult;
195 unsigned OperandOrder;
196 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
197 Register Dst = MI.getOperand(0).getReg();
198 unsigned NumElts = MRI.getType(Dst).getNumElements();
199 if (!isTRNMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
200 return false;
201 unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
202 Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
203 Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
204 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
205 return true;
206}
207
208/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
209/// a G_UZP1 or G_UZP2 instruction.
210///
211/// \param [in] MI - The shuffle vector instruction.
212/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
213bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
214 ShuffleVectorPseudo &MatchInfo) {
215 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
216 unsigned WhichResult;
217 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
218 Register Dst = MI.getOperand(0).getReg();
219 unsigned NumElts = MRI.getType(Dst).getNumElements();
220 if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
221 return false;
222 unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
223 Register V1 = MI.getOperand(1).getReg();
224 Register V2 = MI.getOperand(2).getReg();
225 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
226 return true;
227}
228
229bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
230 ShuffleVectorPseudo &MatchInfo) {
231 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
232 unsigned WhichResult;
233 unsigned OperandOrder;
234 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
235 Register Dst = MI.getOperand(0).getReg();
236 unsigned NumElts = MRI.getType(Dst).getNumElements();
237 if (!isZIPMask(ShuffleMask, NumElts, WhichResult, OperandOrder))
238 return false;
239 unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
240 Register V1 = MI.getOperand(OperandOrder == 0 ? 1 : 2).getReg();
241 Register V2 = MI.getOperand(OperandOrder == 0 ? 2 : 1).getReg();
242 MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
243 return true;
244}
245
246/// Helper function for matchDup.
247bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
249 ShuffleVectorPseudo &MatchInfo) {
250 if (Lane != 0)
251 return false;
252
253 // Try to match a vector splat operation into a dup instruction.
254 // We're looking for this pattern:
255 //
256 // %scalar:gpr(s64) = COPY $x0
257 // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
258 // %cst0:gpr(s32) = G_CONSTANT i32 0
259 // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
260 // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
261 // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
262 // %zerovec(<2 x s32>)
263 //
264 // ...into:
265 // %splat = G_DUP %scalar
266
267 // Begin matching the insert.
268 auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
269 MI.getOperand(1).getReg(), MRI);
270 if (!InsMI)
271 return false;
272 // Match the undef vector operand.
273 if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
274 MRI))
275 return false;
276
277 // Match the index constant 0.
278 if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
279 return false;
280
281 MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
282 {InsMI->getOperand(2).getReg()});
283 return true;
284}
285
286/// Helper function for matchDup.
287bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
289 ShuffleVectorPseudo &MatchInfo) {
290 assert(Lane >= 0 && "Expected positive lane?");
291 int NumElements = MRI.getType(MI.getOperand(1).getReg()).getNumElements();
292 // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
293 // lane's definition directly.
294 auto *BuildVecMI =
295 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
296 MI.getOperand(Lane < NumElements ? 1 : 2).getReg(), MRI);
297 // If Lane >= NumElements then it is point to RHS, just check from RHS
298 if (NumElements <= Lane)
299 Lane -= NumElements;
300
301 if (!BuildVecMI)
302 return false;
303 Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
304 MatchInfo =
305 ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
306 return true;
307}
308
309bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
310 ShuffleVectorPseudo &MatchInfo) {
311 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
312 auto MaybeLane = getSplatIndex(MI);
313 if (!MaybeLane)
314 return false;
315 int Lane = *MaybeLane;
316 // If this is undef splat, generate it via "just" vdup, if possible.
317 if (Lane < 0)
318 Lane = 0;
319 if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
320 return true;
321 if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
322 return true;
323 return false;
324}
325
326// Check if an EXT instruction can handle the shuffle mask when the vector
327// sources of the shuffle are the same.
328bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
329 unsigned NumElts = Ty.getNumElements();
330
331 // Assume that the first shuffle index is not UNDEF. Fail if it is.
332 if (M[0] < 0)
333 return false;
334
335 // If this is a VEXT shuffle, the immediate value is the index of the first
336 // element. The other shuffle indices must be the successive elements after
337 // the first one.
338 unsigned ExpectedElt = M[0];
339 for (unsigned I = 1; I < NumElts; ++I) {
340 // Increment the expected index. If it wraps around, just follow it
341 // back to index zero and keep going.
342 ++ExpectedElt;
343 if (ExpectedElt == NumElts)
344 ExpectedElt = 0;
345
346 if (M[I] < 0)
347 continue; // Ignore UNDEF indices.
348 if (ExpectedElt != static_cast<unsigned>(M[I]))
349 return false;
350 }
351
352 return true;
353}
354
355bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
356 ShuffleVectorPseudo &MatchInfo) {
357 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
358 Register Dst = MI.getOperand(0).getReg();
359 LLT DstTy = MRI.getType(Dst);
360 Register V1 = MI.getOperand(1).getReg();
361 Register V2 = MI.getOperand(2).getReg();
362 auto Mask = MI.getOperand(3).getShuffleMask();
363 uint64_t Imm;
364 auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
365 uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
366
367 if (!ExtInfo) {
368 if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
369 !isSingletonExtMask(Mask, DstTy))
370 return false;
371
372 Imm = Mask[0] * ExtFactor;
373 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
374 return true;
375 }
376 bool ReverseExt;
377 std::tie(ReverseExt, Imm) = *ExtInfo;
378 if (ReverseExt)
379 std::swap(V1, V2);
380 Imm *= ExtFactor;
381 MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
382 return true;
383}
384
385/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
386/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
387void applyShuffleVectorPseudo(MachineInstr &MI, MachineRegisterInfo &MRI,
388 ShuffleVectorPseudo &MatchInfo) {
389 MachineIRBuilder MIRBuilder(MI);
390 if (MatchInfo.Opc == TargetOpcode::G_BSWAP) {
391 assert(MatchInfo.SrcOps.size() == 1);
392 LLT DstTy = MRI.getType(MatchInfo.Dst);
393 assert(DstTy == LLT::fixed_vector(8, 8) ||
394 DstTy == LLT::fixed_vector(16, 8));
395 LLT BSTy = DstTy == LLT::fixed_vector(8, 8)
398 // FIXME: NVCAST
399 auto BS1 = MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {BSTy},
400 MatchInfo.SrcOps[0]);
401 auto BS2 = MIRBuilder.buildInstr(MatchInfo.Opc, {BSTy}, {BS1});
402 MIRBuilder.buildInstr(TargetOpcode::G_BITCAST, {MatchInfo.Dst}, {BS2});
403 } else
404 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
405 MI.eraseFromParent();
406}
407
408/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
409/// Special-cased because the constant operand must be emitted as a G_CONSTANT
410/// for the imported tablegen patterns to work.
411void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
412 MachineIRBuilder MIRBuilder(MI);
413 if (MatchInfo.SrcOps[2].getImm() == 0)
414 MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
415 else {
416 // Tablegen patterns expect an i32 G_CONSTANT as the final op.
417 auto Cst = MIRBuilder.buildConstant(LLT::integer(32),
418 MatchInfo.SrcOps[2].getImm());
419 MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
420 {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
421 }
422 MI.eraseFromParent();
423}
424
425void applyFullRev(MachineInstr &MI, MachineRegisterInfo &MRI) {
426 Register Dst = MI.getOperand(0).getReg();
427 Register Src = MI.getOperand(1).getReg();
428 LLT DstTy = MRI.getType(Dst);
429 assert(DstTy.getSizeInBits() == 128 &&
430 "Expected 128bit vector in applyFullRev");
431 MachineIRBuilder MIRBuilder(MI);
432 auto Cst = MIRBuilder.buildConstant(LLT::integer(32), 8);
433 auto Rev = MIRBuilder.buildInstr(AArch64::G_REV64, {DstTy}, {Src});
434 MIRBuilder.buildInstr(AArch64::G_EXT, {Dst}, {Rev, Rev, Cst});
435 MI.eraseFromParent();
436}
437
438bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
439 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
440
441 auto ValAndVReg =
442 getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
443 return !ValAndVReg;
444}
445
446void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
447 MachineIRBuilder &Builder) {
448 auto &Insert = cast<GInsertVectorElement>(MI);
449 Builder.setInstrAndDebugLoc(Insert);
450
451 Register Offset = Insert.getIndexReg();
452 LLT VecTy = MRI.getType(Insert.getReg(0));
453 LLT EltTy = MRI.getType(Insert.getElementReg());
454 LLT IdxTy = MRI.getType(Insert.getIndexReg());
455
456 if (VecTy.isScalableVector())
457 return;
458
459 // Create a stack slot and store the vector into it
460 MachineFunction &MF = Builder.getMF();
461 Align Alignment(
462 std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
463 int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
464 Alignment, false);
465 LLT FramePtrTy = LLT::pointer(0, 64);
467 auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);
468
469 Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));
470
471 // Get the pointer to the element, and be sure not to hit undefined behavior
472 // if the index is out of bounds.
474 "Expected a power-2 vector size");
475 auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
476 Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
477 auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
478 Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
479 Register EltPtr =
480 Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
481 .getReg(0);
482
483 // Write the inserted element
484 Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
485 // Reload the whole vector.
486 Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
487 Insert.eraseFromParent();
488}
489
490/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
491/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
492///
493/// e.g.
494/// %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
495///
496/// Can be represented as
497///
498/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
499/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
500///
501bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
502 std::tuple<Register, int, Register, int> &MatchInfo) {
503 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
504 ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
505 Register Dst = MI.getOperand(0).getReg();
506 int NumElts = MRI.getType(Dst).getNumElements();
507 auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
508 if (!DstIsLeftAndDstLane)
509 return false;
510 bool DstIsLeft;
511 int DstLane;
512 std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
513 Register Left = MI.getOperand(1).getReg();
514 Register Right = MI.getOperand(2).getReg();
515 Register DstVec = DstIsLeft ? Left : Right;
516 Register SrcVec = Left;
517
518 int SrcLane = ShuffleMask[DstLane];
519 if (SrcLane >= NumElts) {
520 SrcVec = Right;
521 SrcLane -= NumElts;
522 }
523
524 MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
525 return true;
526}
527
528void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
529 MachineIRBuilder &Builder,
530 std::tuple<Register, int, Register, int> &MatchInfo) {
531 Builder.setInstrAndDebugLoc(MI);
532 Register Dst = MI.getOperand(0).getReg();
533 auto ScalarTy = MRI.getType(Dst).getElementType();
534 Register DstVec, SrcVec;
535 int DstLane, SrcLane;
536 std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
537 auto SrcCst = Builder.buildConstant(LLT::integer(64), SrcLane);
538 auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
539 auto DstCst = Builder.buildConstant(LLT::integer(64), DstLane);
540 Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
541 MI.eraseFromParent();
542}
543
544/// isVShiftRImm - Check if this is a valid vector for the immediate
545/// operand of a vector shift right operation. The value must be in the range:
546/// 1 <= Value <= ElementBits for a right shift.
548 int64_t &Cnt) {
549 assert(Ty.isVector() && "vector shift count is not a vector type");
551 auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
552 if (!Cst)
553 return false;
554 Cnt = *Cst;
555 int64_t ElementBits = Ty.getScalarSizeInBits();
556 return Cnt >= 1 && Cnt <= ElementBits;
557}
558
559/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
560bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
561 int64_t &Imm) {
562 assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
563 MI.getOpcode() == TargetOpcode::G_LSHR);
564 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
565 if (!Ty.isVector())
566 return false;
567 return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
568}
569
570void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
571 int64_t &Imm) {
572 unsigned Opc = MI.getOpcode();
573 assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
574 unsigned NewOpc =
575 Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
576 MachineIRBuilder MIB(MI);
577 MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1)}).addImm(Imm);
578 MI.eraseFromParent();
579}
580
581bool isLegalCmpImmed(const APInt &C) {
582 // Works for negative immediates too, as it can be written as an ADDS
583 // instruction with a negated immediate.
584 return isLegalArithImmed(C.abs().getZExtValue());
585}
586
587/// Determine whether an integer G_ICMP against 1 or -1 can compare
588/// against 0 instead.
589///
590/// AArch64 can fold a compare-with-zero more cheaply than some non-arithmetic
591/// immediates (SUBS/ADDS, or TST when the LHS is an AND). When the predicate
592/// can be adjusted without changing semantics, the RHS may become 0.
593///
594/// Supported transforms (signed predicates only):
595/// (and X, Y) slt 1 => (and X, Y) sle 0
596/// (and X, Y) sge 1 => (and X, Y) sgt 0
597/// X sle -1 => X slt 0
598/// X sgt -1 => X sge 0
599///
600/// The compare-against-1 cases require the LHS to be G_AND because the
601/// compare-with-zero path enables ANDS (TST) selection, and ANDS flags are
602/// only reliable for those signed comparisons. This mirrors SelectionDAG
603/// emitComparison().
604///
605/// For compare-against--1 on a non-AND LHS, \p LHS must have a single
606/// non-debug use so other users are not left with a different immediate.
607///
608/// \param LHS The compare LHS register.
609/// \param C The constant RHS (only 1 or all-ones are considered).
610/// \param P In/out predicate; updated when a transform applies.
611/// \param MRI Used to inspect the LHS definition and use count.
612/// \returns true if \p P was updated and comparing against 0 is equivalent.
613static bool shouldBeAdjustedToZero(Register LHS, const APInt &C,
615 const MachineRegisterInfo &MRI) {
616 const bool IsAndLHS = getOpcodeDef<GAnd>(LHS, MRI) != nullptr;
617
618 if (C.isOne() && (P == CmpInst::ICMP_SLT || P == CmpInst::ICMP_SGE) &&
619 IsAndLHS) {
621 return true;
622 }
623
624 if (!IsAndLHS && !MRI.hasOneNonDBGUse(LHS))
625 return false;
626
627 if (C.isAllOnes() && (P == CmpInst::ICMP_SLE || P == CmpInst::ICMP_SGT)) {
629 return true;
630 }
631 return false;
632}
633
634/// Determine if it is possible to modify the \p RHS and predicate \p P of a
635/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
636///
637/// \returns A pair containing the updated immediate and predicate which may
638/// be used to optimize the instruction.
639///
640/// \note This assumes that the comparison has been legalized.
641std::optional<std::pair<uint64_t, CmpInst::Predicate>>
642tryAdjustICmpImmAndPred(Register LHS, Register RHS, CmpInst::Predicate P,
643 const MachineRegisterInfo &MRI) {
644 const auto &Ty = MRI.getType(RHS);
645 if (Ty.isVector())
646 return std::nullopt;
647 assert((Ty.getSizeInBits() == 32 || Ty.getSizeInBits() == 64) &&
648 "Expected 32 or 64 bit compare only?");
649
650 // If the RHS is not a constant, or the RHS is already a valid arithmetic
651 // immediate, then there is nothing to change.
652 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
653 if (!ValAndVReg)
654 return std::nullopt;
655 APInt C = ValAndVReg->Value;
656 if (shouldBeAdjustedToZero(LHS, C, P, MRI))
657 return {{0, P}};
658
659 if (isLegalCmpImmed(C))
660 return std::nullopt;
661
662 uint64_t OriginalC = C.getZExtValue();
663
664 // We have a non-arithmetic immediate. Check if adjusting the immediate and
665 // adjusting the predicate will result in a legal arithmetic immediate.
666 switch (P) {
667 default:
668 return std::nullopt;
671 // Check for
672 //
673 // x slt c => x sle c - 1
674 // x sge c => x sgt c - 1
675 //
676 // When c is not the smallest possible negative number.
677 if (C.isMinSignedValue())
678 return std::nullopt;
680 C = C - 1;
681 break;
684 // Check for
685 //
686 // x ult c => x ule c - 1
687 // x uge c => x ugt c - 1
688 //
689 // When c is not zero.
690 assert(!C.isZero() && "C should not be zero here!");
692 C = C - 1;
693 break;
696 // Check for
697 //
698 // x sle c => x slt c + 1
699 // x sgt c => s sge c + 1
700 //
701 // When c is not the largest possible signed integer.
702 if (C.isMaxSignedValue())
703 return std::nullopt;
705 C = C + 1;
706 break;
709 // Check for
710 //
711 // x ule c => x ult c + 1
712 // x ugt c => s uge c + 1
713 //
714 // When c is not the largest possible unsigned integer.
715 if (C.isAllOnes())
716 return std::nullopt;
718 C = C + 1;
719 break;
720 }
721
722 // Check if the new constant is valid, and return the updated constant and
723 // predicate if it is.
724 uint64_t NewC = C.getZExtValue();
725 if (isLegalCmpImmed(C))
726 return {{NewC, P}};
727
728 auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
730 AArch64_IMM::expandMOVImm(Imm, 32, Insn);
731 return Insn.size();
732 };
733
734 if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(NewC))
735 return {{NewC, P}};
736
737 return std::nullopt;
738}
739
740/// Determine whether or not it is possible to update the RHS and predicate of
741/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
742/// immediate.
743///
744/// \p MI - The G_ICMP instruction
745/// \p MatchInfo - The new RHS immediate and predicate on success
746///
747/// See tryAdjustICmpImmAndPred for valid transformations.
748bool matchAdjustICmpImmAndPred(
750 std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
751 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
752 Register LHS = MI.getOperand(2).getReg();
753 Register RHS = MI.getOperand(3).getReg();
754 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
755 if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(LHS, RHS, Pred, MRI)) {
756 MatchInfo = *MaybeNewImmAndPred;
757 return true;
758 }
759 return false;
760}
761
762void applyAdjustICmpImmAndPred(
763 MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
764 MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
766 MachineOperand &RHS = MI.getOperand(3);
767 MachineRegisterInfo &MRI = *MIB.getMRI();
768 auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
769 MatchInfo.first);
770 Observer.changingInstr(MI);
771 RHS.setReg(Cst->getOperand(0).getReg());
772 MI.getOperand(1).setPredicate(MatchInfo.second);
773 Observer.changedInstr(MI);
774}
775
776bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
777 std::pair<unsigned, int> &MatchInfo) {
778 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
779 Register Src1Reg = MI.getOperand(1).getReg();
780 const LLT SrcTy = MRI.getType(Src1Reg);
781 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
782
783 auto LaneIdx = getSplatIndex(MI);
784 if (!LaneIdx)
785 return false;
786
787 // The lane idx should be within the first source vector.
788 if (*LaneIdx >= SrcTy.getNumElements())
789 return false;
790
791 if (DstTy != SrcTy)
792 return false;
793
794 LLT ScalarTy = SrcTy.getElementType();
795 unsigned ScalarSize = ScalarTy.getSizeInBits();
796
797 unsigned Opc = 0;
798 switch (SrcTy.getNumElements()) {
799 case 2:
800 if (ScalarSize == 64)
801 Opc = AArch64::G_DUPLANE64;
802 else if (ScalarSize == 32)
803 Opc = AArch64::G_DUPLANE32;
804 break;
805 case 4:
806 if (ScalarSize == 32)
807 Opc = AArch64::G_DUPLANE32;
808 else if (ScalarSize == 16)
809 Opc = AArch64::G_DUPLANE16;
810 break;
811 case 8:
812 if (ScalarSize == 8)
813 Opc = AArch64::G_DUPLANE8;
814 else if (ScalarSize == 16)
815 Opc = AArch64::G_DUPLANE16;
816 break;
817 case 16:
818 if (ScalarSize == 8)
819 Opc = AArch64::G_DUPLANE8;
820 break;
821 default:
822 break;
823 }
824 if (!Opc)
825 return false;
826
827 MatchInfo.first = Opc;
828 MatchInfo.second = *LaneIdx;
829 return true;
830}
831
832void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
833 MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
834 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
835 Register Src1Reg = MI.getOperand(1).getReg();
836 const LLT SrcTy = MRI.getType(Src1Reg);
837
838 B.setInstrAndDebugLoc(MI);
839 auto Lane = B.buildConstant(LLT::integer(64), MatchInfo.second);
840
841 Register DupSrc = MI.getOperand(1).getReg();
842 // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
843 // To do this, we can use a G_CONCAT_VECTORS to do the widening.
844 if (SrcTy.getSizeInBits() == 64) {
845 auto Undef = B.buildUndef(SrcTy);
846 DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
847 {Src1Reg, Undef.getReg(0)})
848 .getReg(0);
849 }
850 B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
851 MI.eraseFromParent();
852}
853
854bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
855 auto &Unmerge = cast<GUnmerge>(MI);
856 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
857 const LLT SrcTy = MRI.getType(Src1Reg);
858 if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
859 return false;
860 return SrcTy.isVector() && !SrcTy.isScalable() &&
861 (Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1 ||
862 (Unmerge.getNumDefs() == 2 && SrcTy.getSizeInBits() == 128 &&
863 MRI.getType(Unmerge.getReg(0)).getSizeInBits() == 64));
864}
865
866void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
868 auto &Unmerge = cast<GUnmerge>(MI);
869 Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
870 const LLT SrcTy = MRI.getType(Src1Reg);
871 const LLT DstTy = MRI.getType(Unmerge.getReg(0));
872 assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
873 "Expected a fixed length vector");
874
875 if (DstTy.isVector()) {
876 assert(Unmerge.getNumDefs() == 2);
877 if (!MRI.use_nodbg_empty(Unmerge.getReg(0)))
878 B.buildExtractSubvector(Unmerge.getReg(0), Src1Reg, 0);
879 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
880 B.buildExtractSubvector(Unmerge.getReg(1), Src1Reg,
881 SrcTy.getNumElements() / 2);
882 } else {
883 for (int I = 0; I < SrcTy.getNumElements(); ++I)
884 if (!MRI.use_nodbg_empty(Unmerge.getReg(I)))
885 B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
886 }
887 MI.eraseFromParent();
888}
889
890bool matchBuildVectorToDup(MachineInstr &MI, Register &Src,
891 MachineRegisterInfo &MRI) {
892 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
893
894 // Later, during selection, we'll try to match imported patterns using
895 // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
896 // G_BUILD_VECTORs which could match those patterns.
898 return false;
899
900 // Find buildvector which always uses the same register or undef. Return true
901 // so long as at least 2 registers were found (not all-undef or only 1
902 // non-undef entry).
903 Register Reg = 0;
904 unsigned NumNonUndef = 0;
905 for (const MachineOperand &Op : drop_begin(MI.operands())) {
906 if (getOpcodeDef<GImplicitDef>(Op.getReg(), MRI))
907 continue;
908
909 if (!Reg)
910 Reg = Op.getReg();
911 else if (Op.getReg() != Reg)
912 return false;
913 NumNonUndef++;
914 }
915
916 Src = Reg;
917 return Reg && NumNonUndef > 1;
918}
919
920void applyBuildVectorToDup(MachineInstr &MI, Register Src,
922 B.setInstrAndDebugLoc(MI);
923 B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()}, {Src});
924 MI.eraseFromParent();
925}
926
927/// \returns how many instructions would be saved by folding a G_ICMP's shift
928/// and/or extension operations.
929static unsigned getCmpOperandFoldingProfit(Register CmpOp,
930 MachineRegisterInfo &MRI) {
931 // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
932 auto IsSupportedExtend = [&](const MachineInstr &MI) {
933 if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
934 return true;
935 if (MI.getOpcode() == TargetOpcode::G_AND) {
936 auto ValAndVReg =
937 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
938 if (ValAndVReg) {
939 uint64_t Mask = ValAndVReg->Value.getZExtValue();
940 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
941 }
942 }
943 return false;
944 };
945
946 // No instructions to save if there's more than one use or no uses.
947 if (!MRI.hasOneNonDBGUse(CmpOp))
948 return 0;
949
950 MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
951 if (IsSupportedExtend(*Def))
952 return 1;
953
954 unsigned Opc = Def->getOpcode();
955 if (Opc == TargetOpcode::G_SHL || Opc == TargetOpcode::G_LSHR ||
956 Opc == TargetOpcode::G_ASHR) {
957 auto MaybeShiftAmt =
958 getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
959 if (MaybeShiftAmt) {
960 uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
961 MachineInstr *ShiftLHS =
962 getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);
963 if (IsSupportedExtend(*ShiftLHS))
964 return (ShiftAmt <= 4) ? 2 : 1;
965 LLT Ty = MRI.getType(Def->getOperand(0).getReg());
966 if (Ty.isVector())
967 return 0;
968 unsigned ShiftSize = Ty.getSizeInBits();
969 if ((ShiftSize == 32 && ShiftAmt <= 31) ||
970 (ShiftSize == 64 && ShiftAmt <= 63))
971 return 1;
972 }
973 }
974
975 return 0;
976}
977
978/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
979/// instruction \p MI.
980bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
981 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
982 // Swap the operands if it would introduce a profitable folding opportunity.
983 // (e.g. a shift + extend).
984 //
985 // For example:
986 // lsl w13, w11, #1
987 // cmp w13, w12
988 // can be turned into:
989 // cmp w12, w11, lsl #1
990
991 // Don't swap if there's a constant on the RHS and it is a legal compare
992 // immediate, because we know we can fold that.
993 Register RHS = MI.getOperand(3).getReg();
994 auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
995 if (RHSCst && isLegalCmpImmed(RHSCst->Value))
996 return false;
997
998 Register LHS = MI.getOperand(2).getReg();
999 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1000 auto GetRegForProfit = [&](Register Reg) {
1002 return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
1003 };
1004
1005 // Don't have a constant on the RHS. If we swap the LHS and RHS of the
1006 // compare, would we be able to fold more instructions?
1007 Register TheLHS = GetRegForProfit(LHS);
1008 Register TheRHS = GetRegForProfit(RHS);
1009
1010 // If the LHS is more likely to give us a folding opportunity, then swap the
1011 // LHS and RHS.
1012 return (getCmpOperandFoldingProfit(TheLHS, MRI) >
1013 getCmpOperandFoldingProfit(TheRHS, MRI));
1014}
1015
1016void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
1017 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1018 Register LHS = MI.getOperand(2).getReg();
1019 Register RHS = MI.getOperand(3).getReg();
1020 Observer.changedInstr(MI);
1021 MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
1022 MI.getOperand(2).setReg(RHS);
1023 MI.getOperand(3).setReg(LHS);
1024 Observer.changedInstr(MI);
1025}
1026
1027/// \returns a function which builds a vector floating point compare instruction
1028/// for a condition code \p CC.
1029/// \param [in] NoNans - True if the instruction has nnan flag.
1030std::function<Register(MachineIRBuilder &)>
1031getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool NoNans,
1032 MachineRegisterInfo &MRI) {
1033 LLT OldTy = MRI.getType(LHS);
1034 LLT DstTy = LLT::fixed_vector(OldTy.getNumElements(),
1036 assert(DstTy.isVector() && "Expected vector types only?");
1037 switch (CC) {
1038 default:
1039 llvm_unreachable("Unexpected condition code!");
1040 case AArch64CC::NE:
1041 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1042 auto FCmp = MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
1043 return MIB.buildNot(DstTy, FCmp).getReg(0);
1044 };
1045 case AArch64CC::EQ:
1046 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1047 return MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS}).getReg(0);
1048 };
1049 case AArch64CC::GE:
1050 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1051 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS}).getReg(0);
1052 };
1053 case AArch64CC::GT:
1054 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1055 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS}).getReg(0);
1056 };
1057 case AArch64CC::LS:
1058 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1059 return MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS}).getReg(0);
1060 };
1061 case AArch64CC::MI:
1062 return [LHS, RHS, DstTy](MachineIRBuilder &MIB) {
1063 return MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS}).getReg(0);
1064 };
1065 }
1066}
1067
1068/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1069bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1070 MachineIRBuilder &MIB) {
1071 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1072 const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
1073
1074 Register Dst = MI.getOperand(0).getReg();
1075 LLT DstTy = MRI.getType(Dst);
1076 if (!DstTy.isVector() || !ST.hasNEON())
1077 return false;
1078 Register LHS = MI.getOperand(2).getReg();
1079 unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
1080 if (EltSize == 16 && !ST.hasFullFP16())
1081 return false;
1082 if (EltSize != 16 && EltSize != 32 && EltSize != 64)
1083 return false;
1084
1085 return true;
1086}
1087
1088/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
1089void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
1090 MachineIRBuilder &MIB) {
1091 assert(MI.getOpcode() == TargetOpcode::G_FCMP);
1092
1093 const auto &CmpMI = cast<GFCmp>(MI);
1094
1095 Register Dst = CmpMI.getReg(0);
1096 CmpInst::Predicate Pred = CmpMI.getCond();
1097 Register LHS = CmpMI.getLHSReg();
1098 Register RHS = CmpMI.getRHSReg();
1099
1100 LLT DstTy = MRI.getType(Dst);
1101
1102 bool Invert = false;
1104 if ((Pred == CmpInst::Predicate::FCMP_ORD ||
1106 isBuildVectorAllZeros(*MRI.getVRegDef(RHS), MRI)) {
1107 // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
1108 // NaN, so equivalent to a == a and doesn't need the two comparisons an
1109 // "ord" normally would.
1110 // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
1111 // thus equivalent to a != a.
1112 RHS = LHS;
1114 } else
1115 changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
1116
1117 // Instead of having an apply function, just build here to simplify things.
1119
1120 // TODO: Also consider GISelValueTracking result if eligible.
1121 const bool NoNans = MI.getFlag(MachineInstr::FmNoNans);
1122
1123 auto Cmp = getVectorFCMP(CC, LHS, RHS, NoNans, MRI);
1124 Register CmpRes;
1125 if (CC2 == AArch64CC::AL)
1126 CmpRes = Cmp(MIB);
1127 else {
1128 auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, NoNans, MRI);
1129 auto Cmp2Dst = Cmp2(MIB);
1130 auto Cmp1Dst = Cmp(MIB);
1131 CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
1132 }
1133 if (Invert)
1134 CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
1135 MRI.replaceRegWith(Dst, CmpRes);
1136 MI.eraseFromParent();
1137}
1138
1139// Matches G_BUILD_VECTOR where at least one source operand is not a constant
1140bool matchLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI) {
1141 auto *GBuildVec = cast<GBuildVector>(&MI);
1142
1143 // Check if the values are all constants
1144 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1145 auto ConstVal =
1146 getAnyConstantVRegValWithLookThrough(GBuildVec->getSourceReg(I), MRI);
1147
1148 if (!ConstVal.has_value())
1149 return true;
1150 }
1151
1152 return false;
1153}
1154
1155void applyLowerBuildToInsertVecElt(MachineInstr &MI, MachineRegisterInfo &MRI,
1157 auto *GBuildVec = cast<GBuildVector>(&MI);
1158 LLT DstTy = MRI.getType(GBuildVec->getReg(0));
1159 Register DstReg = B.buildUndef(DstTy).getReg(0);
1160
1161 for (unsigned I = 0; I < GBuildVec->getNumSources(); ++I) {
1162 Register SrcReg = GBuildVec->getSourceReg(I);
1163 if (mi_match(SrcReg, MRI, m_GImplicitDef()))
1164 continue;
1165 auto IdxReg = B.buildConstant(LLT::integer(64), I);
1166 DstReg =
1167 B.buildInsertVectorElement(DstTy, DstReg, SrcReg, IdxReg).getReg(0);
1168 }
1169 B.buildCopy(GBuildVec->getReg(0), DstReg);
1170 GBuildVec->eraseFromParent();
1171}
1172
1173bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1174 Register &SrcReg) {
1175 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1176 Register DstReg = MI.getOperand(0).getReg();
1177 if (MRI.getType(DstReg).isVector())
1178 return false;
1179 // Match a store of a truncate.
1180 if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
1181 return false;
1182 // Only form truncstores for value types of max 64b.
1183 return MRI.getType(SrcReg).getSizeInBits() <= 64;
1184}
1185
1186void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
1188 Register &SrcReg) {
1189 assert(MI.getOpcode() == TargetOpcode::G_STORE);
1190 Observer.changingInstr(MI);
1191 MI.getOperand(0).setReg(SrcReg);
1192 Observer.changedInstr(MI);
1193}
1194
1195// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
1196// form in the first place for combine opportunities, so any remaining ones
1197// at this stage need be lowered back.
1198bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
1199 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1200 Register DstReg = MI.getOperand(0).getReg();
1201 LLT DstTy = MRI.getType(DstReg);
1202 return DstTy.isVector();
1203}
1204
1205void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
1207 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1208 B.setInstrAndDebugLoc(MI);
1209 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1210 Helper.lower(MI, 0, /* Unused hint type */ LLT());
1211}
1212
1213/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1214/// => unused, <N x t> = unmerge v
1215bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1216 Register &MatchInfo) {
1217 auto &Unmerge = cast<GUnmerge>(MI);
1218 if (Unmerge.getNumDefs() != 2)
1219 return false;
1220 if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1221 return false;
1222
1223 LLT DstTy = MRI.getType(Unmerge.getReg(0));
1224 if (!DstTy.isVector())
1225 return false;
1226
1227 MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1228 if (!Ext)
1229 return false;
1230
1231 Register ExtSrc1 = Ext->getOperand(1).getReg();
1232 Register ExtSrc2 = Ext->getOperand(2).getReg();
1233 auto LowestVal =
1235 if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1236 return false;
1237
1238 if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1239 return false;
1240
1241 MatchInfo = ExtSrc1;
1242 return true;
1243}
1244
1245void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1247 GISelChangeObserver &Observer, Register &SrcReg) {
1248 Observer.changingInstr(MI);
1249 // Swap dst registers.
1250 Register Dst1 = MI.getOperand(0).getReg();
1251 MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1252 MI.getOperand(1).setReg(Dst1);
1253 MI.getOperand(2).setReg(SrcReg);
1254 Observer.changedInstr(MI);
1255}
1256
1257// Match mul({z/s}ext , {z/s}ext) => {u/s}mull OR
1258// Match v2s64 mul instructions, which will then be scalarised later on
1259// Doing these two matches in one function to ensure that the order of matching
1260// will always be the same.
1261// Try lowering MUL to MULL before trying to scalarize if needed.
1262bool matchMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI) {
1263 // Get the instructions that defined the source operand
1264 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1265 return DstTy == LLT::fixed_vector(2, 64);
1266}
1267
1268void applyMulv2s64(MachineInstr &MI, MachineRegisterInfo &MRI,
1270 assert(MI.getOpcode() == TargetOpcode::G_MUL &&
1271 "Expected a G_MUL instruction");
1272
1273 // Get the instructions that defined the source operand
1274 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1275 assert(DstTy == LLT::fixed_vector(2, 64) && "Expected v2s64 Mul");
1276 LegalizerHelper Helper(*MI.getMF(), Observer, B);
1277 Helper.fewerElementsVector(
1278 MI, 0,
1280}
1281
1282class AArch64PostLegalizerLoweringImpl : public Combiner {
1283protected:
1284 const CombinerHelper Helper;
1285 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
1286 const AArch64Subtarget &STI;
1287
1288public:
1289 AArch64PostLegalizerLoweringImpl(
1290 MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,
1291 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1292 const AArch64Subtarget &STI);
1293
1294 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
1295
1296 bool tryCombineAll(MachineInstr &I) const override;
1297
1298private:
1299#define GET_GICOMBINER_CLASS_MEMBERS
1300#include "AArch64GenPostLegalizeGILowering.inc"
1301#undef GET_GICOMBINER_CLASS_MEMBERS
1302};
1303
1304#define GET_GICOMBINER_IMPL
1305#include "AArch64GenPostLegalizeGILowering.inc"
1306#undef GET_GICOMBINER_IMPL
1307
1308AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
1309 MachineFunction &MF, CombinerInfo &CInfo, GISelCSEInfo *CSEInfo,
1310 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
1311 const AArch64Subtarget &STI)
1312 : Combiner(MF, CInfo, /*VT*/ nullptr, CSEInfo),
1313 Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
1314 STI(STI),
1316#include "AArch64GenPostLegalizeGILowering.inc"
1318{
1319}
1320
1321bool runPostLegalizerLowering(
1322 MachineFunction &MF,
1323 const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig) {
1324 if (MF.getProperties().hasFailedISel())
1325 return false;
1326 const Function &F = MF.getFunction();
1327
1329 CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
1330 /*LegalizerInfo=*/nullptr, /*OptEnabled=*/true,
1331 F.hasOptSize(), F.hasMinSize());
1332 // Disable fixed-point iteration to reduce compile-time
1333 CInfo.MaxIterations = 1;
1334 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
1335 // PostLegalizerCombiner performs DCE, so a full DCE pass is unnecessary.
1336 CInfo.EnableFullDCE = false;
1337 AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, /*CSEInfo=*/nullptr,
1338 RuleConfig, ST);
1339 return Impl.combineMachineInstrs();
1340}
1341
1342class AArch64PostLegalizerLoweringLegacy : public MachineFunctionPass {
1343public:
1344 static char ID;
1345
1346 AArch64PostLegalizerLoweringLegacy();
1347
1348 StringRef getPassName() const override {
1349 return "AArch64PostLegalizerLowering";
1350 }
1351
1352 bool runOnMachineFunction(MachineFunction &MF) override;
1353 void getAnalysisUsage(AnalysisUsage &AU) const override;
1354
1355private:
1356 AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
1357};
1358} // end anonymous namespace
1359
1360void AArch64PostLegalizerLoweringLegacy::getAnalysisUsage(
1361 AnalysisUsage &AU) const {
1362 AU.setPreservesCFG();
1365}
1366
1367AArch64PostLegalizerLoweringLegacy::AArch64PostLegalizerLoweringLegacy()
1368 : MachineFunctionPass(ID) {
1369 if (!RuleConfig.parseCommandLineOption())
1370 report_fatal_error("Invalid rule identifier");
1371}
1372
1373bool AArch64PostLegalizerLoweringLegacy::runOnMachineFunction(
1374 MachineFunction &MF) {
1375 assert(MF.getProperties().hasLegalized() && "Expected a legalized function?");
1376 return runPostLegalizerLowering(MF, RuleConfig);
1377}
1378
// Definition of the legacy pass's static ID member; the
// AArch64PostLegalizerLoweringLegacy constructor passes it to the
// MachineFunctionPass base class.
char AArch64PostLegalizerLoweringLegacy::ID = 0;
// Pass initialization boilerplate for the legacy pass. The macro arguments
// are (passName, arg, name, cfg, analysis): DEBUG_TYPE is supplied as `arg`,
// the string is the pass description, and both `cfg` and `analysis` are
// false.
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_END(AArch64PostLegalizerLoweringLegacy, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)
1386
1388 : RuleConfig(
1389 std::make_unique<AArch64PostLegalizerLoweringImplRuleConfig>()) {
1390 if (!RuleConfig->parseCommandLineOption())
1391 reportFatalUsageError("invalid rule identifier");
1392}
1393
1396
1398
1402 MFPropsModifier _(*this, MF);
1403 const bool Changed = runPostLegalizerLowering(MF, *RuleConfig);
1404
1405 if (!Changed)
1406 return PreservedAnalyses::all();
1407
1410 return PA;
1411}
1412
1413namespace llvm {
1415 return new AArch64PostLegalizerLoweringLegacy();
1416}
1417} // end namespace llvm
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt)
isVShiftRImm - Check if this is a valid build_vector for the immediate operand of a vector shift righ...
static bool isINSMask(ArrayRef< int > M, int NumInputElements, bool &DstIsLeft, int &Anomaly)
static unsigned getCmpOperandFoldingProfit(SDValue Op, bool AllowExtend)
Returns how profitable it is to fold a comparison's operand's shift and/or extension operations.
static bool shouldBeAdjustedToZero(SDValue LHS, const APInt &C, ISD::CondCode &CC)
bool isLegalCmpImmed(const APInt &C)
This file declares the targeting of the Machinelegalizer class for AArch64.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
This contains common code to allow clients to notify changes to machine instr.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define P(N)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Value * RHS
Value * LHS
BinaryOperator * Mul
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned logBase2() const
Definition APInt.h:1784
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:749
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:750
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr ElementCount getElementCount() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
An RAII based helper class to modify MachineFunctionProperties when running pass.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
LLVM_ABI void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr bool isLegalArithImmed(const uint64_t C)
void changeVectorFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2, bool &Invert)
Find the AArch64 condition codes necessary to represent P for a vector floating point comparison.
bool isCMN(const MachineInstr *MaybeSub, const CmpInst::Predicate &Pred, const MachineRegisterInfo &MRI)
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
operand_type_match m_Reg()
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
ImplicitDefMatch m_GImplicitDef()
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1444
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:653
bool isZIPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for zip1 or zip2 masks of the form: <0, 8, 1, 9, 2, 10, 3, 11> (WhichResultOut = 0,...
@ Undef
Value of the register doesn't matter.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAArch64PostLegalizerLowering()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isUZPMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut)
Return true for uzp1 or uzp2 masks of the form: <0, 2, 4, 6, 8, 10, 12, 14> or <1,...
bool isREVMask(ArrayRef< int > M, unsigned EltSize, unsigned NumElts, unsigned BlockSize)
isREVMask - Check if a vector shuffle corresponds to a REV instruction with the specified blocksize.
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONST...
Definition Utils.cpp:439
LLVM_ABI bool isBuildVectorAllOnes(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1450
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool isTRNMask(ArrayRef< int > M, unsigned NumElts, unsigned &WhichResultOut, unsigned &OperandOrderOut)
Return true for trn1 or trn2 masks of the form: <0, 8, 2, 10, 4, 12, 6, 14> (WhichResultOut = 0,...
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:863
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ SinglePass
Enables Observer-based DCE and additional heuristics that retry combining defined and used instructio...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.