LLVM 23.0.0git
AArch64InstructionSelector.cpp
Go to the documentation of this file.
1//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file implements the targeting of the InstructionSelector class for
10/// AArch64.
11/// \todo This should be generated by TableGen.
12//===----------------------------------------------------------------------===//
13
15#include "AArch64InstrInfo.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/IntrinsicsAArch64.h"
46#include "llvm/IR/Type.h"
47#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
50#include <optional>
51
52#define DEBUG_TYPE "aarch64-isel"
53
54using namespace llvm;
55using namespace MIPatternMatch;
56using namespace AArch64GISelUtils;
57
58namespace llvm {
61}
62
63namespace {
64
65#define GET_GLOBALISEL_PREDICATE_BITSET
66#include "AArch64GenGlobalISel.inc"
67#undef GET_GLOBALISEL_PREDICATE_BITSET
68
69
70class AArch64InstructionSelector : public InstructionSelector {
71public:
72 AArch64InstructionSelector(const AArch64TargetMachine &TM,
73 const AArch64Subtarget &STI,
74 const AArch64RegisterBankInfo &RBI);
75
76 bool select(MachineInstr &I) override;
77 static const char *getName() { return DEBUG_TYPE; }
78
79 void setupMF(MachineFunction &MF, GISelValueTracking *VT,
80 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
81 BlockFrequencyInfo *BFI) override {
82 InstructionSelector::setupMF(MF, VT, CoverageInfo, PSI, BFI);
83 MIB.setMF(MF);
84
85 // hasFnAttribute() is expensive to call on every BRCOND selection, so
86 // cache it here for each run of the selector.
87 ProduceNonFlagSettingCondBr =
88 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
89 MFReturnAddr = Register();
90
91 processPHIs(MF);
92 }
93
94private:
95 /// tblgen-erated 'select' implementation, used as the initial selector for
96 /// the patterns that don't require complex C++.
97 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
98
99 // A lowering phase that runs before any selection attempts.
100 // Returns true if the instruction was modified.
101 bool preISelLower(MachineInstr &I);
102
103 // An early selection function that runs before the selectImpl() call.
104 bool earlySelect(MachineInstr &I);
105
106 /// Save state that is shared between select calls, call select on \p I and
107 /// then restore the saved state. This can be used to recursively call select
108 /// within a select call.
109 bool selectAndRestoreState(MachineInstr &I);
110
111 // Do some preprocessing of G_PHIs before we begin selection.
112 void processPHIs(MachineFunction &MF);
113
114 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
115
116 /// Eliminate same-sized cross-bank copies into stores before selectImpl().
117 bool contractCrossBankCopyIntoStore(MachineInstr &I,
119
120 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
121
122 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
123 MachineRegisterInfo &MRI) const;
124 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
125 MachineRegisterInfo &MRI) const;
126
127 ///@{
128 /// Helper functions for selectCompareBranch.
129 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
130 MachineIRBuilder &MIB) const;
131 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
132 MachineIRBuilder &MIB) const;
133 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
134 MachineIRBuilder &MIB) const;
135 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
136 MachineBasicBlock *DstMBB,
137 MachineIRBuilder &MIB) const;
138 ///@}
139
140 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
142
143 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
144 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
145
146 // Helper to generate an equivalent of scalar_to_vector into a new register,
147 // returned via 'Dst'.
148 MachineInstr *emitScalarToVector(unsigned EltSize,
149 const TargetRegisterClass *DstRC,
150 Register Scalar,
151 MachineIRBuilder &MIRBuilder) const;
152 /// Helper to narrow vector that was widened by emitScalarToVector.
153 /// Copy lowest part of 128-bit or 64-bit vector to 64-bit or 32-bit
154 /// vector, correspondingly.
155 MachineInstr *emitNarrowVector(Register DstReg, Register SrcReg,
156 MachineIRBuilder &MIRBuilder,
157 MachineRegisterInfo &MRI) const;
158
159 /// Emit a lane insert into \p DstReg, or a new vector register if
160 /// std::nullopt is provided.
161 ///
162 /// The lane inserted into is defined by \p LaneIdx. The vector source
163 /// register is given by \p SrcReg. The register containing the element is
164 /// given by \p EltReg.
165 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg,
166 Register EltReg, unsigned LaneIdx,
167 const RegisterBank &RB,
168 MachineIRBuilder &MIRBuilder) const;
169
170 /// Emit a sequence of instructions representing a constant \p CV for a
171 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
172 ///
173 /// \returns the last instruction in the sequence on success, and nullptr
174 /// otherwise.
175 MachineInstr *emitConstantVector(Register Dst, Constant *CV,
176 MachineIRBuilder &MIRBuilder,
178
179 MachineInstr *tryAdvSIMDModImm8(Register Dst, unsigned DstSize, APInt Bits,
180 MachineIRBuilder &MIRBuilder);
181
182 MachineInstr *tryAdvSIMDModImm16(Register Dst, unsigned DstSize, APInt Bits,
183 MachineIRBuilder &MIRBuilder, bool Inv);
184
185 MachineInstr *tryAdvSIMDModImm32(Register Dst, unsigned DstSize, APInt Bits,
186 MachineIRBuilder &MIRBuilder, bool Inv);
187 MachineInstr *tryAdvSIMDModImm64(Register Dst, unsigned DstSize, APInt Bits,
188 MachineIRBuilder &MIRBuilder);
189 MachineInstr *tryAdvSIMDModImm321s(Register Dst, unsigned DstSize, APInt Bits,
190 MachineIRBuilder &MIRBuilder, bool Inv);
191 MachineInstr *tryAdvSIMDModImmFP(Register Dst, unsigned DstSize, APInt Bits,
192 MachineIRBuilder &MIRBuilder);
193
194 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
196 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
197 /// SUBREG_TO_REG.
198 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
199 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
202
203 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
204 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
205 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
206 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
207
208 /// Helper function to select vector load intrinsics like
209 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
210 /// \p Opc is the opcode that the selected instruction should use.
211 /// \p NumVecs is the number of vector destinations for the instruction.
212 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
213 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
214 MachineInstr &I);
215 bool selectVectorLoadLaneIntrinsic(unsigned Opc, unsigned NumVecs,
216 MachineInstr &I);
217 void selectVectorStoreIntrinsic(MachineInstr &I, unsigned NumVecs,
218 unsigned Opc);
219 bool selectVectorStoreLaneIntrinsic(MachineInstr &I, unsigned NumVecs,
220 unsigned Opc);
221 bool selectIntrinsicWithSideEffects(MachineInstr &I,
223 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
224 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
225 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
226 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
227 bool selectPtrAuthGlobalValue(MachineInstr &I,
228 MachineRegisterInfo &MRI) const;
229 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
230 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI);
231 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
232 void SelectTable(MachineInstr &I, MachineRegisterInfo &MRI, unsigned NumVecs,
233 unsigned Opc1, unsigned Opc2, bool isExt);
234
235 bool selectIndexedExtLoad(MachineInstr &I, MachineRegisterInfo &MRI);
236 bool selectIndexedLoad(MachineInstr &I, MachineRegisterInfo &MRI);
237 bool selectIndexedStore(GIndexedStore &I, MachineRegisterInfo &MRI);
238
239 unsigned emitConstantPoolEntry(const Constant *CPVal,
240 MachineFunction &MF) const;
242 MachineIRBuilder &MIRBuilder) const;
243
244 // Emit a vector concat operation.
245 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1,
246 Register Op2,
247 MachineIRBuilder &MIRBuilder) const;
248
249 // Emit an integer compare between LHS and RHS, which checks for Predicate.
250 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
252 MachineIRBuilder &MIRBuilder) const;
253
254 /// Emit a floating point comparison between \p LHS and \p RHS.
255 /// \p Pred if given is the intended predicate to use.
257 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
258 std::optional<CmpInst::Predicate> = std::nullopt) const;
259
261 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
262 std::initializer_list<llvm::SrcOp> SrcOps,
263 MachineIRBuilder &MIRBuilder,
264 const ComplexRendererFns &RenderFns = std::nullopt) const;
265 /// Helper function to emit an add or sub instruction.
266 ///
267 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above
268 /// in a specific order.
269 ///
270 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
271 ///
272 /// \code
273 /// const std::array<std::array<unsigned, 2>, 4> Table {
274 /// {{AArch64::ADDXri, AArch64::ADDWri},
275 /// {AArch64::ADDXrs, AArch64::ADDWrs},
276 /// {AArch64::ADDXrr, AArch64::ADDWrr},
277 /// {AArch64::SUBXri, AArch64::SUBWri},
278 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
279 /// \endcode
280 ///
281 /// Each row in the table corresponds to a different addressing mode. Each
282 /// column corresponds to a different register size.
283 ///
284 /// \attention Rows must be structured as follows:
285 /// - Row 0: The ri opcode variants
286 /// - Row 1: The rs opcode variants
287 /// - Row 2: The rr opcode variants
288 /// - Row 3: The ri opcode variants for negative immediates
289 /// - Row 4: The rx opcode variants
290 ///
291 /// \attention Columns must be structured as follows:
292 /// - Column 0: The 64-bit opcode variants
293 /// - Column 1: The 32-bit opcode variants
294 ///
295 /// \p Dst is the destination register of the binop to emit.
296 /// \p LHS is the left-hand operand of the binop to emit.
297 /// \p RHS is the right-hand operand of the binop to emit.
298 MachineInstr *emitAddSub(
299 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
301 MachineIRBuilder &MIRBuilder) const;
302 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
304 MachineIRBuilder &MIRBuilder) const;
306 MachineIRBuilder &MIRBuilder) const;
308 MachineIRBuilder &MIRBuilder) const;
310 MachineIRBuilder &MIRBuilder) const;
312 MachineIRBuilder &MIRBuilder) const;
314 MachineIRBuilder &MIRBuilder) const;
316 MachineIRBuilder &MIRBuilder) const;
318 MachineIRBuilder &MIRBuilder) const;
319 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
321 MachineIRBuilder &MIRBuilder) const;
322 MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
323 const RegisterBank &DstRB, LLT ScalarTy,
324 Register VecReg, unsigned LaneIdx,
325 MachineIRBuilder &MIRBuilder) const;
326 MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
328 MachineIRBuilder &MIRBuilder) const;
329 /// Emit a CSet for a FP compare.
330 ///
331 /// \p Dst is expected to be a 32-bit scalar register.
332 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
333 MachineIRBuilder &MIRBuilder) const;
334
335 /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
336 /// Might elide the instruction if the previous instruction already sets NZCV
337 /// correctly.
338 MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
339
340 /// Emit the overflow op for \p Opcode.
341 ///
342 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
343 /// G_USUBO, etc.
344 std::pair<MachineInstr *, AArch64CC::CondCode>
345 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
346 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
347
348 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
349
350 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
351 /// In some cases this is even possible with OR operations in the expression.
353 MachineIRBuilder &MIB) const;
358 MachineIRBuilder &MIB) const;
360 bool Negate, Register CCOp,
362 MachineIRBuilder &MIB) const;
363
364 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
365 /// \p IsNegative is true if the test should be "not zero".
366 /// This will also optimize the test bit instruction when possible.
367 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
368 MachineBasicBlock *DstMBB,
369 MachineIRBuilder &MIB) const;
370
371 /// Emit a CB(N)Z instruction which branches to \p DestMBB.
372 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
373 MachineBasicBlock *DestMBB,
374 MachineIRBuilder &MIB) const;
375
376 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
377 // We use these manually instead of using the importer since it doesn't
378 // support SDNodeXForm.
379 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
380 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
381 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
382 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
383
384 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
385 ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
386 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
387
388 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
389 unsigned Size) const;
390
391 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
392 return selectAddrModeUnscaled(Root, 1);
393 }
394 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
395 return selectAddrModeUnscaled(Root, 2);
396 }
397 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
398 return selectAddrModeUnscaled(Root, 4);
399 }
400 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
401 return selectAddrModeUnscaled(Root, 8);
402 }
403 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
404 return selectAddrModeUnscaled(Root, 16);
405 }
406
407 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
408 /// from complex pattern matchers like selectAddrModeIndexed().
409 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
410 MachineRegisterInfo &MRI) const;
411
412 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
413 unsigned Size) const;
414 template <int Width>
415 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
416 return selectAddrModeIndexed(Root, Width / 8);
417 }
418
419 std::optional<bool>
420 isWorthFoldingIntoAddrMode(const MachineInstr &MI,
421 const MachineRegisterInfo &MRI) const;
422
423 bool isWorthFoldingIntoExtendedReg(const MachineInstr &MI,
424 const MachineRegisterInfo &MRI,
425 bool IsAddrOperand) const;
426 ComplexRendererFns
427 selectAddrModeShiftedExtendXReg(MachineOperand &Root,
428 unsigned SizeInBytes) const;
429
430 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
431 /// or not a shift + extend should be folded into an addressing mode. Returns
432 /// None when this is not profitable or possible.
433 ComplexRendererFns
434 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
435 MachineOperand &Offset, unsigned SizeInBytes,
436 bool WantsExt) const;
437 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
438 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
439 unsigned SizeInBytes) const;
440 template <int Width>
441 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
442 return selectAddrModeXRO(Root, Width / 8);
443 }
444
445 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
446 unsigned SizeInBytes) const;
447 template <int Width>
448 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
449 return selectAddrModeWRO(Root, Width / 8);
450 }
451
452 ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
453 bool AllowROR = false) const;
454
455 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
456 return selectShiftedRegister(Root);
457 }
458
459 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
460 return selectShiftedRegister(Root, true);
461 }
462
463 /// Given an extend instruction, determine the correct shift-extend type for
464 /// that instruction.
465 ///
466 /// If the instruction is going to be used in a load or store, pass
467 /// \p IsLoadStore = true.
469 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
470 bool IsLoadStore = false) const;
471
472 /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
473 ///
474 /// \returns Either \p Reg if no change was necessary, or the new register
475 /// created by moving \p Reg.
476 ///
477 /// Note: This uses emitCopy right now.
478 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
479 MachineIRBuilder &MIB) const;
480
481 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
482
483 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
484
485 ComplexRendererFns selectCVTFixedPointVec(MachineOperand &Root) const;
486 ComplexRendererFns
487 selectCVTFixedPointVecBase(const MachineOperand &Root) const;
488 void renderFixedPointXForm(MachineInstrBuilder &MIB, const MachineInstr &MI,
489 int OpIdx = -1) const;
490
491 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
492 int OpIdx = -1) const;
493 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
494 int OpIdx = -1) const;
495 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
496 int OpIdx = -1) const;
497 void renderUbsanTrap(MachineInstrBuilder &MIB, const MachineInstr &MI,
498 int OpIdx) const;
499 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
500 int OpIdx = -1) const;
501 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
502 int OpIdx = -1) const;
503 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
504 int OpIdx = -1) const;
505 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB,
506 const MachineInstr &MI,
507 int OpIdx = -1) const;
508
509 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
510 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
511
512 // Optimization methods.
513 bool tryOptSelect(GSelect &Sel);
514 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI);
515 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
517 MachineIRBuilder &MIRBuilder) const;
518
519 /// Return true if \p MI is a load or store of \p NumBytes bytes.
520 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
521
522 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
523 /// register zeroed out. In other words, the result of MI has been explicitly
524 /// zero extended.
525 bool isDef32(const MachineInstr &MI) const;
526
527 const AArch64TargetMachine &TM;
528 const AArch64Subtarget &STI;
529 const AArch64InstrInfo &TII;
531 const AArch64RegisterBankInfo &RBI;
532
533 bool ProduceNonFlagSettingCondBr = false;
534
535 // Some cached values used during selection.
536 // We use LR as a live-in register, and we keep track of it here as it can be
537 // clobbered by calls.
538 Register MFReturnAddr;
539
541
542#define GET_GLOBALISEL_PREDICATES_DECL
543#include "AArch64GenGlobalISel.inc"
544#undef GET_GLOBALISEL_PREDICATES_DECL
545
546// We declare the temporaries used by selectImpl() in the class to minimize the
547// cost of constructing placeholder values.
548#define GET_GLOBALISEL_TEMPORARIES_DECL
549#include "AArch64GenGlobalISel.inc"
550#undef GET_GLOBALISEL_TEMPORARIES_DECL
551};
552
553} // end anonymous namespace
554
555#define GET_GLOBALISEL_IMPL
556#include "AArch64GenGlobalISel.inc"
557#undef GET_GLOBALISEL_IMPL
558
559AArch64InstructionSelector::AArch64InstructionSelector(
560 const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
561 const AArch64RegisterBankInfo &RBI)
562 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
563 RBI(RBI),
565#include "AArch64GenGlobalISel.inc"
568#include "AArch64GenGlobalISel.inc"
570{
571}
572
573// FIXME: This should be target-independent, inferred from the types declared
574// for each class in the bank.
575//
576/// Given a register bank, and a type, return the smallest register class that
577/// can represent that combination.
578static const TargetRegisterClass *
579getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
580 bool GetAllRegSet = false) {
581 if (RB.getID() == AArch64::GPRRegBankID) {
582 if (Ty.getSizeInBits() <= 32)
583 return GetAllRegSet ? &AArch64::GPR32allRegClass
584 : &AArch64::GPR32RegClass;
585 if (Ty.getSizeInBits() == 64)
586 return GetAllRegSet ? &AArch64::GPR64allRegClass
587 : &AArch64::GPR64RegClass;
588 if (Ty.getSizeInBits() == 128)
589 return &AArch64::XSeqPairsClassRegClass;
590 return nullptr;
591 }
592
593 if (RB.getID() == AArch64::FPRRegBankID) {
594 switch (Ty.getSizeInBits()) {
595 case 8:
596 return &AArch64::FPR8RegClass;
597 case 16:
598 return &AArch64::FPR16RegClass;
599 case 32:
600 return &AArch64::FPR32RegClass;
601 case 64:
602 return &AArch64::FPR64RegClass;
603 case 128:
604 return &AArch64::FPR128RegClass;
605 }
606 return nullptr;
607 }
608
609 return nullptr;
610}
611
612/// Given a register bank, and size in bits, return the smallest register class
613/// that can represent that combination.
614static const TargetRegisterClass *
616 bool GetAllRegSet = false) {
617 if (SizeInBits.isScalable()) {
618 assert(RB.getID() == AArch64::FPRRegBankID &&
619 "Expected FPR regbank for scalable type size");
620 return &AArch64::ZPRRegClass;
621 }
622
623 unsigned RegBankID = RB.getID();
624
625 if (RegBankID == AArch64::GPRRegBankID) {
626 assert(!SizeInBits.isScalable() && "Unexpected scalable register size");
627 if (SizeInBits <= 32)
628 return GetAllRegSet ? &AArch64::GPR32allRegClass
629 : &AArch64::GPR32RegClass;
630 if (SizeInBits == 64)
631 return GetAllRegSet ? &AArch64::GPR64allRegClass
632 : &AArch64::GPR64RegClass;
633 if (SizeInBits == 128)
634 return &AArch64::XSeqPairsClassRegClass;
635 }
636
637 if (RegBankID == AArch64::FPRRegBankID) {
638 if (SizeInBits.isScalable()) {
639 assert(SizeInBits == TypeSize::getScalable(128) &&
640 "Unexpected scalable register size");
641 return &AArch64::ZPRRegClass;
642 }
643
644 switch (SizeInBits) {
645 default:
646 return nullptr;
647 case 8:
648 return &AArch64::FPR8RegClass;
649 case 16:
650 return &AArch64::FPR16RegClass;
651 case 32:
652 return &AArch64::FPR32RegClass;
653 case 64:
654 return &AArch64::FPR64RegClass;
655 case 128:
656 return &AArch64::FPR128RegClass;
657 }
658 }
659
660 return nullptr;
661}
662
663/// Returns the correct subregister to use for a given register class.
665 const TargetRegisterInfo &TRI, unsigned &SubReg) {
666 switch (TRI.getRegSizeInBits(*RC)) {
667 case 8:
668 SubReg = AArch64::bsub;
669 break;
670 case 16:
671 SubReg = AArch64::hsub;
672 break;
673 case 32:
674 if (RC != &AArch64::FPR32RegClass)
675 SubReg = AArch64::sub_32;
676 else
677 SubReg = AArch64::ssub;
678 break;
679 case 64:
680 SubReg = AArch64::dsub;
681 break;
682 default:
684 dbgs() << "Couldn't find appropriate subregister for register class.");
685 return false;
686 }
687
688 return true;
689}
690
691/// Returns the minimum size the given register bank can hold.
692static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
693 switch (RB.getID()) {
694 case AArch64::GPRRegBankID:
695 return 32;
696 case AArch64::FPRRegBankID:
697 return 8;
698 default:
699 llvm_unreachable("Tried to get minimum size for unknown register bank.");
700 }
701}
702
703/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
704/// Helper function for functions like createDTuple and createQTuple.
705///
706/// \p RegClassIDs - The list of register class IDs available for some tuple of
707/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
708/// expected to contain between 2 and 4 tuple classes.
709///
710/// \p SubRegs - The list of subregister classes associated with each register
711/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
712/// subregister class. The index of each subregister class is expected to
713/// correspond with the index of each register class.
714///
715/// \returns Either the destination register of REG_SEQUENCE instruction that
716/// was created, or the 0th element of \p Regs if \p Regs contains a single
717/// element.
719 const unsigned RegClassIDs[],
720 const unsigned SubRegs[], MachineIRBuilder &MIB) {
721 unsigned NumRegs = Regs.size();
722 if (NumRegs == 1)
723 return Regs[0];
724 assert(NumRegs >= 2 && NumRegs <= 4 &&
725 "Only support between two and 4 registers in a tuple!");
727 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
728 auto RegSequence =
729 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
730 for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
731 RegSequence.addUse(Regs[I]);
732 RegSequence.addImm(SubRegs[I]);
733 }
734 return RegSequence.getReg(0);
735}
736
737/// Create a tuple of D-registers using the registers in \p Regs.
739 static const unsigned RegClassIDs[] = {
740 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
741 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
742 AArch64::dsub2, AArch64::dsub3};
743 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
744}
745
746/// Create a tuple of Q-registers using the registers in \p Regs.
748 static const unsigned RegClassIDs[] = {
749 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
750 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
751 AArch64::qsub2, AArch64::qsub3};
752 return createTuple(Regs, RegClassIDs, SubRegs, MIB);
753}
754
755static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
756 auto &MI = *Root.getParent();
757 auto &MBB = *MI.getParent();
758 auto &MF = *MBB.getParent();
759 auto &MRI = MF.getRegInfo();
760 uint64_t Immed;
761 if (Root.isImm())
762 Immed = Root.getImm();
763 else if (Root.isCImm())
764 Immed = Root.getCImm()->getZExtValue();
765 else if (Root.isReg()) {
766 auto ValAndVReg =
768 if (!ValAndVReg)
769 return std::nullopt;
770 Immed = ValAndVReg->Value.getSExtValue();
771 } else
772 return std::nullopt;
773 return Immed;
774}
775
776/// Check whether \p I is a currently unsupported binary operation:
777/// - it has an unsized type
778/// - an operand is not a vreg
779/// - all operands are not in the same bank
780/// These are checks that should someday live in the verifier, but right now,
781/// these are mostly limitations of the aarch64 selector.
782static bool unsupportedBinOp(const MachineInstr &I,
783 const AArch64RegisterBankInfo &RBI,
784 const MachineRegisterInfo &MRI,
785 const AArch64RegisterInfo &TRI) {
786 LLT Ty = MRI.getType(I.getOperand(0).getReg());
787 if (!Ty.isValid()) {
788 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
789 return true;
790 }
791
792 const RegisterBank *PrevOpBank = nullptr;
793 for (auto &MO : I.operands()) {
794 // FIXME: Support non-register operands.
795 if (!MO.isReg()) {
796 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
797 return true;
798 }
799
800 // FIXME: Can generic operations have physical registers operands? If
801 // so, this will need to be taught about that, and we'll need to get the
802 // bank out of the minimal class for the register.
803 // Either way, this needs to be documented (and possibly verified).
804 if (!MO.getReg().isVirtual()) {
805 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
806 return true;
807 }
808
809 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
810 if (!OpBank) {
811 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
812 return true;
813 }
814
815 if (PrevOpBank && OpBank != PrevOpBank) {
816 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
817 return true;
818 }
819 PrevOpBank = OpBank;
820 }
821 return false;
822}
823
824/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
825/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
826/// and of size \p OpSize.
827/// \returns \p GenericOpc if the combination is unsupported.
828static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
829 unsigned OpSize) {
830 switch (RegBankID) {
831 case AArch64::GPRRegBankID:
832 if (OpSize == 32) {
833 switch (GenericOpc) {
834 case TargetOpcode::G_SHL:
835 return AArch64::LSLVWr;
836 case TargetOpcode::G_LSHR:
837 return AArch64::LSRVWr;
838 case TargetOpcode::G_ASHR:
839 return AArch64::ASRVWr;
840 default:
841 return GenericOpc;
842 }
843 } else if (OpSize == 64) {
844 switch (GenericOpc) {
845 case TargetOpcode::G_PTR_ADD:
846 return AArch64::ADDXrr;
847 case TargetOpcode::G_SHL:
848 return AArch64::LSLVXr;
849 case TargetOpcode::G_LSHR:
850 return AArch64::LSRVXr;
851 case TargetOpcode::G_ASHR:
852 return AArch64::ASRVXr;
853 default:
854 return GenericOpc;
855 }
856 }
857 break;
858 case AArch64::FPRRegBankID:
859 switch (OpSize) {
860 case 32:
861 switch (GenericOpc) {
862 case TargetOpcode::G_FADD:
863 return AArch64::FADDSrr;
864 case TargetOpcode::G_FSUB:
865 return AArch64::FSUBSrr;
866 case TargetOpcode::G_FMUL:
867 return AArch64::FMULSrr;
868 case TargetOpcode::G_FDIV:
869 return AArch64::FDIVSrr;
870 default:
871 return GenericOpc;
872 }
873 case 64:
874 switch (GenericOpc) {
875 case TargetOpcode::G_FADD:
876 return AArch64::FADDDrr;
877 case TargetOpcode::G_FSUB:
878 return AArch64::FSUBDrr;
879 case TargetOpcode::G_FMUL:
880 return AArch64::FMULDrr;
881 case TargetOpcode::G_FDIV:
882 return AArch64::FDIVDrr;
883 case TargetOpcode::G_OR:
884 return AArch64::ORRv8i8;
885 default:
886 return GenericOpc;
887 }
888 }
889 break;
890 }
891 return GenericOpc;
892}
893
894/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
895/// appropriate for the (value) register bank \p RegBankID and of memory access
896/// size \p OpSize. This returns the variant with the base+unsigned-immediate
897/// addressing mode (e.g., LDRXui).
898/// \returns \p GenericOpc if the combination is unsupported.
899static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
900 unsigned OpSize) {
901 const bool isStore = GenericOpc == TargetOpcode::G_STORE;
902 switch (RegBankID) {
903 case AArch64::GPRRegBankID:
904 switch (OpSize) {
905 case 8:
906 return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
907 case 16:
908 return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
909 case 32:
910 return isStore ? AArch64::STRWui : AArch64::LDRWui;
911 case 64:
912 return isStore ? AArch64::STRXui : AArch64::LDRXui;
913 }
914 break;
915 case AArch64::FPRRegBankID:
916 switch (OpSize) {
917 case 8:
918 return isStore ? AArch64::STRBui : AArch64::LDRBui;
919 case 16:
920 return isStore ? AArch64::STRHui : AArch64::LDRHui;
921 case 32:
922 return isStore ? AArch64::STRSui : AArch64::LDRSui;
923 case 64:
924 return isStore ? AArch64::STRDui : AArch64::LDRDui;
925 case 128:
926 return isStore ? AArch64::STRQui : AArch64::LDRQui;
927 }
928 break;
929 }
930 return GenericOpc;
931}
932
933/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
934/// to \p *To.
935///
936/// E.g "To = COPY SrcReg:SubReg"
938 const RegisterBankInfo &RBI, Register SrcReg,
939 const TargetRegisterClass *To, unsigned SubReg) {
940 assert(SrcReg.isValid() && "Expected a valid source register?");
941 assert(To && "Destination register class cannot be null");
942 assert(SubReg && "Expected a valid subregister");
943
944 MachineIRBuilder MIB(I);
945 auto SubRegCopy =
946 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, {}, SubReg);
947 MachineOperand &RegOp = I.getOperand(1);
948 RegOp.setReg(SubRegCopy.getReg(0));
949
950 // It's possible that the destination register won't be constrained. Make
951 // sure that happens.
952 if (!I.getOperand(0).getReg().isPhysical())
953 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
954
955 return true;
956}
957
958/// Helper function to get the source and destination register classes for a
959/// copy. Returns a std::pair containing the source register class for the
960/// copy, and the destination register class for the copy. If a register class
961/// cannot be determined, then it will be nullptr.
962static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
965 const RegisterBankInfo &RBI) {
966 Register DstReg = I.getOperand(0).getReg();
967 Register SrcReg = I.getOperand(1).getReg();
968 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
969 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
970
971 TypeSize DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
972 TypeSize SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
973
974 // Special casing for cross-bank copies of s1s. We can technically represent
975 // a 1-bit value with any size of register. The minimum size for a GPR is 32
976 // bits. So, we need to put the FPR on 32 bits as well.
977 //
978 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
979 // then we can pull it into the helpers that get the appropriate class for a
980 // register bank. Or make a new helper that carries along some constraint
981 // information.
982 if (SrcRegBank != DstRegBank &&
983 (DstSize == TypeSize::getFixed(1) && SrcSize == TypeSize::getFixed(1)))
984 SrcSize = DstSize = TypeSize::getFixed(32);
985
986 return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
987 getMinClassForRegBank(DstRegBank, DstSize, true)};
988}
989
990// FIXME: We need some sort of API in RBI/TRI to allow generic code to
991// constrain operands of simple instructions given a TargetRegisterClass
992// and LLT
994 const RegisterBankInfo &RBI) {
995 for (MachineOperand &MO : I.operands()) {
996 if (!MO.isReg())
997 continue;
998 Register Reg = MO.getReg();
999 if (!Reg)
1000 continue;
1001 if (Reg.isPhysical())
1002 continue;
1003 LLT Ty = MRI.getType(Reg);
1004 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
1005 const TargetRegisterClass *RC =
1007 if (!RC) {
1008 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
1009 RC = getRegClassForTypeOnBank(Ty, RB);
1010 if (!RC) {
1011 LLVM_DEBUG(
1012 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
1013 break;
1014 }
1015 }
1016 RBI.constrainGenericRegister(Reg, *RC, MRI);
1017 }
1018
1019 return true;
1020}
1021
1024 const RegisterBankInfo &RBI) {
1025 Register DstReg = I.getOperand(0).getReg();
1026 Register SrcReg = I.getOperand(1).getReg();
1027 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
1028 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
1029
1030 // Find the correct register classes for the source and destination registers.
1031 const TargetRegisterClass *SrcRC;
1032 const TargetRegisterClass *DstRC;
1033 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
1034
1035 if (!DstRC) {
1036 LLVM_DEBUG(dbgs() << "Unexpected dest size "
1037 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
1038 return false;
1039 }
1040
1041 // Is this a copy? If so, then we may need to insert a subregister copy.
1042 if (I.isCopy()) {
1043 // Yes. Check if there's anything to fix up.
1044 if (!SrcRC) {
1045 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
1046 return false;
1047 }
1048
1049 const TypeSize SrcSize = TRI.getRegSizeInBits(*SrcRC);
1050 const TypeSize DstSize = TRI.getRegSizeInBits(*DstRC);
1051 unsigned SubReg;
1052
1053 // If the source bank doesn't support a subregister copy small enough,
1054 // then we first need to copy to the destination bank.
1055 if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
1056 const TargetRegisterClass *DstTempRC =
1057 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
1058 getSubRegForClass(DstRC, TRI, SubReg);
1059
1060 MachineIRBuilder MIB(I);
1061 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
1062 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
1063 } else if (SrcSize > DstSize) {
1064 // If the source register is bigger than the destination we need to
1065 // perform a subregister copy.
1066 const TargetRegisterClass *SubRegRC =
1067 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1068 getSubRegForClass(SubRegRC, TRI, SubReg);
1069 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
1070 } else if (DstSize > SrcSize) {
1071 // If the destination register is bigger than the source we need to do
1072 // a promotion using SUBREG_TO_REG.
1073 const TargetRegisterClass *PromotionRC =
1074 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
1075 getSubRegForClass(SrcRC, TRI, SubReg);
1076
1077 Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1078 BuildMI(*I.getParent(), I, I.getDebugLoc(),
1079 TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1080 .addUse(SrcReg)
1081 .addImm(SubReg);
1082 MachineOperand &RegOp = I.getOperand(1);
1083 RegOp.setReg(PromoteReg);
1084 }
1085
1086 // If the destination is a physical register, then there's nothing to
1087 // change, so we're done.
1088 if (DstReg.isPhysical())
1089 return true;
1090 }
1091
1092 // No need to constrain SrcReg. It will get constrained when we hit another
1093 // of its use or its defs. Copies do not have constraints.
1094 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1095 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1096 << " operand\n");
1097 return false;
1098 }
1099
1100 // If this a GPR ZEXT that we want to just reduce down into a copy.
1101 // The sizes will be mismatched with the source < 32b but that's ok.
1102 if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1103 I.setDesc(TII.get(AArch64::COPY));
1104 assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1105 return selectCopy(I, TII, MRI, TRI, RBI);
1106 }
1107
1108 I.setDesc(TII.get(AArch64::COPY));
1109 return true;
1110}
1111
1113AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1114 Register False, AArch64CC::CondCode CC,
1115 MachineIRBuilder &MIB) const {
1116 MachineRegisterInfo &MRI = *MIB.getMRI();
1117 assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1118 RBI.getRegBank(True, MRI, TRI)->getID() &&
1119 "Expected both select operands to have the same regbank?");
1120 LLT Ty = MRI.getType(True);
1121 if (Ty.isVector())
1122 return nullptr;
1123 const unsigned Size = Ty.getSizeInBits();
1124 assert((Size == 32 || Size == 64) &&
1125 "Expected 32 bit or 64 bit select only?");
1126 const bool Is32Bit = Size == 32;
1127 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1128 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1129 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1131 return &*FCSel;
1132 }
1133
1134 // By default, we'll try and emit a CSEL.
1135 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1136 bool Optimized = false;
1137 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1138 &Optimized](Register &Reg, Register &OtherReg,
1139 bool Invert) {
1140 if (Optimized)
1141 return false;
1142
1143 // Attempt to fold:
1144 //
1145 // %sub = G_SUB 0, %x
1146 // %select = G_SELECT cc, %reg, %sub
1147 //
1148 // Into:
1149 // %select = CSNEG %reg, %x, cc
1150 Register MatchReg;
1151 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1152 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1153 Reg = MatchReg;
1154 if (Invert) {
1156 std::swap(Reg, OtherReg);
1157 }
1158 return true;
1159 }
1160
1161 // Attempt to fold:
1162 //
1163 // %xor = G_XOR %x, -1
1164 // %select = G_SELECT cc, %reg, %xor
1165 //
1166 // Into:
1167 // %select = CSINV %reg, %x, cc
1168 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1169 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1170 Reg = MatchReg;
1171 if (Invert) {
1173 std::swap(Reg, OtherReg);
1174 }
1175 return true;
1176 }
1177
1178 // Attempt to fold:
1179 //
1180 // %add = G_ADD %x, 1
1181 // %select = G_SELECT cc, %reg, %add
1182 //
1183 // Into:
1184 // %select = CSINC %reg, %x, cc
1185 if (mi_match(Reg, MRI,
1186 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1187 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1188 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1189 Reg = MatchReg;
1190 if (Invert) {
1192 std::swap(Reg, OtherReg);
1193 }
1194 return true;
1195 }
1196
1197 return false;
1198 };
1199
1200 // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1201 // true/false values are constants.
1202 // FIXME: All of these patterns already exist in tablegen. We should be
1203 // able to import these.
1204 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1205 &Optimized]() {
1206 if (Optimized)
1207 return false;
1208 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1209 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1210 if (!TrueCst && !FalseCst)
1211 return false;
1212
1213 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1214 if (TrueCst && FalseCst) {
1215 int64_t T = TrueCst->Value.getSExtValue();
1216 int64_t F = FalseCst->Value.getSExtValue();
1217
1218 if (T == 0 && F == 1) {
1219 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1220 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1221 True = ZReg;
1222 False = ZReg;
1223 return true;
1224 }
1225
1226 if (T == 0 && F == -1) {
1227 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc
1228 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1229 True = ZReg;
1230 False = ZReg;
1231 return true;
1232 }
1233 }
1234
1235 if (TrueCst) {
1236 int64_t T = TrueCst->Value.getSExtValue();
1237 if (T == 1) {
1238 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1239 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1240 True = False;
1241 False = ZReg;
1243 return true;
1244 }
1245
1246 if (T == -1) {
1247 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1248 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1249 True = False;
1250 False = ZReg;
1252 return true;
1253 }
1254 }
1255
1256 if (FalseCst) {
1257 int64_t F = FalseCst->Value.getSExtValue();
1258 if (F == 1) {
1259 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1260 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1261 False = ZReg;
1262 return true;
1263 }
1264
1265 if (F == -1) {
1266 // G_SELECT cc, t, -1 -> CSINC t, zreg, cc
1267 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1268 False = ZReg;
1269 return true;
1270 }
1271 }
1272 return false;
1273 };
1274
1275 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1276 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1277 Optimized |= TryOptSelectCst();
1278 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1279 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1280 return &*SelectInst;
1281}
1282
1285 MachineRegisterInfo *MRI = nullptr) {
1286 switch (P) {
1287 default:
1288 llvm_unreachable("Unknown condition code!");
1289 case CmpInst::ICMP_NE:
1290 return AArch64CC::NE;
1291 case CmpInst::ICMP_EQ:
1292 return AArch64CC::EQ;
1293 case CmpInst::ICMP_SGT:
1294 return AArch64CC::GT;
1295 case CmpInst::ICMP_SGE:
1296 if (RHS && MRI) {
1297 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1298 if (ValAndVReg && ValAndVReg->Value == 0)
1299 return AArch64CC::PL;
1300 }
1301 return AArch64CC::GE;
1302 case CmpInst::ICMP_SLT:
1303 if (RHS && MRI) {
1304 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, *MRI);
1305 if (ValAndVReg && ValAndVReg->Value == 0)
1306 return AArch64CC::MI;
1307 }
1308 return AArch64CC::LT;
1309 case CmpInst::ICMP_SLE:
1310 return AArch64CC::LE;
1311 case CmpInst::ICMP_UGT:
1312 return AArch64CC::HI;
1313 case CmpInst::ICMP_UGE:
1314 return AArch64CC::HS;
1315 case CmpInst::ICMP_ULT:
1316 return AArch64CC::LO;
1317 case CmpInst::ICMP_ULE:
1318 return AArch64CC::LS;
1319 }
1320}
1321
1322/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
1324 AArch64CC::CondCode &CondCode,
1325 AArch64CC::CondCode &CondCode2) {
1326 CondCode2 = AArch64CC::AL;
1327 switch (CC) {
1328 default:
1329 llvm_unreachable("Unknown FP condition!");
1330 case CmpInst::FCMP_OEQ:
1331 CondCode = AArch64CC::EQ;
1332 break;
1333 case CmpInst::FCMP_OGT:
1334 CondCode = AArch64CC::GT;
1335 break;
1336 case CmpInst::FCMP_OGE:
1337 CondCode = AArch64CC::GE;
1338 break;
1339 case CmpInst::FCMP_OLT:
1340 CondCode = AArch64CC::MI;
1341 break;
1342 case CmpInst::FCMP_OLE:
1343 CondCode = AArch64CC::LS;
1344 break;
1345 case CmpInst::FCMP_ONE:
1346 CondCode = AArch64CC::MI;
1347 CondCode2 = AArch64CC::GT;
1348 break;
1349 case CmpInst::FCMP_ORD:
1350 CondCode = AArch64CC::VC;
1351 break;
1352 case CmpInst::FCMP_UNO:
1353 CondCode = AArch64CC::VS;
1354 break;
1355 case CmpInst::FCMP_UEQ:
1356 CondCode = AArch64CC::EQ;
1357 CondCode2 = AArch64CC::VS;
1358 break;
1359 case CmpInst::FCMP_UGT:
1360 CondCode = AArch64CC::HI;
1361 break;
1362 case CmpInst::FCMP_UGE:
1363 CondCode = AArch64CC::PL;
1364 break;
1365 case CmpInst::FCMP_ULT:
1366 CondCode = AArch64CC::LT;
1367 break;
1368 case CmpInst::FCMP_ULE:
1369 CondCode = AArch64CC::LE;
1370 break;
1371 case CmpInst::FCMP_UNE:
1372 CondCode = AArch64CC::NE;
1373 break;
1374 }
1375}
1376
1377/// Convert an IR fp condition code to an AArch64 CC.
1378/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1379/// should be AND'ed instead of OR'ed.
1381 AArch64CC::CondCode &CondCode,
1382 AArch64CC::CondCode &CondCode2) {
1383 CondCode2 = AArch64CC::AL;
1384 switch (CC) {
1385 default:
1386 changeFPCCToORAArch64CC(CC, CondCode, CondCode2);
1387 assert(CondCode2 == AArch64CC::AL);
1388 break;
1389 case CmpInst::FCMP_ONE:
1390 // (a one b)
1391 // == ((a olt b) || (a ogt b))
1392 // == ((a ord b) && (a une b))
1393 CondCode = AArch64CC::VC;
1394 CondCode2 = AArch64CC::NE;
1395 break;
1396 case CmpInst::FCMP_UEQ:
1397 // (a ueq b)
1398 // == ((a uno b) || (a oeq b))
1399 // == ((a ule b) && (a uge b))
1400 CondCode = AArch64CC::PL;
1401 CondCode2 = AArch64CC::LE;
1402 break;
1403 }
1404}
1405
1406/// Return a register which can be used as a bit to test in a TB(N)Z.
1407static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1408 MachineRegisterInfo &MRI) {
1409 assert(Reg.isValid() && "Expected valid register!");
1410 bool HasZext = false;
1411 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1412 unsigned Opc = MI->getOpcode();
1413
1414 if (!MI->getOperand(0).isReg() ||
1415 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1416 break;
1417
1418 // (tbz (any_ext x), b) -> (tbz x, b) and
1419 // (tbz (zext x), b) -> (tbz x, b) if we don't use the extended bits.
1420 //
1421 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1422 // on the truncated x is the same as the bit number on x.
1423 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1424 Opc == TargetOpcode::G_TRUNC) {
1425 if (Opc == TargetOpcode::G_ZEXT)
1426 HasZext = true;
1427
1428 Register NextReg = MI->getOperand(1).getReg();
1429 // Did we find something worth folding?
1430 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1431 break;
1432 TypeSize InSize = MRI.getType(NextReg).getSizeInBits();
1433 if (Bit >= InSize)
1434 break;
1435
1436 // NextReg is worth folding. Keep looking.
1437 Reg = NextReg;
1438 continue;
1439 }
1440
1441 // Attempt to find a suitable operation with a constant on one side.
1442 std::optional<uint64_t> C;
1443 Register TestReg;
1444 switch (Opc) {
1445 default:
1446 break;
1447 case TargetOpcode::G_AND:
1448 case TargetOpcode::G_XOR: {
1449 TestReg = MI->getOperand(1).getReg();
1450 Register ConstantReg = MI->getOperand(2).getReg();
1451 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1452 if (!VRegAndVal) {
1453 // AND commutes, check the other side for a constant.
1454 // FIXME: Can we canonicalize the constant so that it's always on the
1455 // same side at some point earlier?
1456 std::swap(ConstantReg, TestReg);
1457 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1458 }
1459 if (VRegAndVal) {
1460 if (HasZext)
1461 C = VRegAndVal->Value.getZExtValue();
1462 else
1463 C = VRegAndVal->Value.getSExtValue();
1464 }
1465 break;
1466 }
1467 case TargetOpcode::G_ASHR:
1468 case TargetOpcode::G_LSHR:
1469 case TargetOpcode::G_SHL: {
1470 TestReg = MI->getOperand(1).getReg();
1471 auto VRegAndVal =
1472 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1473 if (VRegAndVal)
1474 C = VRegAndVal->Value.getSExtValue();
1475 break;
1476 }
1477 }
1478
1479 // Didn't find a constant or viable register. Bail out of the loop.
1480 if (!C || !TestReg.isValid())
1481 break;
1482
1483 // We found a suitable instruction with a constant. Check to see if we can
1484 // walk through the instruction.
1485 Register NextReg;
1486 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1487 switch (Opc) {
1488 default:
1489 break;
1490 case TargetOpcode::G_AND:
1491 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1492 if ((*C >> Bit) & 1)
1493 NextReg = TestReg;
1494 break;
1495 case TargetOpcode::G_SHL:
1496 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1497 // the type of the register.
1498 if (*C <= Bit && (Bit - *C) < TestRegSize) {
1499 NextReg = TestReg;
1500 Bit = Bit - *C;
1501 }
1502 break;
1503 case TargetOpcode::G_ASHR:
1504 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1505 // in x
1506 NextReg = TestReg;
1507 Bit = Bit + *C;
1508 if (Bit >= TestRegSize)
1509 Bit = TestRegSize - 1;
1510 break;
1511 case TargetOpcode::G_LSHR:
1512 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1513 if ((Bit + *C) < TestRegSize) {
1514 NextReg = TestReg;
1515 Bit = Bit + *C;
1516 }
1517 break;
1518 case TargetOpcode::G_XOR:
1519 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1520 // appropriate.
1521 //
1522 // e.g. If x' = xor x, c, and the b-th bit is set in c then
1523 //
1524 // tbz x', b -> tbnz x, b
1525 //
1526 // Because x' only has the b-th bit set if x does not.
1527 if ((*C >> Bit) & 1)
1528 Invert = !Invert;
1529 NextReg = TestReg;
1530 break;
1531 }
1532
1533 // Check if we found anything worth folding.
1534 if (!NextReg.isValid())
1535 return Reg;
1536 Reg = NextReg;
1537 }
1538
1539 return Reg;
1540}
1541
1542MachineInstr *AArch64InstructionSelector::emitTestBit(
1543 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1544 MachineIRBuilder &MIB) const {
1545 assert(TestReg.isValid());
1546 assert(ProduceNonFlagSettingCondBr &&
1547 "Cannot emit TB(N)Z with speculation tracking!");
1548 MachineRegisterInfo &MRI = *MIB.getMRI();
1549
1550 // Attempt to optimize the test bit by walking over instructions.
1551 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1552 LLT Ty = MRI.getType(TestReg);
1553 unsigned Size = Ty.getSizeInBits();
1554 assert(!Ty.isVector() && "Expected a scalar!");
1555 assert(Bit < 64 && "Bit is too large!");
1556
1557 // When the test register is a 64-bit register, we have to narrow to make
1558 // TBNZW work.
1559 bool UseWReg = Bit < 32;
1560 unsigned NecessarySize = UseWReg ? 32 : 64;
1561 if (Size != NecessarySize)
1562 TestReg = moveScalarRegClass(
1563 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1564 MIB);
1565
1566 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1567 {AArch64::TBZW, AArch64::TBNZW}};
1568 unsigned Opc = OpcTable[UseWReg][IsNegative];
1569 auto TestBitMI =
1570 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1571 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1572 return &*TestBitMI;
1573}
1574
1575bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1576 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1577 MachineIRBuilder &MIB) const {
1578 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1579 // Given something like this:
1580 //
1581 // %x = ...Something...
1582 // %one = G_CONSTANT i64 1
1583 // %zero = G_CONSTANT i64 0
1584 // %and = G_AND %x, %one
1585 // %cmp = G_ICMP intpred(ne), %and, %zero
1586 // %cmp_trunc = G_TRUNC %cmp
1587 // G_BRCOND %cmp_trunc, %bb.3
1588 //
1589 // We want to try and fold the AND into the G_BRCOND and produce either a
1590 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1591 //
1592 // In this case, we'd get
1593 //
1594 // TBNZ %x %bb.3
1595 //
1596
1597 // Check if the AND has a constant on its RHS which we can use as a mask.
1598 // If it's a power of 2, then it's the same as checking a specific bit.
1599 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1600 auto MaybeBit = getIConstantVRegValWithLookThrough(
1601 AndInst.getOperand(2).getReg(), *MIB.getMRI());
1602 if (!MaybeBit)
1603 return false;
1604
1605 int32_t Bit = MaybeBit->Value.exactLogBase2();
1606 if (Bit < 0)
1607 return false;
1608
1609 Register TestReg = AndInst.getOperand(1).getReg();
1610
1611 // Emit a TB(N)Z.
1612 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1613 return true;
1614}
1615
1616MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1617 bool IsNegative,
1618 MachineBasicBlock *DestMBB,
1619 MachineIRBuilder &MIB) const {
1620 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1621 MachineRegisterInfo &MRI = *MIB.getMRI();
1622 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1623 AArch64::GPRRegBankID &&
1624 "Expected GPRs only?");
1625 auto Ty = MRI.getType(CompareReg);
1626 unsigned Width = Ty.getSizeInBits();
1627 assert(!Ty.isVector() && "Expected scalar only?");
1628 assert(Width <= 64 && "Expected width to be at most 64?");
1629 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1630 {AArch64::CBNZW, AArch64::CBNZX}};
1631 unsigned Opc = OpcTable[IsNegative][Width == 64];
1632 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1633 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1634 return &*BranchMI;
1635}
1636
1637bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1638 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1639 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1640 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1641 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
1642 // totally clean. Some of them require two branches to implement.
1643 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1644 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1645 Pred);
1646 AArch64CC::CondCode CC1, CC2;
1647 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
1648 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1649 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1650 if (CC2 != AArch64CC::AL)
1651 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1652 I.eraseFromParent();
1653 return true;
1654}
1655
1656bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1657 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1658 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1659 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1660 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1661 //
1662 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1663 // instructions will not be produced, as they are conditional branch
1664 // instructions that do not set flags.
1665 if (!ProduceNonFlagSettingCondBr)
1666 return false;
1667
1668 MachineRegisterInfo &MRI = *MIB.getMRI();
1669 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1670 auto Pred =
1671 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1672 Register LHS = ICmp.getOperand(2).getReg();
1673 Register RHS = ICmp.getOperand(3).getReg();
1674
1675 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1676 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1677 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1678
1679 // When we can emit a TB(N)Z, prefer that.
1680 //
1681 // Handle non-commutative condition codes first.
1682 // Note that we don't want to do this when we have a G_AND because it can
1683 // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1684 if (VRegAndVal && !AndInst) {
1685 int64_t C = VRegAndVal->Value.getSExtValue();
1686
1687 // When we have a greater-than comparison, we can just test if the msb is
1688 // zero.
1689 if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1690 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1691 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1692 I.eraseFromParent();
1693 return true;
1694 }
1695
1696 // When we have a less than comparison, we can just test if the msb is not
1697 // zero.
1698 if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1699 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1700 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1701 I.eraseFromParent();
1702 return true;
1703 }
1704
1705 // Inversely, if we have a signed greater-than-or-equal comparison to zero,
1706 // we can test if the msb is zero.
1707 if (C == 0 && Pred == CmpInst::ICMP_SGE) {
1708 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1709 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1710 I.eraseFromParent();
1711 return true;
1712 }
1713 }
1714
1715 // Attempt to handle commutative condition codes. Right now, that's only
1716 // eq/ne.
1717 if (ICmpInst::isEquality(Pred)) {
1718 if (!VRegAndVal) {
1719 std::swap(RHS, LHS);
1720 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1721 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1722 }
1723
1724 if (VRegAndVal && VRegAndVal->Value == 0) {
1725 // If there's a G_AND feeding into this branch, try to fold it away by
1726 // emitting a TB(N)Z instead.
1727 //
1728 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1729 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1730 // would be redundant.
1731 if (AndInst &&
1732 tryOptAndIntoCompareBranch(
1733 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1734 I.eraseFromParent();
1735 return true;
1736 }
1737
1738 // Otherwise, try to emit a CB(N)Z instead.
1739 auto LHSTy = MRI.getType(LHS);
1740 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1741 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1742 I.eraseFromParent();
1743 return true;
1744 }
1745 }
1746 }
1747
1748 return false;
1749}
1750
1751bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1752 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1753 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1754 assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1755 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1756 return true;
1757
1758 // Couldn't optimize. Emit a compare + a Bcc.
1759 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1760 auto &PredOp = ICmp.getOperand(1);
1761 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1763 static_cast<CmpInst::Predicate>(PredOp.getPredicate()),
1764 ICmp.getOperand(3).getReg(), MIB.getMRI());
1765 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1766 I.eraseFromParent();
1767 return true;
1768}
1769
1770bool AArch64InstructionSelector::selectCompareBranch(
1771 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1772 Register CondReg = I.getOperand(0).getReg();
1773 MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1774 // Try to select the G_BRCOND using whatever is feeding the condition if
1775 // possible.
1776 unsigned CCMIOpc = CCMI->getOpcode();
1777 if (CCMIOpc == TargetOpcode::G_FCMP)
1778 return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1779 if (CCMIOpc == TargetOpcode::G_ICMP)
1780 return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1781
1782 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1783 // instructions will not be produced, as they are conditional branch
1784 // instructions that do not set flags.
1785 if (ProduceNonFlagSettingCondBr) {
1786 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1787 I.getOperand(1).getMBB(), MIB);
1788 I.eraseFromParent();
1789 return true;
1790 }
1791
1792 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1793 auto TstMI =
1794 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1796 auto Bcc = MIB.buildInstr(AArch64::Bcc)
1798 .addMBB(I.getOperand(1).getMBB());
1799 I.eraseFromParent();
1801 return true;
1802}
1803
1804/// Returns the element immediate value of a vector shift operand if found.
1805/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1806static std::optional<int64_t> getVectorShiftImm(Register Reg,
1807 MachineRegisterInfo &MRI) {
1808 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1809 MachineInstr *OpMI = MRI.getVRegDef(Reg);
1810 return getAArch64VectorSplatScalar(*OpMI, MRI);
1811}
1812
1813/// Matches and returns the shift immediate value for a SHL instruction given
1814/// a shift operand.
1815static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
1816 MachineRegisterInfo &MRI) {
1817 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1818 if (!ShiftImm)
1819 return std::nullopt;
1820 // Check the immediate is in range for a SHL.
1821 int64_t Imm = *ShiftImm;
1822 if (Imm < 0)
1823 return std::nullopt;
1824 switch (SrcTy.getElementType().getSizeInBits()) {
1825 default:
1826 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1827 return std::nullopt;
1828 case 8:
1829 if (Imm > 7)
1830 return std::nullopt;
1831 break;
1832 case 16:
1833 if (Imm > 15)
1834 return std::nullopt;
1835 break;
1836 case 32:
1837 if (Imm > 31)
1838 return std::nullopt;
1839 break;
1840 case 64:
1841 if (Imm > 63)
1842 return std::nullopt;
1843 break;
1844 }
1845 return Imm;
1846}
1847
// Select a vector G_SHL. When the shift amount is a constant splat, emit the
// immediate-shift form (SHL); otherwise emit the register form (USHL) with
// the shift-amount vector as the second source.
1848 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1849 MachineRegisterInfo &MRI) {
1850 assert(I.getOpcode() == TargetOpcode::G_SHL);
1851 Register DstReg = I.getOperand(0).getReg();
1852 const LLT Ty = MRI.getType(DstReg);
1853 Register Src1Reg = I.getOperand(1).getReg();
1854 Register Src2Reg = I.getOperand(2).getReg();
1855
1856 if (!Ty.isVector())
1857 return false;
1858
1859 // Check if we have a vector of constants on RHS that we can select as the
1860 // immediate form.
1861 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1862
// Pick the opcode matching the destination vector type; the immediate and
// register variants are chosen together based on whether ImmVal was found.
1863 unsigned Opc = 0;
1864 if (Ty == LLT::fixed_vector(2, 64)) {
1865 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1866 } else if (Ty == LLT::fixed_vector(4, 32)) {
1867 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1868 } else if (Ty == LLT::fixed_vector(2, 32)) {
1869 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1870 } else if (Ty == LLT::fixed_vector(4, 16)) {
1871 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1872 } else if (Ty == LLT::fixed_vector(8, 16)) {
1873 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1874 } else if (Ty == LLT::fixed_vector(16, 8)) {
1875 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1876 } else if (Ty == LLT::fixed_vector(8, 8)) {
1877 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1878 } else {
1879 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1880 return false;
1881 }
1882
// Build the replacement instruction, then erase the generic G_SHL.
1883 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1884 if (ImmVal)
1885 Shl.addImm(*ImmVal);
1886 else
1887 Shl.addUse(Src2Reg);
1889 I.eraseFromParent();
1890 return true;
1891}
1892
// Select a vector G_ASHR/G_LSHR by negating the shift amount and emitting the
// signed (SSHL) or unsigned (USHL) register-shift instruction, since AArch64
// has no right-shift-by-register vector instruction.
1893 bool AArch64InstructionSelector::selectVectorAshrLshr(
1894 MachineInstr &I, MachineRegisterInfo &MRI) {
1895 assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1896 I.getOpcode() == TargetOpcode::G_LSHR);
1897 Register DstReg = I.getOperand(0).getReg();
1898 const LLT Ty = MRI.getType(DstReg);
1899 Register Src1Reg = I.getOperand(1).getReg();
1900 Register Src2Reg = I.getOperand(2).getReg();
1901
1902 if (!Ty.isVector())
1903 return false;
1904
1905 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1906
1907 // We expect the immediate case to be lowered in the PostLegalCombiner to
1908 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1909
1910 // There is not a shift right register instruction, but the shift left
1911 // register instruction takes a signed value, where negative numbers specify a
1912 // right shift.
1913
// Per-type opcode selection: the shift opcode (signed vs. unsigned) and the
// matching NEG opcode used to negate the shift amount.
1914 unsigned Opc = 0;
1915 unsigned NegOpc = 0;
1916 const TargetRegisterClass *RC =
1917 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID));
1918 if (Ty == LLT::fixed_vector(2, 64)) {
1919 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1920 NegOpc = AArch64::NEGv2i64;
1921 } else if (Ty == LLT::fixed_vector(4, 32)) {
1922 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1923 NegOpc = AArch64::NEGv4i32;
1924 } else if (Ty == LLT::fixed_vector(2, 32)) {
1925 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1926 NegOpc = AArch64::NEGv2i32;
1927 } else if (Ty == LLT::fixed_vector(4, 16)) {
1928 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1929 NegOpc = AArch64::NEGv4i16;
1930 } else if (Ty == LLT::fixed_vector(8, 16)) {
1931 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1932 NegOpc = AArch64::NEGv8i16;
1933 } else if (Ty == LLT::fixed_vector(16, 8)) {
1934 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1935 NegOpc = AArch64::NEGv16i8;
1936 } else if (Ty == LLT::fixed_vector(8, 8)) {
1937 Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1938 NegOpc = AArch64::NEGv8i8;
1939 } else {
1940 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1941 return false;
1942 }
1943
// Negate the shift amount, then shift left by the (now negative) amount,
// which the ISA defines as a right shift.
1944 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1946 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1948 I.eraseFromParent();
1949 return true;
1950}
1951
// Select a G_VASTART for the AAPCS ABI: materialize the five-field va_list
// struct (stack, gr_top, vr_top, gr_offs, vr_offs) by storing each field to
// the va_list pointer in operand 0.
1952 bool AArch64InstructionSelector::selectVaStartAAPCS(
1953 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1954
1956 MF.getFunction().isVarArg()))
1957 return false;
1958
1959 // The layout of the va_list struct is specified in the AArch64 Procedure Call
1960 // Standard, section 10.1.5.
1961
// Field/pointer sizes and opcodes depend on ILP32 vs. LP64.
1962 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1963 const unsigned PtrSize = STI.isTargetILP32() ? 4 : 8;
1964 const auto *PtrRegClass =
1965 STI.isTargetILP32() ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
1966
1967 const MCInstrDesc &MCIDAddAddr =
1968 TII.get(STI.isTargetILP32() ? AArch64::ADDWri : AArch64::ADDXri);
1969 const MCInstrDesc &MCIDStoreAddr =
1970 TII.get(STI.isTargetILP32() ? AArch64::STRWui : AArch64::STRXui);
1971
1972 /*
1973 * typedef struct va_list {
1974 * void * stack; // next stack param
1975 * void * gr_top; // end of GP arg reg save area
1976 * void * vr_top; // end of FP/SIMD arg reg save area
1977 * int gr_offs; // offset from gr_top to next GP register arg
1978 * int vr_offs; // offset from vr_top to next FP/SIMD register arg
1979 * } va_list;
1980 */
1981 const auto VAList = I.getOperand(0).getReg();
1982
1983 // Our current offset in bytes from the va_list struct (VAList).
1984 unsigned OffsetBytes = 0;
1985
1986 // Helper function to store (FrameIndex + Imm) to VAList at offset OffsetBytes
1987 // and increment OffsetBytes by PtrSize.
1988 const auto PushAddress = [&](const int FrameIndex, const int64_t Imm) {
1989 const Register Top = MRI.createVirtualRegister(PtrRegClass);
1990 auto MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDAddAddr)
1991 .addDef(Top)
1992 .addFrameIndex(FrameIndex)
1993 .addImm(Imm)
1994 .addImm(0)
1996
// Store the computed address into the va_list field at OffsetBytes.
1997 const auto *MMO = *I.memoperands_begin();
1998 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), MCIDStoreAddr)
1999 .addUse(Top)
2000 .addUse(VAList)
2001 .addImm(OffsetBytes / PtrSize)
2003 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2004 MachineMemOperand::MOStore, PtrSize, MMO->getBaseAlign()));
2006
2007 OffsetBytes += PtrSize;
2008 };
2009
2010 // void* stack at offset 0
2011 PushAddress(FuncInfo->getVarArgsStackIndex(), 0);
2012
2013 // void* gr_top at offset 8 (4 on ILP32)
2014 const unsigned GPRSize = FuncInfo->getVarArgsGPRSize();
2015 PushAddress(FuncInfo->getVarArgsGPRIndex(), GPRSize);
2016
2017 // void* vr_top at offset 16 (8 on ILP32)
2018 const unsigned FPRSize = FuncInfo->getVarArgsFPRSize();
2019 PushAddress(FuncInfo->getVarArgsFPRIndex(), FPRSize);
2020
2021 // Helper function to store a 4-byte integer constant to VAList at offset
2022 // OffsetBytes, and increment OffsetBytes by 4.
2023 const auto PushIntConstant = [&](const int32_t Value) {
2024 constexpr int IntSize = 4;
2025 const Register Temp = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2026 auto MIB =
2027 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::MOVi32imm))
2028 .addDef(Temp)
2029 .addImm(Value)
2031
2032 const auto *MMO = *I.memoperands_begin();
2033 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRWui))
2034 .addUse(Temp)
2035 .addUse(VAList)
2036 .addImm(OffsetBytes / IntSize)
2038 MMO->getPointerInfo().getWithOffset(OffsetBytes),
2039 MachineMemOperand::MOStore, IntSize, MMO->getBaseAlign()));
2041 OffsetBytes += IntSize;
2042 };
2043
2044 // int gr_offs at offset 24 (12 on ILP32)
2045 PushIntConstant(-static_cast<int32_t>(GPRSize));
2046
2047 // int vr_offs at offset 28 (16 on ILP32)
2048 PushIntConstant(-static_cast<int32_t>(FPRSize));
2049
// Sanity-check that we wrote exactly the whole struct.
2050 assert(OffsetBytes == (STI.isTargetILP32() ? 20 : 32) && "Unexpected offset");
2051
2052 I.eraseFromParent();
2053 return true;
2054}
2055
// Select a G_VASTART for the Darwin ABI: the va_list is a single pointer, so
// compute the address of the first vararg slot and store it through the list
// pointer in operand 0.
2056 bool AArch64InstructionSelector::selectVaStartDarwin(
2057 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
2058 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2059 Register ListReg = I.getOperand(0).getReg();
2060
2061 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2062
// Default to the stack vararg area; for Win64 calling conventions prefer the
// GPR save area when any GPRs were saved.
2063 int FrameIdx = FuncInfo->getVarArgsStackIndex();
2064 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
2066 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
2067 ? FuncInfo->getVarArgsGPRIndex()
2068 : FuncInfo->getVarArgsStackIndex();
2069 }
2070
// ArgsAddrReg = frame address of the chosen vararg area.
2071 auto MIB =
2072 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
2073 .addDef(ArgsAddrReg)
2074 .addFrameIndex(FrameIdx)
2075 .addImm(0)
2076 .addImm(0);
2077
2079
// Store the address into *ListReg, reusing the G_VASTART's memory operand.
2080 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
2081 .addUse(ArgsAddrReg)
2082 .addUse(ListReg)
2083 .addImm(0)
2084 .addMemOperand(*I.memoperands_begin());
2085
2087 I.eraseFromParent();
2088 return true;
2089}
2090
// Materialize a 64-bit symbolic address for large code models as a
// MOVZ + three MOVK instructions, one 16-bit granule each (G0..G3), with the
// final MOVK writing directly into I's destination register.
2091 void AArch64InstructionSelector::materializeLargeCMVal(
2092 MachineInstr &I, const Value *V, unsigned OpFlags) {
2093 MachineBasicBlock &MBB = *I.getParent();
2094 MachineFunction &MF = *MBB.getParent();
2095 MachineRegisterInfo &MRI = MF.getRegInfo();
2096
// MOVZ seeds bits [15:0] (granule G0) from the symbol operand.
2097 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
2098 MovZ->addOperand(MF, I.getOperand(1));
2099 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
2101 MovZ->addOperand(MF, MachineOperand::CreateImm(0));
2103
// Helper: append a MOVK inserting the next 16-bit granule of V (selected by
// Flags/Offset) into SrcReg; writes ForceDstReg if non-zero, else a fresh
// vreg. Returns the def register for chaining.
2104 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
2105 Register ForceDstReg) {
2106 Register DstReg = ForceDstReg
2107 ? ForceDstReg
2108 : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2109 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
2110 if (auto *GV = dyn_cast<GlobalValue>(V)) {
2111 MovI->addOperand(MF, MachineOperand::CreateGA(
2112 GV, MovZ->getOperand(1).getOffset(), Flags));
2113 } else {
2114 MovI->addOperand(
2116 MovZ->getOperand(1).getOffset(), Flags));
2117 }
2120 return DstReg;
2121 };
// Chain G1, G2 and finally G3 (into the original destination register).
2122 Register DstReg = BuildMovK(MovZ.getReg(0),
2124 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
2125 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
2126 }
2127
// Pre-selection lowering hook: rewrites certain generic instructions in place
// (mostly pointer->integer type changes) so the TableGen-imported patterns
// can match them. Returns true if I was modified.
2128 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
2129 MachineBasicBlock &MBB = *I.getParent();
2130 MachineFunction &MF = *MBB.getParent();
2131 MachineRegisterInfo &MRI = MF.getRegInfo();
2132
2133 switch (I.getOpcode()) {
2134 case TargetOpcode::G_CONSTANT: {
2135 Register DefReg = I.getOperand(0).getReg();
2136 const LLT DefTy = MRI.getType(DefReg);
2137 if (!DefTy.isPointer())
2138 return false;
2139 const unsigned PtrSize = DefTy.getSizeInBits();
2140 if (PtrSize != 32 && PtrSize != 64)
2141 return false;
2142 // Convert pointer typed constants to integers so TableGen can select.
2143 MRI.setType(DefReg, LLT::scalar(PtrSize))
2144 return true;
2145 }
2146 case TargetOpcode::G_STORE: {
2147 bool Changed = contractCrossBankCopyIntoStore(I, MRI);
2148 MachineOperand &SrcOp = I.getOperand(0);
2149 if (MRI.getType(SrcOp.getReg()).isPointer()) {
2150 // Allow matching with imported patterns for stores of pointers. Unlike
2151 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
2152 // and constrain.
2153 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
2154 Register NewSrc = Copy.getReg(0);
2155 SrcOp.setReg(NewSrc);
2156 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
2157 Changed = true;
2158 }
2159 return Changed;
2160 }
2161 case TargetOpcode::G_PTR_ADD: {
2162 // If Checked Pointer Arithmetic (FEAT_CPA) is present, preserve the pointer
2163 // arithmetic semantics instead of falling back to regular arithmetic.
2164 const auto &TL = STI.getTargetLowering();
2165 if (TL->shouldPreservePtrArith(MF.getFunction(), EVT()))
2166 return false;
2167 return convertPtrAddToAdd(I, MRI);
2168 }
2169 case TargetOpcode::G_LOAD: {
2170 // For scalar loads of pointers, we try to convert the dest type from p0
2171 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
2172 // conversion, this should be ok because all users should have been
2173 // selected already, so the type doesn't matter for them.
2174 Register DstReg = I.getOperand(0).getReg();
2175 const LLT DstTy = MRI.getType(DstReg);
2176 if (!DstTy.isPointer())
2177 return false;
2178 MRI.setType(DstReg, LLT::scalar(64));
2179 return true;
2180 }
2181 case AArch64::G_DUP: {
2182 // Convert the type from p0 to s64 to help selection.
2183 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2184 if (!DstTy.isPointerVector())
2185 return false;
// Copy the pointer scalar source into an s64 GPR and retype the vector.
2186 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
2187 MRI.setType(I.getOperand(0).getReg(),
2188 DstTy.changeElementType(LLT::scalar(64)));
2189 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2190 I.getOperand(1).setReg(NewSrc.getReg(0));
2191 return true;
2192 }
2193 case AArch64::G_INSERT_VECTOR_ELT: {
2194 // Convert the type from p0 to s64 to help selection.
2195 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2196 LLT SrcVecTy = MRI.getType(I.getOperand(1).getReg());
2197 if (!SrcVecTy.isPointerVector())
2198 return false;
// Retype both the source vector and the result, and route the inserted
// scalar through an s64 GPR copy.
2199 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(2).getReg());
2200 MRI.setType(I.getOperand(1).getReg(),
2201 DstTy.changeElementType(LLT::scalar(64)));
2202 MRI.setType(I.getOperand(0).getReg(),
2203 DstTy.changeElementType(LLT::scalar(64)));
2204 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
2205 I.getOperand(2).setReg(NewSrc.getReg(0));
2206 return true;
2207 }
2208 case TargetOpcode::G_UITOFP:
2209 case TargetOpcode::G_SITOFP: {
2210 // If both source and destination regbanks are FPR, then convert the opcode
2211 // to G_SITOF so that the importer can select it to an fpr variant.
2212 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank
2213 // copy.
2214 Register SrcReg = I.getOperand(1).getReg();
2215 LLT SrcTy = MRI.getType(SrcReg);
2216 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2217 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2218 return false;
2219
2220 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2221 if (I.getOpcode() == TargetOpcode::G_SITOFP)
2222 I.setDesc(TII.get(AArch64::G_SITOF));
2223 else
2224 I.setDesc(TII.get(AArch64::G_UITOF));
2225 return true;
2226 }
2227 return false;
2228 }
2229 default:
2230 return false;
2231 }
2232 }
2233
2234 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2235 /// them to a standard G_ADD with a COPY on the source.
2236 ///
2237 /// The motivation behind this is to expose the add semantics to the imported
2238 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2239 /// because the selector works bottom up, uses before defs. By the time we
2240 /// end up trying to select a G_PTR_ADD, we should have already attempted to
2241 /// fold this into addressing modes and were therefore unsuccessful.
2242 bool AArch64InstructionSelector::convertPtrAddToAdd(
2243 MachineInstr &I, MachineRegisterInfo &MRI) {
2244 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2245 Register DstReg = I.getOperand(0).getReg();
2246 Register AddOp1Reg = I.getOperand(1).getReg();
2247 const LLT PtrTy = MRI.getType(DstReg);
// Only the default address space is handled here.
2248 if (PtrTy.getAddressSpace() != 0)
2249 return false;
2250
// Cast the pointer base to an integer (vector) type of the same width.
2251 const LLT CastPtrTy =
2252 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2253 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2254 // Set regbanks on the registers.
2255 if (PtrTy.isVector())
2256 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2257 else
2258 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2259
2260 // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2261 // %dst(intty) = G_ADD %intbase, off
2262 I.setDesc(TII.get(TargetOpcode::G_ADD));
2263 MRI.setType(DstReg, CastPtrTy);
2264 I.getOperand(1).setReg(PtrToInt.getReg(0));
// The freshly-built G_PTRTOINT must be selected immediately since the
// surrounding selection loop will not revisit it.
2265 if (!select(*PtrToInt)) {
2266 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2267 return false;
2268 }
2269
2270 // Also take the opportunity here to try to do some optimization.
2271 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2272 Register NegatedReg;
2273 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2274 return true;
2275 I.getOperand(2).setReg(NegatedReg);
2276 I.setDesc(TII.get(TargetOpcode::G_SUB));
2277 return true;
2278 }
2279
2280 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2281 MachineRegisterInfo &MRI) {
2282 // We try to match the immediate variant of LSL, which is actually an alias
2283 // for a special case of UBFM. Otherwise, we fall back to the imported
2284 // selector which will match the register variant.
2285 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2286 const auto &MO = I.getOperand(2);
2287 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
// Only constant shift amounts qualify for the immediate (UBFM) form.
2288 if (!VRegAndVal)
2289 return false;
2290
2291 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2292 if (DstTy.isVector())
2293 return false;
2294 bool Is64Bit = DstTy.getSizeInBits() == 64;
// Compute the two UBFM immediates (immr/imms) via the complex renderers.
2295 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2296 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2297
2298 if (!Imm1Fn || !Imm2Fn)
2299 return false;
2300
2301 auto NewI =
2302 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2303 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2304
// Apply the renderer callbacks to append the immediate operands.
2305 for (auto &RenderFn : *Imm1Fn)
2306 RenderFn(NewI);
2307 for (auto &RenderFn : *Imm2Fn)
2308 RenderFn(NewI);
2309
2310 I.eraseFromParent();
2312 return true;
2313 }
2314
2315bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2316 MachineInstr &I, MachineRegisterInfo &MRI) {
2317 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2318 // If we're storing a scalar, it doesn't matter what register bank that
2319 // scalar is on. All that matters is the size.
2320 //
2321 // So, if we see something like this (with a 32-bit scalar as an example):
2322 //
2323 // %x:gpr(s32) = ... something ...
2324 // %y:fpr(s32) = COPY %x:gpr(s32)
2325 // G_STORE %y:fpr(s32)
2326 //
2327 // We can fix this up into something like this:
2328 //
2329 // G_STORE %x:gpr(s32)
2330 //
2331 // And then continue the selection process normally.
2332 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2333 if (!DefDstReg.isValid())
2334 return false;
2335 LLT DefDstTy = MRI.getType(DefDstReg);
2336 Register StoreSrcReg = I.getOperand(0).getReg();
2337 LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2338
2339 // If we get something strange like a physical register, then we shouldn't
2340 // go any further.
2341 if (!DefDstTy.isValid())
2342 return false;
2343
2344 // Are the source and dst types the same size?
2345 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2346 return false;
2347
2348 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2349 RBI.getRegBank(DefDstReg, MRI, TRI))
2350 return false;
2351
2352 // We have a cross-bank copy, which is entering a store. Let's fold it.
2353 I.getOperand(0).setReg(DefDstReg);
2354 return true;
2355}
2356
// Custom selection routines that must run before the TableGen-imported
// selector, for opcodes where the imported patterns would otherwise produce
// a suboptimal (but valid) selection. Returns true if I was fully selected.
2357 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2358 assert(I.getParent() && "Instruction should be in a basic block!");
2359 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2360
2361 MachineBasicBlock &MBB = *I.getParent();
2362 MachineFunction &MF = *MBB.getParent();
2363 MachineRegisterInfo &MRI = MF.getRegInfo();
2364
2365 switch (I.getOpcode()) {
2366 case AArch64::G_DUP: {
2367 // Before selecting a DUP instruction, check if it is better selected as a
2368 // MOV or load from a constant pool.
2369 Register Src = I.getOperand(1).getReg();
2370 auto ValAndVReg = getAnyConstantVRegValWithLookThrough(
2371 Src, MRI, /*LookThroughInstrs=*/true, /*LookThroughAnyExt=*/true);
2372 if (!ValAndVReg)
2373 return false;
2374 LLVMContext &Ctx = MF.getFunction().getContext();
2375 Register Dst = I.getOperand(0).getReg();
// Build a splat constant vector of the DUP'd value, truncated to the
// destination's element width, and emit it as a constant instead.
2377 MRI.getType(Dst).getNumElements(),
2378 ConstantInt::get(
2379 Type::getIntNTy(Ctx, MRI.getType(Dst).getScalarSizeInBits()),
2380 ValAndVReg->Value.trunc(MRI.getType(Dst).getScalarSizeInBits())));
2381 if (!emitConstantVector(Dst, CV, MIB, MRI))
2382 return false;
2383 I.eraseFromParent();
2384 return true;
2385 }
2386 case TargetOpcode::G_SEXT:
2387 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2388 // over a normal extend.
2389 if (selectUSMovFromExtend(I, MRI))
2390 return true;
2391 return false;
2392 case TargetOpcode::G_BR:
2393 return false;
2394 case TargetOpcode::G_SHL:
2395 return earlySelectSHL(I, MRI);
2396 case TargetOpcode::G_CONSTANT: {
// Rewrite zero constants as copies of the zero register (WZR/XZR).
2397 bool IsZero = false;
2398 if (I.getOperand(1).isCImm())
2399 IsZero = I.getOperand(1).getCImm()->isZero();
2400 else if (I.getOperand(1).isImm())
2401 IsZero = I.getOperand(1).getImm() == 0;
2402
2403 if (!IsZero)
2404 return false;
2405
2406 Register DefReg = I.getOperand(0).getReg();
2407 LLT Ty = MRI.getType(DefReg);
2408 if (Ty.getSizeInBits() == 64) {
2409 I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2410 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2411 } else if (Ty.getSizeInBits() <= 32) {
2412 I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2413 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2414 } else
2415 return false;
2416
2417 I.setDesc(TII.get(TargetOpcode::COPY));
2418 return true;
2419 }
2420
2421 case TargetOpcode::G_ADD: {
2422 // Check if this is being fed by a G_ICMP on either side.
2423 //
2424 // (cmp pred, x, y) + z
2425 //
2426 // In the above case, when the cmp is true, we increment z by 1. So, we can
2427 // fold the add into the cset for the cmp by using cinc.
2428 //
2429 // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
2430 Register AddDst = I.getOperand(0).getReg();
2431 Register AddLHS = I.getOperand(1).getReg();
2432 Register AddRHS = I.getOperand(2).getReg();
2433 // Only handle scalars.
2434 LLT Ty = MRI.getType(AddLHS);
2435 if (Ty.isVector())
2436 return false;
2437 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2438 // bits.
2439 unsigned Size = Ty.getSizeInBits();
2440 if (Size != 32 && Size != 64)
2441 return false;
// Returns the single-use G_ICMP feeding Reg (possibly through a zext for
// the 64-bit case), or nullptr if there isn't one.
2442 auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2443 if (!MRI.hasOneNonDBGUse(Reg))
2444 return nullptr;
2445 // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2446 // compare.
2447 if (Size == 32)
2448 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2449 // We model scalar compares using 32-bit destinations right now.
2450 // If it's a 64-bit compare, it'll have 64-bit sources.
2451 Register ZExt;
2452 if (!mi_match(Reg, MRI,
2454 return nullptr;
2455 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2456 if (!Cmp ||
2457 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2458 return nullptr;
2459 return Cmp;
2460 };
2461 // Try to match
2462 // z + (cmp pred, x, y)
2463 MachineInstr *Cmp = MatchCmp(AddRHS);
2464 if (!Cmp) {
2465 // (cmp pred, x, y) + z
2466 std::swap(AddLHS, AddRHS);
2467 Cmp = MatchCmp(AddRHS);
2468 if (!Cmp)
2469 return false;
2470 }
// Emit the compare, then a CSINC with the inverse condition: the CSINC
// yields AddLHS+1 when the predicate holds, AddLHS otherwise.
2471 auto &PredOp = Cmp->getOperand(1);
2473 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2474 /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2475 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2477 CmpInst::getInversePredicate(Pred), Cmp->getOperand(3).getReg(), &MRI);
2478 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2479 I.eraseFromParent();
2480 return true;
2481 }
2482 case TargetOpcode::G_OR: {
2483 // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2484 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2485 // shifting and masking that we can replace with a BFI (encoded as a BFM).
2486 Register Dst = I.getOperand(0).getReg();
2487 LLT Ty = MRI.getType(Dst);
2488
2489 if (!Ty.isScalar())
2490 return false;
2491
2492 unsigned Size = Ty.getSizeInBits();
2493 if (Size != 32 && Size != 64)
2494 return false;
2495
2496 Register ShiftSrc;
2497 int64_t ShiftImm;
2498 Register MaskSrc;
2499 int64_t MaskImm;
2500 if (!mi_match(
2501 Dst, MRI,
2502 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2503 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2504 return false;
2505
// The AND mask must cover exactly the bits below the shift amount.
2506 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2507 return false;
2508
2509 int64_t Immr = Size - ShiftImm;
2510 int64_t Imms = Size - ShiftImm - 1;
2511 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2512 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2513 I.eraseFromParent();
2514 return true;
2515 }
2516 case TargetOpcode::G_FENCE: {
// Singlethread fences become a compiler-only MEMBARRIER; otherwise emit a
// DMB with ISHLD (0x9) or ISH (0xb) depending on the ordering operand.
2517 if (I.getOperand(1).getImm() == 0)
2518 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER));
2519 else
2520 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB))
2521 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb);
2522 I.eraseFromParent();
2523 return true;
2524 }
2525 default:
2526 return false;
2527 }
2528 }
2529
2530bool AArch64InstructionSelector::select(MachineInstr &I) {
2531 assert(I.getParent() && "Instruction should be in a basic block!");
2532 assert(I.getParent()->getParent() && "Instruction should be in a function!");
2533
2534 MachineBasicBlock &MBB = *I.getParent();
2535 MachineFunction &MF = *MBB.getParent();
2536 MachineRegisterInfo &MRI = MF.getRegInfo();
2537
2538 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
2539 if (Subtarget->requiresStrictAlign()) {
2540 // We don't support this feature yet.
2541 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2542 return false;
2543 }
2544
2546
2547 unsigned Opcode = I.getOpcode();
2548 // G_PHI requires same handling as PHI
2549 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2550 // Certain non-generic instructions also need some special handling.
2551
2552 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) {
2554 return true;
2555 }
2556
2557 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2558 const Register DefReg = I.getOperand(0).getReg();
2559 const LLT DefTy = MRI.getType(DefReg);
2560
2561 const RegClassOrRegBank &RegClassOrBank =
2562 MRI.getRegClassOrRegBank(DefReg);
2563
2564 const TargetRegisterClass *DefRC =
2566 if (!DefRC) {
2567 if (!DefTy.isValid()) {
2568 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2569 return false;
2570 }
2571 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
2572 DefRC = getRegClassForTypeOnBank(DefTy, RB);
2573 if (!DefRC) {
2574 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2575 return false;
2576 }
2577 }
2578
2579 I.setDesc(TII.get(TargetOpcode::PHI));
2580
2581 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2582 }
2583
2584 if (I.isCopy())
2585 return selectCopy(I, TII, MRI, TRI, RBI);
2586
2587 if (I.isDebugInstr())
2588 return selectDebugInstr(I, MRI, RBI);
2589
2590 return true;
2591 }
2592
2593
2594 if (I.getNumOperands() != I.getNumExplicitOperands()) {
2595 LLVM_DEBUG(
2596 dbgs() << "Generic instruction has unexpected implicit operands\n");
2597 return false;
2598 }
2599
2600 // Try to do some lowering before we start instruction selecting. These
2601 // lowerings are purely transformations on the input G_MIR and so selection
2602 // must continue after any modification of the instruction.
2603 if (preISelLower(I)) {
2604 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2605 }
2606
2607 // There may be patterns where the importer can't deal with them optimally,
2608 // but does select it to a suboptimal sequence so our custom C++ selection
2609 // code later never has a chance to work on it. Therefore, we have an early
2610 // selection attempt here to give priority to certain selection routines
2611 // over the imported ones.
2612 if (earlySelect(I))
2613 return true;
2614
2615 if (selectImpl(I, *CoverageInfo))
2616 return true;
2617
2618 LLT Ty =
2619 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2620
2621 switch (Opcode) {
2622 case TargetOpcode::G_SBFX:
2623 case TargetOpcode::G_UBFX: {
2624 static const unsigned OpcTable[2][2] = {
2625 {AArch64::UBFMWri, AArch64::UBFMXri},
2626 {AArch64::SBFMWri, AArch64::SBFMXri}};
2627 bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2628 unsigned Size = Ty.getSizeInBits();
2629 unsigned Opc = OpcTable[IsSigned][Size == 64];
2630 auto Cst1 =
2631 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2632 assert(Cst1 && "Should have gotten a constant for src 1?");
2633 auto Cst2 =
2634 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2635 assert(Cst2 && "Should have gotten a constant for src 2?");
2636 auto LSB = Cst1->Value.getZExtValue();
2637 auto Width = Cst2->Value.getZExtValue();
2638 auto BitfieldInst =
2639 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2640 .addImm(LSB)
2641 .addImm(LSB + Width - 1);
2642 I.eraseFromParent();
2643 constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2644 return true;
2645 }
2646 case TargetOpcode::G_BRCOND:
2647 return selectCompareBranch(I, MF, MRI);
2648
2649 case TargetOpcode::G_BRINDIRECT: {
2650 const Function &Fn = MF.getFunction();
2651 if (std::optional<uint16_t> BADisc =
2653 auto MI = MIB.buildInstr(AArch64::BRA, {}, {I.getOperand(0).getReg()});
2654 MI.addImm(AArch64PACKey::IA);
2655 MI.addImm(*BADisc);
2656 MI.addReg(/*AddrDisc=*/AArch64::XZR);
2657 I.eraseFromParent();
2659 return true;
2660 }
2661 I.setDesc(TII.get(AArch64::BR));
2663 return true;
2664 }
2665
2666 case TargetOpcode::G_BRJT:
2667 return selectBrJT(I, MRI);
2668
2669 case AArch64::G_ADD_LOW: {
2670 // This op may have been separated from it's ADRP companion by the localizer
2671 // or some other code motion pass. Given that many CPUs will try to
2672 // macro fuse these operations anyway, select this into a MOVaddr pseudo
2673 // which will later be expanded into an ADRP+ADD pair after scheduling.
2674 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2675 if (BaseMI->getOpcode() != AArch64::ADRP) {
2676 I.setDesc(TII.get(AArch64::ADDXri));
2677 I.addOperand(MachineOperand::CreateImm(0));
2679 return true;
2680 }
2682 "Expected small code model");
2683 auto Op1 = BaseMI->getOperand(1);
2684 auto Op2 = I.getOperand(2);
2685 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2686 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2687 Op1.getTargetFlags())
2688 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2689 Op2.getTargetFlags());
2690 I.eraseFromParent();
2691 constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2692 return true;
2693 }
2694
2695 case TargetOpcode::G_FCONSTANT: {
2696 const Register DefReg = I.getOperand(0).getReg();
2697 const LLT DefTy = MRI.getType(DefReg);
2698 const unsigned DefSize = DefTy.getSizeInBits();
2699 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2700
2701 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB);
2702 // For 16, 64, and 128b values, emit a constant pool load.
2703 switch (DefSize) {
2704 default:
2705 llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2706 case 32:
2707 case 64: {
2708 bool OptForSize = shouldOptForSize(&MF);
2709 const auto &TLI = MF.getSubtarget().getTargetLowering();
2710 // If TLI says that this fpimm is illegal, then we'll expand to a
2711 // constant pool load.
2712 if (TLI->isFPImmLegal(I.getOperand(1).getFPImm()->getValueAPF(),
2713 EVT::getFloatingPointVT(DefSize), OptForSize))
2714 break;
2715 [[fallthrough]];
2716 }
2717 case 16:
2718 case 128: {
2719 auto *FPImm = I.getOperand(1).getFPImm();
2720 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2721 if (!LoadMI) {
2722 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2723 return false;
2724 }
2725 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2726 I.eraseFromParent();
2727 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2728 }
2729 }
2730
2731 assert((DefSize == 32 || DefSize == 64) && "Unexpected const def size");
2732 // Either emit a FMOV, or emit a copy to emit a normal mov.
2733 const Register DefGPRReg = MRI.createVirtualRegister(
2734 DefSize == 32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2735 MachineOperand &RegOp = I.getOperand(0);
2736 RegOp.setReg(DefGPRReg);
2737 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2738 MIB.buildCopy({DefReg}, {DefGPRReg});
2739
2740 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2741 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2742 return false;
2743 }
2744
2745 MachineOperand &ImmOp = I.getOperand(1);
2746 ImmOp.ChangeToImmediate(
2748
2749 const unsigned MovOpc =
2750 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2751 I.setDesc(TII.get(MovOpc));
2753 return true;
2754 }
2755 case TargetOpcode::G_EXTRACT: {
2756 Register DstReg = I.getOperand(0).getReg();
2757 Register SrcReg = I.getOperand(1).getReg();
2758 LLT SrcTy = MRI.getType(SrcReg);
2759 LLT DstTy = MRI.getType(DstReg);
2760 (void)DstTy;
2761 unsigned SrcSize = SrcTy.getSizeInBits();
2762
2763 if (SrcTy.getSizeInBits() > 64) {
2764 // This should be an extract of an s128, which is like a vector extract.
2765 if (SrcTy.getSizeInBits() != 128)
2766 return false;
2767 // Only support extracting 64 bits from an s128 at the moment.
2768 if (DstTy.getSizeInBits() != 64)
2769 return false;
2770
2771 unsigned Offset = I.getOperand(2).getImm();
2772 if (Offset % 64 != 0)
2773 return false;
2774
2775 // Check we have the right regbank always.
2776 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2777 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2778 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2779
2780 if (SrcRB.getID() == AArch64::GPRRegBankID) {
2781 auto NewI =
2782 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2783 .addUse(SrcReg, {},
2784 Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2785 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI,
2786 AArch64::GPR64RegClass, NewI->getOperand(0));
2787 I.eraseFromParent();
2788 return true;
2789 }
2790
2791 // Emit the same code as a vector extract.
2792 // Offset must be a multiple of 64.
2793 unsigned LaneIdx = Offset / 64;
2794 MachineInstr *Extract = emitExtractVectorElt(
2795 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2796 if (!Extract)
2797 return false;
2798 I.eraseFromParent();
2799 return true;
2800 }
2801
2802 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2803 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2804 Ty.getSizeInBits() - 1);
2805
2806 if (SrcSize < 64) {
2807 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2808 "unexpected G_EXTRACT types");
2810 return true;
2811 }
2812
2813 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2814 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2815 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2816 .addReg(DstReg, {}, AArch64::sub_32);
2817 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2818 AArch64::GPR32RegClass, MRI);
2819 I.getOperand(0).setReg(DstReg);
2820
2822 return true;
2823 }
2824
2825 case TargetOpcode::G_INSERT: {
2826 LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2827 LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2828 unsigned DstSize = DstTy.getSizeInBits();
2829 // Larger inserts are vectors, same-size ones should be something else by
2830 // now (split up or turned into COPYs).
2831 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2832 return false;
2833
2834 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2835 unsigned LSB = I.getOperand(3).getImm();
2836 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2837 I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2838 MachineInstrBuilder(MF, I).addImm(Width - 1);
2839
2840 if (DstSize < 64) {
2841 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2842 "unexpected G_INSERT types");
2844 return true;
2845 }
2846
2848 BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2849 TII.get(AArch64::SUBREG_TO_REG))
2850 .addDef(SrcReg)
2851 .addUse(I.getOperand(2).getReg())
2852 .addImm(AArch64::sub_32);
2853 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2854 AArch64::GPR32RegClass, MRI);
2855 I.getOperand(2).setReg(SrcReg);
2856
2858 return true;
2859 }
2860 case TargetOpcode::G_FRAME_INDEX: {
2861 // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2862 if (Ty != LLT::pointer(0, 64)) {
2863 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2864 << ", expected: " << LLT::pointer(0, 64) << '\n');
2865 return false;
2866 }
2867 I.setDesc(TII.get(AArch64::ADDXri));
2868
2869 // MOs for a #0 shifted immediate.
2870 I.addOperand(MachineOperand::CreateImm(0));
2871 I.addOperand(MachineOperand::CreateImm(0));
2872
2874 return true;
2875 }
2876
2877 case TargetOpcode::G_GLOBAL_VALUE: {
2878 const GlobalValue *GV = nullptr;
2879 unsigned OpFlags;
2880 if (I.getOperand(1).isSymbol()) {
2881 OpFlags = I.getOperand(1).getTargetFlags();
2882 // Currently only used by "RtLibUseGOT".
2883 assert(OpFlags == AArch64II::MO_GOT);
2884 } else {
2885 GV = I.getOperand(1).getGlobal();
2886 if (GV->isThreadLocal()) {
2887 // We don't support instructions with emulated TLS variables yet
2888 if (TM.useEmulatedTLS())
2889 return false;
2890 return selectTLSGlobalValue(I, MRI);
2891 }
2892 OpFlags = STI.ClassifyGlobalReference(GV, TM);
2893 }
2894
2895 if (OpFlags & AArch64II::MO_GOT) {
2896 bool IsGOTSigned = MF.getInfo<AArch64FunctionInfo>()->hasELFSignedGOT();
2897 I.setDesc(TII.get(IsGOTSigned ? AArch64::LOADgotAUTH : AArch64::LOADgot));
2898 I.getOperand(1).setTargetFlags(OpFlags);
2899 I.addImplicitDefUseOperands(MF);
2900 } else if (TM.getCodeModel() == CodeModel::Large &&
2901 !TM.isPositionIndependent()) {
2902 // Materialize the global using movz/movk instructions.
2903 materializeLargeCMVal(I, GV, OpFlags);
2904 I.eraseFromParent();
2905 return true;
2906 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2907 I.setDesc(TII.get(AArch64::ADR));
2908 I.getOperand(1).setTargetFlags(OpFlags);
2909 } else {
2910 I.setDesc(TII.get(AArch64::MOVaddr));
2911 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2912 MachineInstrBuilder MIB(MF, I);
2913 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2915 }
2917 return true;
2918 }
2919
2920 case TargetOpcode::G_PTRAUTH_GLOBAL_VALUE:
2921 return selectPtrAuthGlobalValue(I, MRI);
2922
2923 case TargetOpcode::G_ZEXTLOAD:
2924 case TargetOpcode::G_LOAD:
2925 case TargetOpcode::G_STORE: {
2926 GLoadStore &LdSt = cast<GLoadStore>(I);
2927 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2928 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2929
2930 // Can only handle AddressSpace 0, 64-bit pointers.
2931 if (PtrTy != LLT::pointer(0, 64)) {
2932 return false;
2933 }
2934
2935 uint64_t MemSizeInBytes = LdSt.getMemSize().getValue();
2936 unsigned MemSizeInBits = LdSt.getMemSizeInBits().getValue();
2937 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2938
2939 // Need special instructions for atomics that affect ordering.
2940 if (isStrongerThanMonotonic(Order)) {
2941 assert(!isa<GZExtLoad>(LdSt));
2942 assert(MemSizeInBytes <= 8 &&
2943 "128-bit atomics should already be custom-legalized");
2944
2945 if (isa<GLoad>(LdSt)) {
2946 static constexpr unsigned LDAPROpcodes[] = {
2947 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX};
2948 static constexpr unsigned LDAROpcodes[] = {
2949 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX};
2950 ArrayRef<unsigned> Opcodes =
2951 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent
2952 ? LDAPROpcodes
2953 : LDAROpcodes;
2954 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2955 } else {
2956 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2957 AArch64::STLRW, AArch64::STLRX};
2958 Register ValReg = LdSt.getReg(0);
2959 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2960 // Emit a subreg copy of 32 bits.
2961 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2962 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2963 .addReg(I.getOperand(0).getReg(), {}, AArch64::sub_32);
2964 I.getOperand(0).setReg(NewVal);
2965 }
2966 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2967 }
2969 return true;
2970 }
2971
2972#ifndef NDEBUG
2973 const Register PtrReg = LdSt.getPointerReg();
2974 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2975 // Check that the pointer register is valid.
2976 assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2977 "Load/Store pointer operand isn't a GPR");
2978 assert(MRI.getType(PtrReg).isPointer() &&
2979 "Load/Store pointer operand isn't a pointer");
2980#endif
2981
2982 const Register ValReg = LdSt.getReg(0);
2983 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2984 LLT ValTy = MRI.getType(ValReg);
2985
2986 // The code below doesn't support truncating stores, so we need to split it
2987 // again.
2988 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2989 unsigned SubReg;
2990 LLT MemTy = LdSt.getMMO().getMemoryType();
2991 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
2992 if (!getSubRegForClass(RC, TRI, SubReg))
2993 return false;
2994
2995 // Generate a subreg copy.
2996 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2997 .addReg(ValReg, {}, SubReg)
2998 .getReg(0);
2999 RBI.constrainGenericRegister(Copy, *RC, MRI);
3000 LdSt.getOperand(0).setReg(Copy);
3001 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
3002 // If this is an any-extending load from the FPR bank, split it into a regular
3003 // load + extend.
3004 if (RB.getID() == AArch64::FPRRegBankID) {
3005 unsigned SubReg;
3006 LLT MemTy = LdSt.getMMO().getMemoryType();
3007 auto *RC = getRegClassForTypeOnBank(MemTy, RB);
3008 if (!getSubRegForClass(RC, TRI, SubReg))
3009 return false;
3010 Register OldDst = LdSt.getReg(0);
3011 Register NewDst =
3013 LdSt.getOperand(0).setReg(NewDst);
3014 MRI.setRegBank(NewDst, RB);
3015 // Generate a SUBREG_TO_REG to extend it.
3016 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
3017 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
3018 .addUse(NewDst)
3019 .addImm(SubReg);
3020 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB);
3021 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
3022 MIB.setInstr(LdSt);
3023 ValTy = MemTy; // This is no longer an extending load.
3024 }
3025 }
3026
3027 // Helper lambda for partially selecting I. Either returns the original
3028 // instruction with an updated opcode, or a new instruction.
3029 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
3030 bool IsStore = isa<GStore>(I);
3031 const unsigned NewOpc =
3032 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
3033 if (NewOpc == I.getOpcode())
3034 return nullptr;
3035 // Check if we can fold anything into the addressing mode.
3036 auto AddrModeFns =
3037 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
3038 if (!AddrModeFns) {
3039 // Can't fold anything. Use the original instruction.
3040 I.setDesc(TII.get(NewOpc));
3041 I.addOperand(MachineOperand::CreateImm(0));
3042 return &I;
3043 }
3044
3045 // Folded something. Create a new instruction and return it.
3046 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
3047 Register CurValReg = I.getOperand(0).getReg();
3048 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
3049 NewInst.cloneMemRefs(I);
3050 for (auto &Fn : *AddrModeFns)
3051 Fn(NewInst);
3052 I.eraseFromParent();
3053 return &*NewInst;
3054 };
3055
3056 MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
3057 if (!LoadStore)
3058 return false;
3059
3060 // If we're storing a 0, use WZR/XZR.
3061 if (Opcode == TargetOpcode::G_STORE) {
3063 LoadStore->getOperand(0).getReg(), MRI);
3064 if (CVal && CVal->Value == 0) {
3065 switch (LoadStore->getOpcode()) {
3066 case AArch64::STRWui:
3067 case AArch64::STRHHui:
3068 case AArch64::STRBBui:
3069 LoadStore->getOperand(0).setReg(AArch64::WZR);
3070 break;
3071 case AArch64::STRXui:
3072 LoadStore->getOperand(0).setReg(AArch64::XZR);
3073 break;
3074 }
3075 }
3076 }
3077
3078 if (IsZExtLoad || (Opcode == TargetOpcode::G_LOAD &&
3079 ValTy == LLT::scalar(64) && MemSizeInBits == 32)) {
3080 // The any/zextload from a smaller type to i32 should be handled by the
3081 // importer.
3082 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
3083 return false;
3084 // If we have an extending load then change the load's type to be a
3085 // narrower reg and zero_extend with SUBREG_TO_REG.
3086 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3087 Register DstReg = LoadStore->getOperand(0).getReg();
3088 LoadStore->getOperand(0).setReg(LdReg);
3089
3090 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
3091 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
3092 .addUse(LdReg)
3093 .addImm(AArch64::sub_32);
3094 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3095 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
3096 MRI);
3097 }
3098 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
3099 return true;
3100 }
3101
3102 case TargetOpcode::G_INDEXED_ZEXTLOAD:
3103 case TargetOpcode::G_INDEXED_SEXTLOAD:
3104 return selectIndexedExtLoad(I, MRI);
3105 case TargetOpcode::G_INDEXED_LOAD:
3106 return selectIndexedLoad(I, MRI);
3107 case TargetOpcode::G_INDEXED_STORE:
3108 return selectIndexedStore(cast<GIndexedStore>(I), MRI);
3109
3110 case TargetOpcode::G_LSHR:
3111 case TargetOpcode::G_ASHR:
3112 if (MRI.getType(I.getOperand(0).getReg()).isVector())
3113 return selectVectorAshrLshr(I, MRI);
3114 [[fallthrough]];
3115 case TargetOpcode::G_SHL:
3116 if (Opcode == TargetOpcode::G_SHL &&
3117 MRI.getType(I.getOperand(0).getReg()).isVector())
3118 return selectVectorSHL(I, MRI);
3119
3120 // These shifts were legalized to have 64 bit shift amounts because we
3121 // want to take advantage of the selection patterns that assume the
3122 // immediates are s64s, however, selectBinaryOp will assume both operands
3123 // will have the same bit size.
3124 {
3125 Register SrcReg = I.getOperand(1).getReg();
3126 Register ShiftReg = I.getOperand(2).getReg();
3127 const LLT ShiftTy = MRI.getType(ShiftReg);
3128 const LLT SrcTy = MRI.getType(SrcReg);
3129 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 &&
3130 ShiftTy.getSizeInBits() == 64) {
3131 assert(!ShiftTy.isVector() && "unexpected vector shift ty");
3132 // Insert a subregister copy to implement a 64->32 trunc
3133 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
3134 .addReg(ShiftReg, {}, AArch64::sub_32);
3135 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
3136 I.getOperand(2).setReg(Trunc.getReg(0));
3137 }
3138 }
3139 [[fallthrough]];
3140 case TargetOpcode::G_OR: {
3141 // Reject the various things we don't support yet.
3142 if (unsupportedBinOp(I, RBI, MRI, TRI))
3143 return false;
3144
3145 const unsigned OpSize = Ty.getSizeInBits();
3146
3147 const Register DefReg = I.getOperand(0).getReg();
3148 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
3149
3150 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
3151 if (NewOpc == I.getOpcode())
3152 return false;
3153
3154 I.setDesc(TII.get(NewOpc));
3155 // FIXME: Should the type be always reset in setDesc?
3156
3157 // Now that we selected an opcode, we need to constrain the register
3158 // operands to use appropriate classes.
3160 return true;
3161 }
3162
3163 case TargetOpcode::G_PTR_ADD: {
3164 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
3165 I.eraseFromParent();
3166 return true;
3167 }
3168
3169 case TargetOpcode::G_SADDE:
3170 case TargetOpcode::G_UADDE:
3171 case TargetOpcode::G_SSUBE:
3172 case TargetOpcode::G_USUBE:
3173 case TargetOpcode::G_SADDO:
3174 case TargetOpcode::G_UADDO:
3175 case TargetOpcode::G_SSUBO:
3176 case TargetOpcode::G_USUBO:
3177 return selectOverflowOp(I, MRI);
3178
3179 case TargetOpcode::G_PTRMASK: {
3180 Register MaskReg = I.getOperand(2).getReg();
3181 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3182 // TODO: Implement arbitrary cases
3183 if (!MaskVal || !isShiftedMask_64(*MaskVal))
3184 return false;
3185
3186 uint64_t Mask = *MaskVal;
3187 I.setDesc(TII.get(AArch64::ANDXri));
3188 I.getOperand(2).ChangeToImmediate(
3190
3192 return true;
3193 }
3194 case TargetOpcode::G_PTRTOINT:
3195 case TargetOpcode::G_TRUNC: {
3196 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3197 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3198
3199 const Register DstReg = I.getOperand(0).getReg();
3200 const Register SrcReg = I.getOperand(1).getReg();
3201
3202 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3203 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3204
3205 if (DstRB.getID() != SrcRB.getID()) {
3206 LLVM_DEBUG(
3207 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3208 return false;
3209 }
3210
3211 if (DstRB.getID() == AArch64::GPRRegBankID) {
3212 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3213 if (!DstRC)
3214 return false;
3215
3216 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB);
3217 if (!SrcRC)
3218 return false;
3219
3220 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3221 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3222 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3223 return false;
3224 }
3225
3226 if (DstRC == SrcRC) {
3227 // Nothing to be done
3228 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3229 SrcTy == LLT::scalar(64)) {
3230 llvm_unreachable("TableGen can import this case");
3231 return false;
3232 } else if (DstRC == &AArch64::GPR32RegClass &&
3233 SrcRC == &AArch64::GPR64RegClass) {
3234 I.getOperand(1).setSubReg(AArch64::sub_32);
3235 } else {
3236 LLVM_DEBUG(
3237 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3238 return false;
3239 }
3240
3241 I.setDesc(TII.get(TargetOpcode::COPY));
3242 return true;
3243 } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3244 if (DstTy == LLT::fixed_vector(4, 16) &&
3245 SrcTy == LLT::fixed_vector(4, 32)) {
3246 I.setDesc(TII.get(AArch64::XTNv4i16));
3248 return true;
3249 }
3250
3251 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3252 MachineInstr *Extract = emitExtractVectorElt(
3253 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3254 if (!Extract)
3255 return false;
3256 I.eraseFromParent();
3257 return true;
3258 }
3259
3260 // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3261 if (Opcode == TargetOpcode::G_PTRTOINT) {
3262 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3263 I.setDesc(TII.get(TargetOpcode::COPY));
3264 return selectCopy(I, TII, MRI, TRI, RBI);
3265 }
3266 }
3267
3268 return false;
3269 }
3270
3271 case TargetOpcode::G_ANYEXT: {
3272 if (selectUSMovFromExtend(I, MRI))
3273 return true;
3274
3275 const Register DstReg = I.getOperand(0).getReg();
3276 const Register SrcReg = I.getOperand(1).getReg();
3277
3278 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3279 if (RBDst.getID() != AArch64::GPRRegBankID) {
3280 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3281 << ", expected: GPR\n");
3282 return false;
3283 }
3284
3285 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3286 if (RBSrc.getID() != AArch64::GPRRegBankID) {
3287 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3288 << ", expected: GPR\n");
3289 return false;
3290 }
3291
3292 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3293
3294 if (DstSize == 0) {
3295 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3296 return false;
3297 }
3298
3299 if (DstSize != 64 && DstSize > 32) {
3300 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3301 << ", expected: 32 or 64\n");
3302 return false;
3303 }
3304 // At this point G_ANYEXT is just like a plain COPY, but we need
3305 // to explicitly form the 64-bit value if any.
3306 if (DstSize > 32) {
3307 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3308 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3309 .addDef(ExtSrc)
3310 .addUse(SrcReg)
3311 .addImm(AArch64::sub_32);
3312 I.getOperand(1).setReg(ExtSrc);
3313 }
3314 return selectCopy(I, TII, MRI, TRI, RBI);
3315 }
3316
3317 case TargetOpcode::G_ZEXT:
3318 case TargetOpcode::G_SEXT_INREG:
3319 case TargetOpcode::G_SEXT: {
3320 if (selectUSMovFromExtend(I, MRI))
3321 return true;
3322
3323 unsigned Opcode = I.getOpcode();
3324 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3325 const Register DefReg = I.getOperand(0).getReg();
3326 Register SrcReg = I.getOperand(1).getReg();
3327 const LLT DstTy = MRI.getType(DefReg);
3328 const LLT SrcTy = MRI.getType(SrcReg);
3329 unsigned DstSize = DstTy.getSizeInBits();
3330 unsigned SrcSize = SrcTy.getSizeInBits();
3331
3332 // SEXT_INREG has the same src reg size as dst, the size of the value to be
3333 // extended is encoded in the imm.
3334 if (Opcode == TargetOpcode::G_SEXT_INREG)
3335 SrcSize = I.getOperand(2).getImm();
3336
3337 if (DstTy.isVector())
3338 return false; // Should be handled by imported patterns.
3339
3340 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3341 AArch64::GPRRegBankID &&
3342 "Unexpected ext regbank");
3343
3344 MachineInstr *ExtI;
3345
3346 // First check if we're extending the result of a load which has a dest type
3347 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest
3348 // GPR register on AArch64 and all loads which are smaller automatically
3349 // zero-extend the upper bits. E.g.
3350 // %v(s8) = G_LOAD %p, :: (load 1)
3351 // %v2(s32) = G_ZEXT %v(s8)
3352 if (!IsSigned) {
3353 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3354 bool IsGPR =
3355 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3356 if (LoadMI && IsGPR) {
3357 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3358 unsigned BytesLoaded = MemOp->getSize().getValue();
3359 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3360 return selectCopy(I, TII, MRI, TRI, RBI);
3361 }
3362
3363 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3364 // + SUBREG_TO_REG.
3365 if (IsGPR && SrcSize == 32 && DstSize == 64) {
3366 Register SubregToRegSrc =
3367 MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3368 const Register ZReg = AArch64::WZR;
3369 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg})
3370 .addImm(0);
3371
3372 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3373 .addUse(SubregToRegSrc)
3374 .addImm(AArch64::sub_32);
3375
3376 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3377 MRI)) {
3378 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3379 return false;
3380 }
3381
3382 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3383 MRI)) {
3384 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3385 return false;
3386 }
3387
3388 I.eraseFromParent();
3389 return true;
3390 }
3391 }
3392
3393 if (DstSize == 64) {
3394 if (Opcode != TargetOpcode::G_SEXT_INREG) {
3395 // FIXME: Can we avoid manually doing this?
3396 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3397 MRI)) {
3398 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3399 << " operand\n");
3400 return false;
3401 }
3402 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3403 {&AArch64::GPR64RegClass}, {})
3404 .addUse(SrcReg)
3405 .addImm(AArch64::sub_32)
3406 .getReg(0);
3407 }
3408
3409 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3410 {DefReg}, {SrcReg})
3411 .addImm(0)
3412 .addImm(SrcSize - 1);
3413 } else if (DstSize <= 32) {
3414 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3415 {DefReg}, {SrcReg})
3416 .addImm(0)
3417 .addImm(SrcSize - 1);
3418 } else {
3419 return false;
3420 }
3421
3423 I.eraseFromParent();
3424 return true;
3425 }
3426
3427 case TargetOpcode::G_FREEZE:
3428 return selectCopy(I, TII, MRI, TRI, RBI);
3429
3430 case TargetOpcode::G_INTTOPTR:
3431 // The importer is currently unable to import pointer types since they
3432 // didn't exist in SelectionDAG.
3433 return selectCopy(I, TII, MRI, TRI, RBI);
3434
3435 case TargetOpcode::G_BITCAST:
3436 // Imported SelectionDAG rules can handle every bitcast except those that
3437 // bitcast from a type to the same type. Ideally, these shouldn't occur
3438 // but we might not run an optimizer that deletes them. The other exception
3439 // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3440 // of them.
3441 return selectCopy(I, TII, MRI, TRI, RBI);
3442
3443 case TargetOpcode::G_SELECT: {
3444 auto &Sel = cast<GSelect>(I);
3445 const Register CondReg = Sel.getCondReg();
3446 const Register TReg = Sel.getTrueReg();
3447 const Register FReg = Sel.getFalseReg();
3448
3449 if (tryOptSelect(Sel))
3450 return true;
3451
3452 // Make sure to use an unused vreg instead of wzr, so that the peephole
3453 // optimizations will be able to optimize these.
3454 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3455 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3456 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3458 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB))
3459 return false;
3460 Sel.eraseFromParent();
3461 return true;
3462 }
3463 case TargetOpcode::G_ICMP: {
3464 if (Ty.isVector())
3465 return false;
3466
3467 if (Ty != LLT::scalar(32)) {
3468 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3469 << ", expected: " << LLT::scalar(32) << '\n');
3470 return false;
3471 }
3472
3473 auto &PredOp = I.getOperand(1);
3474 emitIntegerCompare(I.getOperand(2), I.getOperand(3), PredOp, MIB);
3475 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
3477 CmpInst::getInversePredicate(Pred), I.getOperand(3).getReg(), &MRI);
3478 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3479 /*Src2=*/AArch64::WZR, InvCC, MIB);
3480 I.eraseFromParent();
3481 return true;
3482 }
3483
3484 case TargetOpcode::G_FCMP: {
3485 CmpInst::Predicate Pred =
3486 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3487 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3488 Pred) ||
3489 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3490 return false;
3491 I.eraseFromParent();
3492 return true;
3493 }
3494 case TargetOpcode::G_VASTART:
3495 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3496 : selectVaStartAAPCS(I, MF, MRI);
3497 case TargetOpcode::G_INTRINSIC:
3498 return selectIntrinsic(I, MRI);
3499 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3500 return selectIntrinsicWithSideEffects(I, MRI);
3501 case TargetOpcode::G_IMPLICIT_DEF: {
3502 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3503 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3504 const Register DstReg = I.getOperand(0).getReg();
3505 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3506 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3507 RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3508 return true;
3509 }
3510 case TargetOpcode::G_BLOCK_ADDR: {
3511 Function *BAFn = I.getOperand(1).getBlockAddress()->getFunction();
3512 if (std::optional<uint16_t> BADisc =
3514 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
3515 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
3516 MIB.buildInstr(AArch64::MOVaddrPAC)
3517 .addBlockAddress(I.getOperand(1).getBlockAddress())
3519 .addReg(/*AddrDisc=*/AArch64::XZR)
3520 .addImm(*BADisc)
3521 .constrainAllUses(TII, TRI, RBI);
3522 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X16));
3523 RBI.constrainGenericRegister(I.getOperand(0).getReg(),
3524 AArch64::GPR64RegClass, MRI);
3525 I.eraseFromParent();
3526 return true;
3527 }
3529 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3530 I.eraseFromParent();
3531 return true;
3532 } else {
3533 I.setDesc(TII.get(AArch64::MOVaddrBA));
3534 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3535 I.getOperand(0).getReg())
3536 .addBlockAddress(I.getOperand(1).getBlockAddress(),
3537 /* Offset */ 0, AArch64II::MO_PAGE)
3539 I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3541 I.eraseFromParent();
3543 return true;
3544 }
3545 }
3546 case AArch64::G_DUP: {
3547 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by
3548 // imported patterns. Do it manually here. Avoiding generating s16 gpr is
3549 // difficult because at RBS we may end up pessimizing the fpr case if we
3550 // decided to add an anyextend to fix this. Manual selection is the most
3551 // robust solution for now.
3552 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3553 AArch64::GPRRegBankID)
3554 return false; // We expect the fpr regbank case to be imported.
3555 LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3556 if (VecTy == LLT::fixed_vector(8, 8))
3557 I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3558 else if (VecTy == LLT::fixed_vector(16, 8))
3559 I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3560 else if (VecTy == LLT::fixed_vector(4, 16))
3561 I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3562 else if (VecTy == LLT::fixed_vector(8, 16))
3563 I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3564 else
3565 return false;
3567 return true;
3568 }
3569 case TargetOpcode::G_BUILD_VECTOR:
3570 return selectBuildVector(I, MRI);
3571 case TargetOpcode::G_MERGE_VALUES:
3572 return selectMergeValues(I, MRI);
3573 case TargetOpcode::G_UNMERGE_VALUES:
3574 return selectUnmergeValues(I, MRI);
3575 case TargetOpcode::G_SHUFFLE_VECTOR:
3576 return selectShuffleVector(I, MRI);
3577 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3578 return selectExtractElt(I, MRI);
3579 case TargetOpcode::G_CONCAT_VECTORS:
3580 return selectConcatVectors(I, MRI);
3581 case TargetOpcode::G_JUMP_TABLE:
3582 return selectJumpTable(I, MRI);
3583 case TargetOpcode::G_MEMCPY:
3584 case TargetOpcode::G_MEMCPY_INLINE:
3585 case TargetOpcode::G_MEMMOVE:
3586 case TargetOpcode::G_MEMSET:
3587 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature");
3588 return selectMOPS(I, MRI);
3589 }
3590
3591 return false;
3592}
3593
3594bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) {
3595 MachineIRBuilderState OldMIBState = MIB.getState();
3596 bool Success = select(I);
3597 MIB.setState(OldMIBState);
3598 return Success;
3599}
3600
3601bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI,
3602 MachineRegisterInfo &MRI) {
3603 unsigned Mopcode;
3604 switch (GI.getOpcode()) {
3605 case TargetOpcode::G_MEMCPY:
3606 case TargetOpcode::G_MEMCPY_INLINE:
3607 Mopcode = AArch64::MOPSMemoryCopyPseudo;
3608 break;
3609 case TargetOpcode::G_MEMMOVE:
3610 Mopcode = AArch64::MOPSMemoryMovePseudo;
3611 break;
3612 case TargetOpcode::G_MEMSET:
3613 // For tagged memset see llvm.aarch64.mops.memset.tag
3614 Mopcode = AArch64::MOPSMemorySetPseudo;
3615 break;
3616 }
3617
3618 auto &DstPtr = GI.getOperand(0);
3619 auto &SrcOrVal = GI.getOperand(1);
3620 auto &Size = GI.getOperand(2);
3621
3622 // Create copies of the registers that can be clobbered.
3623 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg());
3624 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg());
3625 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg());
3626
3627 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo;
3628 const auto &SrcValRegClass =
3629 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass;
3630
3631 // Constrain to specific registers
3632 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI);
3633 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI);
3634 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI);
3635
3636 MIB.buildCopy(DstPtrCopy, DstPtr);
3637 MIB.buildCopy(SrcValCopy, SrcOrVal);
3638 MIB.buildCopy(SizeCopy, Size);
3639
3640 // New instruction uses the copied registers because it must update them.
3641 // The defs are not used since they don't exist in G_MEM*. They are still
3642 // tied.
3643 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE
3644 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
3645 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3646 if (IsSet) {
3647 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize},
3648 {DstPtrCopy, SizeCopy, SrcValCopy});
3649 } else {
3650 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass);
3651 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize},
3652 {DstPtrCopy, SrcValCopy, SizeCopy});
3653 }
3654
3655 GI.eraseFromParent();
3656 return true;
3657}
3658
3659bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3660 MachineRegisterInfo &MRI) {
3661 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3662 Register JTAddr = I.getOperand(0).getReg();
3663 unsigned JTI = I.getOperand(1).getIndex();
3664 Register Index = I.getOperand(2).getReg();
3665
3666 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
3667
3668 // With aarch64-jump-table-hardening, we only expand the jump table dispatch
3669 // sequence later, to guarantee the integrity of the intermediate values.
3670 if (MF->getFunction().hasFnAttribute("aarch64-jump-table-hardening")) {
3672 if (STI.isTargetMachO()) {
3673 if (CM != CodeModel::Small && CM != CodeModel::Large)
3674 report_fatal_error("Unsupported code-model for hardened jump-table");
3675 } else {
3676 // Note that COFF support would likely also need JUMP_TABLE_DEBUG_INFO.
3677 assert(STI.isTargetELF() &&
3678 "jump table hardening only supported on MachO/ELF");
3679 if (CM != CodeModel::Small)
3680 report_fatal_error("Unsupported code-model for hardened jump-table");
3681 }
3682
3683 MIB.buildCopy({AArch64::X16}, I.getOperand(2).getReg());
3684 MIB.buildInstr(AArch64::BR_JumpTable)
3685 .addJumpTableIndex(I.getOperand(1).getIndex());
3686 I.eraseFromParent();
3687 return true;
3688 }
3689
3690 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3691 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3692
3693 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3694 {TargetReg, ScratchReg}, {JTAddr, Index})
3695 .addJumpTableIndex(JTI);
3696 // Save the jump table info.
3697 MIB.buildInstr(TargetOpcode::JUMP_TABLE_DEBUG_INFO, {},
3698 {static_cast<int64_t>(JTI)});
3699 // Build the indirect branch.
3700 MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3701 I.eraseFromParent();
3702 constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3703 return true;
3704}
3705
3706bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3707 MachineRegisterInfo &MRI) {
3708 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3709 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3710
3711 Register DstReg = I.getOperand(0).getReg();
3712 unsigned JTI = I.getOperand(1).getIndex();
3713 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3714 auto MovMI =
3715 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3716 .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3718 I.eraseFromParent();
3720 return true;
3721}
3722
3723bool AArch64InstructionSelector::selectTLSGlobalValue(
3724 MachineInstr &I, MachineRegisterInfo &MRI) {
3725 if (!STI.isTargetMachO())
3726 return false;
3727 MachineFunction &MF = *I.getParent()->getParent();
3728 MF.getFrameInfo().setAdjustsStack(true);
3729
3730 const auto &GlobalOp = I.getOperand(1);
3731 assert(GlobalOp.getOffset() == 0 &&
3732 "Shouldn't have an offset on TLS globals!");
3733 const GlobalValue &GV = *GlobalOp.getGlobal();
3734
3735 auto LoadGOT =
3736 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3737 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3738
3739 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3740 {LoadGOT.getReg(0)})
3741 .addImm(0);
3742
3743 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3744 // TLS calls preserve all registers except those that absolutely must be
3745 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3746 // silly).
3747 unsigned Opcode = getBLRCallOpcode(MF);
3748
3749 // With ptrauth-calls, the tlv access thunk pointer is authenticated (IA, 0).
3750 if (MF.getFunction().hasFnAttribute("ptrauth-calls")) {
3751 assert(Opcode == AArch64::BLR);
3752 Opcode = AArch64::BLRAAZ;
3753 }
3754
3755 MIB.buildInstr(Opcode, {}, {Load})
3756 .addUse(AArch64::X0, RegState::Implicit)
3757 .addDef(AArch64::X0, RegState::Implicit)
3758 .addRegMask(TRI.getTLSCallPreservedMask());
3759
3760 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3761 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3762 MRI);
3763 I.eraseFromParent();
3764 return true;
3765}
3766
3767MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3768 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3769 MachineIRBuilder &MIRBuilder) const {
3770 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3771
3772 auto BuildFn = [&](unsigned SubregIndex) {
3773 auto Ins =
3774 MIRBuilder
3775 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3776 .addImm(SubregIndex);
3779 return &*Ins;
3780 };
3781
3782 switch (EltSize) {
3783 case 8:
3784 return BuildFn(AArch64::bsub);
3785 case 16:
3786 return BuildFn(AArch64::hsub);
3787 case 32:
3788 return BuildFn(AArch64::ssub);
3789 case 64:
3790 return BuildFn(AArch64::dsub);
3791 default:
3792 return nullptr;
3793 }
3794}
3795
3796MachineInstr *
3797AArch64InstructionSelector::emitNarrowVector(Register DstReg, Register SrcReg,
3798 MachineIRBuilder &MIB,
3799 MachineRegisterInfo &MRI) const {
3800 LLT DstTy = MRI.getType(DstReg);
3801 const TargetRegisterClass *RC =
3802 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(SrcReg, MRI, TRI));
3803 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
3804 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
3805 return nullptr;
3806 }
3807 unsigned SubReg = 0;
3808 if (!getSubRegForClass(RC, TRI, SubReg))
3809 return nullptr;
3810 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
3811 LLVM_DEBUG(dbgs() << "Unsupported destination size! ("
3812 << DstTy.getSizeInBits() << "\n");
3813 return nullptr;
3814 }
3815 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3816 .addReg(SrcReg, {}, SubReg);
3817 RBI.constrainGenericRegister(DstReg, *RC, MRI);
3818 return Copy;
3819}
3820
bool AArch64InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI) {
  // Select G_MERGE_VALUES. Handles two cases: 2 x s64 -> s128 via lane
  // inserts, and 2 x s32 -> s64 (GPR) via SUBREG_TO_REG + BFM.
  assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
  const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
  const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
  assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
  const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);

  // Only two-source merges are handled.
  if (I.getNumOperands() != 3)
    return false;

  // Merging 2 s64s into an s128.
  if (DstTy == LLT::scalar(128)) {
    if (SrcTy.getSizeInBits() != 64)
      return false;
    Register DstReg = I.getOperand(0).getReg();
    Register Src1Reg = I.getOperand(1).getReg();
    Register Src2Reg = I.getOperand(2).getReg();
    auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
    MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg,
                                         /* LaneIdx */ 0, RB, MIB);
    if (!InsMI)
      return false;
    MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
                                          Src2Reg, /* LaneIdx */ 1, RB, MIB);
    if (!Ins2MI)
      return false;
    // Constrain both lane inserts (these calls were lost in extraction).
    constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
    constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
    I.eraseFromParent();
    return true;
  }

  if (RB.getID() != AArch64::GPRRegBankID)
    return false;

  if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
    return false;

  auto *DstRC = &AArch64::GPR64RegClass;
  Register SubToRegDef = MRI.createVirtualRegister(DstRC);
  MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::SUBREG_TO_REG))
                                .addDef(SubToRegDef)
                                .addUse(I.getOperand(1).getReg())
                                .addImm(AArch64::sub_32);
  Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
  // Need to anyext the second scalar before we can use bfm
  MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                     TII.get(TargetOpcode::SUBREG_TO_REG))
                                 .addDef(SubToRegDef2)
                                 .addUse(I.getOperand(2).getReg())
                                 .addImm(AArch64::sub_32);
  // BFM stitches the second 32-bit value into the high half.
  MachineInstr &BFM =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
           .addDef(I.getOperand(0).getReg())
           .addUse(SubToRegDef)
           .addUse(SubToRegDef2)
           .addImm(32)
           .addImm(31);
  constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
  constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}
3887
3888static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3889 const unsigned EltSize) {
3890 // Choose a lane copy opcode and subregister based off of the size of the
3891 // vector's elements.
3892 switch (EltSize) {
3893 case 8:
3894 CopyOpc = AArch64::DUPi8;
3895 ExtractSubReg = AArch64::bsub;
3896 break;
3897 case 16:
3898 CopyOpc = AArch64::DUPi16;
3899 ExtractSubReg = AArch64::hsub;
3900 break;
3901 case 32:
3902 CopyOpc = AArch64::DUPi32;
3903 ExtractSubReg = AArch64::ssub;
3904 break;
3905 case 64:
3906 CopyOpc = AArch64::DUPi64;
3907 ExtractSubReg = AArch64::dsub;
3908 break;
3909 default:
3910 // Unknown size, bail out.
3911 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3912 return false;
3913 }
3914 return true;
3915}
3916
/// Emit the extraction of lane \p LaneIdx of \p VecReg as a scalar of type
/// \p ScalarTy into \p DstReg (a fresh vreg is created when DstReg is empty).
/// Lane 0 becomes a plain subregister COPY; other lanes use a DUPi lane copy,
/// widening the source to 128 bits first if necessary. Returns the final
/// emitted instruction, or nullptr on failure.
MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
    std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
    Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  unsigned CopyOpc = 0;
  unsigned ExtractSubReg = 0;
  // Choose DUPi opcode + subregister from the scalar's bit width.
  if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
    LLVM_DEBUG(
        dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
    return nullptr;
  }

  const TargetRegisterClass *DstRC =
      getRegClassForTypeOnBank(ScalarTy, DstRB, true);
  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
    return nullptr;
  }

  const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
  const LLT &VecTy = MRI.getType(VecReg);
  const TargetRegisterClass *VecRC =
      getRegClassForTypeOnBank(VecTy, VecRB, true);
  if (!VecRC) {
    LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
    return nullptr;
  }

  // The register that we're going to copy into.
  Register InsertReg = VecReg;
  if (!DstReg)
    DstReg = MRI.createVirtualRegister(DstRC);
  // If the lane index is 0, we just use a subregister COPY.
  if (LaneIdx == 0) {
    auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
                    .addReg(VecReg, {}, ExtractSubReg);
    RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
    return &*Copy;
  }

  // Lane copies require 128-bit wide registers. If we're dealing with an
  // unpacked vector, then we need to move up to that width. Insert an implicit
  // def and a subregister insert to get us there.
  if (VecTy.getSizeInBits() != 128) {
    MachineInstr *ScalarToVector = emitScalarToVector(
        VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
    if (!ScalarToVector)
      return nullptr;
    InsertReg = ScalarToVector->getOperand(0).getReg();
  }

  MachineInstr *LaneCopyMI =
      MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
  constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);

  // Make sure that we actually constrain the initial copy.
  RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
  return LaneCopyMI;
}
3976
3977bool AArch64InstructionSelector::selectExtractElt(
3978 MachineInstr &I, MachineRegisterInfo &MRI) {
3979 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3980 "unexpected opcode!");
3981 Register DstReg = I.getOperand(0).getReg();
3982 const LLT NarrowTy = MRI.getType(DstReg);
3983 const Register SrcReg = I.getOperand(1).getReg();
3984 const LLT WideTy = MRI.getType(SrcReg);
3985 (void)WideTy;
3986 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3987 "source register size too small!");
3988 assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
3989
3990 // Need the lane index to determine the correct copy opcode.
3991 MachineOperand &LaneIdxOp = I.getOperand(2);
3992 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3993
3994 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3995 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3996 return false;
3997 }
3998
3999 // Find the index to extract from.
4000 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4001 if (!VRegAndVal)
4002 return false;
4003 unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4004
4005
4006 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4007 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4008 LaneIdx, MIB);
4009 if (!Extract)
4010 return false;
4011
4012 I.eraseFromParent();
4013 return true;
4014}
4015
4016bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4017 MachineInstr &I, MachineRegisterInfo &MRI) {
4018 unsigned NumElts = I.getNumOperands() - 1;
4019 Register SrcReg = I.getOperand(NumElts).getReg();
4020 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4021 const LLT SrcTy = MRI.getType(SrcReg);
4022
4023 assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4024 if (SrcTy.getSizeInBits() > 128) {
4025 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4026 return false;
4027 }
4028
4029 // We implement a split vector operation by treating the sub-vectors as
4030 // scalars and extracting them.
4031 const RegisterBank &DstRB =
4032 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4033 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4034 Register Dst = I.getOperand(OpIdx).getReg();
4035 MachineInstr *Extract =
4036 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4037 if (!Extract)
4038 return false;
4039 }
4040 I.eraseFromParent();
4041 return true;
4042}
4043
4044bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4045 MachineRegisterInfo &MRI) {
4046 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4047 "unexpected opcode");
4048
4049 // TODO: Handle unmerging into GPRs and from scalars to scalars.
4050 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4051 AArch64::FPRRegBankID ||
4052 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4053 AArch64::FPRRegBankID) {
4054 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4055 "currently unsupported.\n");
4056 return false;
4057 }
4058
4059 // The last operand is the vector source register, and every other operand is
4060 // a register to unpack into.
4061 unsigned NumElts = I.getNumOperands() - 1;
4062 Register SrcReg = I.getOperand(NumElts).getReg();
4063 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4064 const LLT WideTy = MRI.getType(SrcReg);
4065
4066 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4067 "source register size too small!");
4068
4069 if (!NarrowTy.isScalar())
4070 return selectSplitVectorUnmerge(I, MRI);
4071
4072 // Choose a lane copy opcode and subregister based off of the size of the
4073 // vector's elements.
4074 unsigned CopyOpc = 0;
4075 unsigned ExtractSubReg = 0;
4076 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4077 return false;
4078
4079 // Set up for the lane copies.
4080 MachineBasicBlock &MBB = *I.getParent();
4081
4082 // Stores the registers we'll be copying from.
4083 SmallVector<Register, 4> InsertRegs;
4084
4085 // We'll use the first register twice, so we only need NumElts-1 registers.
4086 unsigned NumInsertRegs = NumElts - 1;
4087
4088 // If our elements fit into exactly 128 bits, then we can copy from the source
4089 // directly. Otherwise, we need to do a bit of setup with some subregister
4090 // inserts.
4091 if (NarrowTy.getSizeInBits() * NumElts == 128) {
4092 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4093 } else {
4094 // No. We have to perform subregister inserts. For each insert, create an
4095 // implicit def and a subregister insert, and save the register we create.
4096 // For scalar sources, treat as a pseudo-vector of NarrowTy elements.
4097 unsigned EltSize = WideTy.isVector() ? WideTy.getScalarSizeInBits()
4098 : NarrowTy.getSizeInBits();
4099 const TargetRegisterClass *RC = getRegClassForTypeOnBank(
4100 LLT::fixed_vector(NumElts, EltSize), *RBI.getRegBank(SrcReg, MRI, TRI));
4101 unsigned SubReg = 0;
4102 bool Found = getSubRegForClass(RC, TRI, SubReg);
4103 (void)Found;
4104 assert(Found && "expected to find last operand's subeg idx");
4105 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4106 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4107 MachineInstr &ImpDefMI =
4108 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4109 ImpDefReg);
4110
4111 // Now, create the subregister insert from SrcReg.
4112 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4113 MachineInstr &InsMI =
4114 *BuildMI(MBB, I, I.getDebugLoc(),
4115 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4116 .addUse(ImpDefReg)
4117 .addUse(SrcReg)
4118 .addImm(SubReg);
4119
4120 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4122
4123 // Save the register so that we can copy from it after.
4124 InsertRegs.push_back(InsertReg);
4125 }
4126 }
4127
4128 // Now that we've created any necessary subregister inserts, we can
4129 // create the copies.
4130 //
4131 // Perform the first copy separately as a subregister copy.
4132 Register CopyTo = I.getOperand(0).getReg();
4133 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4134 .addReg(InsertRegs[0], {}, ExtractSubReg);
4135 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4136
4137 // Now, perform the remaining copies as vector lane copies.
4138 unsigned LaneIdx = 1;
4139 for (Register InsReg : InsertRegs) {
4140 Register CopyTo = I.getOperand(LaneIdx).getReg();
4141 MachineInstr &CopyInst =
4142 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4143 .addUse(InsReg)
4144 .addImm(LaneIdx);
4145 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4146 ++LaneIdx;
4147 }
4148
4149 // Separately constrain the first copy's destination. Because of the
4150 // limitation in constrainOperandRegClass, we can't guarantee that this will
4151 // actually be constrained. So, do it ourselves using the second operand.
4152 const TargetRegisterClass *RC =
4153 MRI.getRegClassOrNull(I.getOperand(1).getReg());
4154 if (!RC) {
4155 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4156 return false;
4157 }
4158
4159 RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4160 I.eraseFromParent();
4161 return true;
4162}
4163
4164bool AArch64InstructionSelector::selectConcatVectors(
4165 MachineInstr &I, MachineRegisterInfo &MRI) {
4166 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4167 "Unexpected opcode");
4168 Register Dst = I.getOperand(0).getReg();
4169 Register Op1 = I.getOperand(1).getReg();
4170 Register Op2 = I.getOperand(2).getReg();
4171 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4172 if (!ConcatMI)
4173 return false;
4174 I.eraseFromParent();
4175 return true;
4176}
4177
4178unsigned
4179AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4180 MachineFunction &MF) const {
4181 Type *CPTy = CPVal->getType();
4182 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4183
4184 MachineConstantPool *MCP = MF.getConstantPool();
4185 return MCP->getConstantPoolIndex(CPVal, Alignment);
4186}
4187
4188MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4189 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4190 const TargetRegisterClass *RC;
4191 unsigned Opc;
4192 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
4193 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4194 switch (Size) {
4195 case 16:
4196 RC = &AArch64::FPR128RegClass;
4197 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
4198 break;
4199 case 8:
4200 RC = &AArch64::FPR64RegClass;
4201 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
4202 break;
4203 case 4:
4204 RC = &AArch64::FPR32RegClass;
4205 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
4206 break;
4207 case 2:
4208 RC = &AArch64::FPR16RegClass;
4209 Opc = AArch64::LDRHui;
4210 break;
4211 default:
4212 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4213 << *CPVal->getType());
4214 return nullptr;
4215 }
4216
4217 MachineInstr *LoadMI = nullptr;
4218 auto &MF = MIRBuilder.getMF();
4219 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4220 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
4221 // Use load(literal) for tiny code model.
4222 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
4223 } else {
4224 auto Adrp =
4225 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4226 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4227
4228 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
4229 .addConstantPoolIndex(
4231
4233 }
4234
4235 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4236 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4238 Size, Align(Size)));
4240 return LoadMI;
4241}
4242
4243/// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given
4244/// size and RB.
4245static std::pair<unsigned, unsigned>
4246getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4247 unsigned Opc, SubregIdx;
4248 if (RB.getID() == AArch64::GPRRegBankID) {
4249 if (EltSize == 8) {
4250 Opc = AArch64::INSvi8gpr;
4251 SubregIdx = AArch64::bsub;
4252 } else if (EltSize == 16) {
4253 Opc = AArch64::INSvi16gpr;
4254 SubregIdx = AArch64::ssub;
4255 } else if (EltSize == 32) {
4256 Opc = AArch64::INSvi32gpr;
4257 SubregIdx = AArch64::ssub;
4258 } else if (EltSize == 64) {
4259 Opc = AArch64::INSvi64gpr;
4260 SubregIdx = AArch64::dsub;
4261 } else {
4262 llvm_unreachable("invalid elt size!");
4263 }
4264 } else {
4265 if (EltSize == 8) {
4266 Opc = AArch64::INSvi8lane;
4267 SubregIdx = AArch64::bsub;
4268 } else if (EltSize == 16) {
4269 Opc = AArch64::INSvi16lane;
4270 SubregIdx = AArch64::hsub;
4271 } else if (EltSize == 32) {
4272 Opc = AArch64::INSvi32lane;
4273 SubregIdx = AArch64::ssub;
4274 } else if (EltSize == 64) {
4275 Opc = AArch64::INSvi64lane;
4276 SubregIdx = AArch64::dsub;
4277 } else {
4278 llvm_unreachable("invalid elt size!");
4279 }
4280 }
4281 return std::make_pair(Opc, SubregIdx);
4282}
4283
4284MachineInstr *AArch64InstructionSelector::emitInstr(
4285 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4286 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4287 const ComplexRendererFns &RenderFns) const {
4288 assert(Opcode && "Expected an opcode?");
4289 assert(!isPreISelGenericOpcode(Opcode) &&
4290 "Function should only be used to produce selected instructions!");
4291 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4292 if (RenderFns)
4293 for (auto &Fn : *RenderFns)
4294 Fn(MI);
4296 return &*MI;
4297}
4298
/// Shared emitter for ADD/SUB-family instructions. \p AddrModeAndSizeToOpcode
/// is indexed [addressing mode][is-32-bit] with rows:
///   0: ri (positive immediate), 1: rs (shifted register), 2: rr (register),
///   3: ri of the *negated* operation (for negative immediates), 4: rx
///   (extended register).
/// Addressing modes are tried in a fixed priority order; the plain rr form is
/// the fallback.
MachineInstr *AArch64InstructionSelector::emitAddSub(
    const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
    Register Dst, MachineOperand &LHS, MachineOperand &RHS,
    MachineIRBuilder &MIRBuilder) const {
  MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected a scalar or pointer?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
  bool Is32Bit = Size == 32;

  // INSTRri form with positive arithmetic immediate.
  if (auto Fns = selectArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRri form with negative arithmetic immediate.
  if (auto Fns = selectNegArithImmed(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrx form.
  if (auto Fns = selectArithExtendedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);

  // INSTRrs form.
  if (auto Fns = selectShiftedRegister(RHS))
    return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
                     MIRBuilder, Fns);
  return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
                   MIRBuilder);
}
4333
4334MachineInstr *
4335AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4336 MachineOperand &RHS,
4337 MachineIRBuilder &MIRBuilder) const {
4338 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4339 {{AArch64::ADDXri, AArch64::ADDWri},
4340 {AArch64::ADDXrs, AArch64::ADDWrs},
4341 {AArch64::ADDXrr, AArch64::ADDWrr},
4342 {AArch64::SUBXri, AArch64::SUBWri},
4343 {AArch64::ADDXrx, AArch64::ADDWrx}}};
4344 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4345}
4346
4347MachineInstr *
4348AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4349 MachineOperand &RHS,
4350 MachineIRBuilder &MIRBuilder) const {
4351 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4352 {{AArch64::ADDSXri, AArch64::ADDSWri},
4353 {AArch64::ADDSXrs, AArch64::ADDSWrs},
4354 {AArch64::ADDSXrr, AArch64::ADDSWrr},
4355 {AArch64::SUBSXri, AArch64::SUBSWri},
4356 {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4357 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4358}
4359
4360MachineInstr *
4361AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4362 MachineOperand &RHS,
4363 MachineIRBuilder &MIRBuilder) const {
4364 const std::array<std::array<unsigned, 2>, 5> OpcTable{
4365 {{AArch64::SUBSXri, AArch64::SUBSWri},
4366 {AArch64::SUBSXrs, AArch64::SUBSWrs},
4367 {AArch64::SUBSXrr, AArch64::SUBSWrr},
4368 {AArch64::ADDSXri, AArch64::ADDSWri},
4369 {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4370 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4371}
4372
4373MachineInstr *
4374AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
4375 MachineOperand &RHS,
4376 MachineIRBuilder &MIRBuilder) const {
4377 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4378 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4379 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4380 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
4381 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4382}
4383
4384MachineInstr *
4385AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
4386 MachineOperand &RHS,
4387 MachineIRBuilder &MIRBuilder) const {
4388 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4389 MachineRegisterInfo *MRI = MIRBuilder.getMRI();
4390 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
4391 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
4392 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
4393}
4394
4395MachineInstr *
4396AArch64InstructionSelector::emitCMP(MachineOperand &LHS, MachineOperand &RHS,
4397 MachineIRBuilder &MIRBuilder) const {
4398 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4399 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
4400 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4401 return emitSUBS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4402}
4403
4404MachineInstr *
4405AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4406 MachineIRBuilder &MIRBuilder) const {
4407 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4408 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4409 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4410 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4411}
4412
4413MachineInstr *
4414AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4415 MachineIRBuilder &MIRBuilder) const {
4416 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4417 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4418 LLT Ty = MRI.getType(LHS.getReg());
4419 unsigned RegSize = Ty.getSizeInBits();
4420 bool Is32Bit = (RegSize == 32);
4421 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4422 {AArch64::ANDSXrs, AArch64::ANDSWrs},
4423 {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4424 // ANDS needs a logical immediate for its immediate form. Check if we can
4425 // fold one in.
4426 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4427 int64_t Imm = ValAndVReg->Value.getSExtValue();
4428
4430 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4433 return &*TstMI;
4434 }
4435 }
4436
4437 if (auto Fns = selectLogicalShiftedRegister(RHS))
4438 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4439 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4440}
4441
4442MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4443 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4444 MachineIRBuilder &MIRBuilder) const {
4445 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4446 assert(Predicate.isPredicate() && "Expected predicate?");
4447 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4448 LLT CmpTy = MRI.getType(LHS.getReg());
4449 assert(!CmpTy.isVector() && "Expected scalar or pointer");
4450 unsigned Size = CmpTy.getSizeInBits();
4451 (void)Size;
4452 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4453 // Fold the compare into a cmn or tst if possible.
4454 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4455 return FoldCmp;
4456 return emitCMP(LHS, RHS, MIRBuilder);
4457}
4458
4459MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4460 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4461 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4462#ifndef NDEBUG
4463 LLT Ty = MRI.getType(Dst);
4464 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4465 "Expected a 32-bit scalar register?");
4466#endif
4467 const Register ZReg = AArch64::WZR;
4468 AArch64CC::CondCode CC1, CC2;
4469 changeFCMPPredToAArch64CC(Pred, CC1, CC2);
4470 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4471 if (CC2 == AArch64CC::AL)
4472 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4473 MIRBuilder);
4474 const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4475 Register Def1Reg = MRI.createVirtualRegister(RC);
4476 Register Def2Reg = MRI.createVirtualRegister(RC);
4477 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4478 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4479 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4480 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4482 return &*OrMI;
4483}
4484
4485MachineInstr *AArch64InstructionSelector::emitFPCompare(
4486 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder,
4487 std::optional<CmpInst::Predicate> Pred) const {
4488 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4489 LLT Ty = MRI.getType(LHS);
4490 if (Ty.isVector())
4491 return nullptr;
4492 unsigned OpSize = Ty.getSizeInBits();
4493 assert(OpSize == 16 || OpSize == 32 || OpSize == 64);
4494
4495 // If this is a compare against +0.0, then we don't have
4496 // to explicitly materialize a constant.
4497 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4498 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4499
4500 auto IsEqualityPred = [](CmpInst::Predicate P) {
4501 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4503 };
4504 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4505 // Try commuting the operands.
4506 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4507 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4508 ShouldUseImm = true;
4509 std::swap(LHS, RHS);
4510 }
4511 }
4512 unsigned CmpOpcTbl[2][3] = {
4513 {AArch64::FCMPHrr, AArch64::FCMPSrr, AArch64::FCMPDrr},
4514 {AArch64::FCMPHri, AArch64::FCMPSri, AArch64::FCMPDri}};
4515 unsigned CmpOpc =
4516 CmpOpcTbl[ShouldUseImm][OpSize == 16 ? 0 : (OpSize == 32 ? 1 : 2)];
4517
4518 // Partially build the compare. Decide if we need to add a use for the
4519 // third operand based off whether or not we're comparing against 0.0.
4520 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4522 if (!ShouldUseImm)
4523 CmpMI.addUse(RHS);
4525 return &*CmpMI;
4526}
4527
4528MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4529 std::optional<Register> Dst, Register Op1, Register Op2,
4530 MachineIRBuilder &MIRBuilder) const {
4531 // We implement a vector concat by:
4532 // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4533 // 2. Insert the upper vector into the destination's upper element
4534 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4535 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4536
4537 const LLT Op1Ty = MRI.getType(Op1);
4538 const LLT Op2Ty = MRI.getType(Op2);
4539
4540 if (Op1Ty != Op2Ty) {
4541 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4542 return nullptr;
4543 }
4544 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4545
4546 if (Op1Ty.getSizeInBits() >= 128) {
4547 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4548 return nullptr;
4549 }
4550
4551 // At the moment we just support 64 bit vector concats.
4552 if (Op1Ty.getSizeInBits() != 64) {
4553 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors");
4554 return nullptr;
4555 }
4556
4557 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4558 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4559 const TargetRegisterClass *DstRC =
4560 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4561
4562 MachineInstr *WidenedOp1 =
4563 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4564 MachineInstr *WidenedOp2 =
4565 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4566 if (!WidenedOp1 || !WidenedOp2) {
4567 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4568 return nullptr;
4569 }
4570
4571 // Now do the insert of the upper element.
4572 unsigned InsertOpc, InsSubRegIdx;
4573 std::tie(InsertOpc, InsSubRegIdx) =
4574 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4575
4576 if (!Dst)
4577 Dst = MRI.createVirtualRegister(DstRC);
4578 auto InsElt =
4579 MIRBuilder
4580 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4581 .addImm(1) /* Lane index */
4582 .addUse(WidenedOp2->getOperand(0).getReg())
4583 .addImm(0);
4585 return &*InsElt;
4586}
4587
4588MachineInstr *
4589AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4590 Register Src2, AArch64CC::CondCode Pred,
4591 MachineIRBuilder &MIRBuilder) const {
4592 auto &MRI = *MIRBuilder.getMRI();
4593 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4594 // If we used a register class, then this won't necessarily have an LLT.
4595 // Compute the size based off whether or not we have a class or bank.
4596 unsigned Size;
4597 if (const auto *RC = dyn_cast<const TargetRegisterClass *>(RegClassOrBank))
4598 Size = TRI.getRegSizeInBits(*RC);
4599 else
4600 Size = MRI.getType(Dst).getSizeInBits();
4601 // Some opcodes use s1.
4602 assert(Size <= 64 && "Expected 64 bits or less only!");
4603 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4604 unsigned Opc = OpcTable[Size == 64];
4605 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4607 return &*CSINC;
4608}
4609
4610MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4611 Register CarryReg) {
4612 MachineRegisterInfo *MRI = MIB.getMRI();
4613 unsigned Opcode = I.getOpcode();
4614
4615 // If the instruction is a SUB, we need to negate the carry,
4616 // because borrowing is indicated by carry-flag == 0.
4617 bool NeedsNegatedCarry =
4618 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
4619
4620 // If the previous instruction will already produce the correct carry, do not
4621 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
4622 // generated during legalization of wide add/sub. This optimization depends on
4623 // these sequences not being interrupted by other instructions.
4624 // We have to select the previous instruction before the carry-using
4625 // instruction is deleted by the calling function, otherwise the previous
4626 // instruction might become dead and would get deleted.
4627 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
4628 if (SrcMI == I.getPrevNode()) {
4629 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
4630 bool ProducesNegatedCarry = CarrySrcMI->isSub();
4631 if (NeedsNegatedCarry == ProducesNegatedCarry &&
4632 CarrySrcMI->isUnsigned() &&
4633 CarrySrcMI->getCarryOutReg() == CarryReg &&
4634 selectAndRestoreState(*SrcMI))
4635 return nullptr;
4636 }
4637 }
4638
4639 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
4640
4641 if (NeedsNegatedCarry) {
4642 // (0 - Carry) sets !C in NZCV when Carry == 1
4643 Register ZReg = AArch64::WZR;
4644 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
4645 }
4646
4647 // (Carry - 1) sets !C in NZCV when Carry == 0
4648 auto Fns = select12BitValueWithLeftShift(1);
4649 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
4650}
4651
4652bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
4653 MachineRegisterInfo &MRI) {
4654 auto &CarryMI = cast<GAddSubCarryOut>(I);
4655
4656 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
4657 // Set NZCV carry according to carry-in VReg
4658 emitCarryIn(I, CarryInMI->getCarryInReg());
4659 }
4660
4661 // Emit the operation and get the correct condition code.
4662 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
4663 CarryMI.getLHS(), CarryMI.getRHS(), MIB);
4664
4665 Register CarryOutReg = CarryMI.getCarryOutReg();
4666
4667 // Don't convert carry-out to VReg if it is never used
4668 if (!MRI.use_nodbg_empty(CarryOutReg)) {
4669 // Now, put the overflow result in the register given by the first operand
4670 // to the overflow op. CSINC increments the result when the predicate is
4671 // false, so to get the increment when it's true, we need to use the
4672 // inverse. In this case, we want to increment when carry is set.
4673 Register ZReg = AArch64::WZR;
4674 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4675 getInvertedCondCode(OpAndCC.second), MIB);
4676 }
4677
4678 I.eraseFromParent();
4679 return true;
4680}
4681
4682std::pair<MachineInstr *, AArch64CC::CondCode>
4683AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4684 MachineOperand &LHS,
4685 MachineOperand &RHS,
4686 MachineIRBuilder &MIRBuilder) const {
4687 switch (Opcode) {
4688 default:
4689 llvm_unreachable("Unexpected opcode!");
4690 case TargetOpcode::G_SADDO:
4691 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4692 case TargetOpcode::G_UADDO:
4693 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4694 case TargetOpcode::G_SSUBO:
4695 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4696 case TargetOpcode::G_USUBO:
4697 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4698 case TargetOpcode::G_SADDE:
4699 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4700 case TargetOpcode::G_UADDE:
4701 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4702 case TargetOpcode::G_SSUBE:
4703 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4704 case TargetOpcode::G_USUBE:
4705 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4706 }
4707}
4708
4709/// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4710/// expressed as a conjunction.
4711/// \param CanNegate Set to true if we can negate the whole sub-tree just by
4712/// changing the conditions on the CMP tests.
4713/// (this means we can call emitConjunctionRec() with
4714/// Negate==true on this sub-tree)
4715/// \param MustBeFirst Set to true if this subtree needs to be negated and we
4716/// cannot do the negation naturally. We are required to
4717/// emit the subtree first in this case.
4718/// \param WillNegate Is true if are called when the result of this
4719/// subexpression must be negated. This happens when the
4720/// outer expression is an OR. We can use this fact to know
4721/// that we have a double negation (or (or ...) ...) that
4722/// can be implemented for free.
4723static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4724 bool WillNegate, MachineRegisterInfo &MRI,
4725 unsigned Depth = 0) {
4726 if (!MRI.hasOneNonDBGUse(Val))
4727 return false;
4728 MachineInstr *ValDef = MRI.getVRegDef(Val);
4729 unsigned Opcode = ValDef->getOpcode();
4730 if (isa<GAnyCmp>(ValDef)) {
4731 CanNegate = true;
4732 MustBeFirst = false;
4733 return true;
4734 }
4735 // Protect against exponential runtime and stack overflow.
4736 if (Depth > 6)
4737 return false;
4738 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4739 bool IsOR = Opcode == TargetOpcode::G_OR;
4740 Register O0 = ValDef->getOperand(1).getReg();
4741 Register O1 = ValDef->getOperand(2).getReg();
4742 bool CanNegateL;
4743 bool MustBeFirstL;
4744 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4745 return false;
4746 bool CanNegateR;
4747 bool MustBeFirstR;
4748 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4749 return false;
4750
4751 if (MustBeFirstL && MustBeFirstR)
4752 return false;
4753
4754 if (IsOR) {
4755 // For an OR expression we need to be able to naturally negate at least
4756 // one side or we cannot do the transformation at all.
4757 if (!CanNegateL && !CanNegateR)
4758 return false;
4759 // If we the result of the OR will be negated and we can naturally negate
4760 // the leaves, then this sub-tree as a whole negates naturally.
4761 CanNegate = WillNegate && CanNegateL && CanNegateR;
4762 // If we cannot naturally negate the whole sub-tree, then this must be
4763 // emitted first.
4764 MustBeFirst = !CanNegate;
4765 } else {
4766 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4767 // We cannot naturally negate an AND operation.
4768 CanNegate = false;
4769 MustBeFirst = MustBeFirstL || MustBeFirstR;
4770 }
4771 return true;
4772 }
4773 return false;
4774}
4775
4776MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4779 MachineIRBuilder &MIB) const {
4780 auto &MRI = *MIB.getMRI();
4781 LLT OpTy = MRI.getType(LHS);
4782 unsigned CCmpOpc;
4783 std::optional<ValueAndVReg> C;
4784 if (CmpInst::isIntPredicate(CC)) {
4785 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4787 if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4788 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4789 else if (C->Value.ule(31))
4790 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4791 else
4792 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMNWi : AArch64::CCMNXi;
4793 } else {
4794 assert(OpTy.getSizeInBits() == 16 || OpTy.getSizeInBits() == 32 ||
4795 OpTy.getSizeInBits() == 64);
4796 switch (OpTy.getSizeInBits()) {
4797 case 16:
4798 assert(STI.hasFullFP16() && "Expected Full FP16 for fp16 comparisons");
4799 CCmpOpc = AArch64::FCCMPHrr;
4800 break;
4801 case 32:
4802 CCmpOpc = AArch64::FCCMPSrr;
4803 break;
4804 case 64:
4805 CCmpOpc = AArch64::FCCMPDrr;
4806 break;
4807 default:
4808 return nullptr;
4809 }
4810 }
4812 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4813 auto CCmp =
4814 MIB.buildInstr(CCmpOpc, {}, {LHS});
4815 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4816 CCmp.addImm(C->Value.getZExtValue());
4817 else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)
4818 CCmp.addImm(C->Value.abs().getZExtValue());
4819 else
4820 CCmp.addReg(RHS);
4821 CCmp.addImm(NZCV).addImm(Predicate);
4823 return &*CCmp;
4824}
4825
4826MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4827 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4828 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4829 // We're at a tree leaf, produce a conditional comparison operation.
4830 auto &MRI = *MIB.getMRI();
4831 MachineInstr *ValDef = MRI.getVRegDef(Val);
4832 unsigned Opcode = ValDef->getOpcode();
4833 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
4834 Register LHS = Cmp->getLHSReg();
4835 Register RHS = Cmp->getRHSReg();
4836 CmpInst::Predicate CC = Cmp->getCond();
4837 if (Negate)
4839 if (isa<GICmp>(Cmp)) {
4840 OutCC = changeICMPPredToAArch64CC(CC, RHS, MIB.getMRI());
4841 } else {
4842 // Handle special FP cases.
4843 AArch64CC::CondCode ExtraCC;
4844 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
4845 // Some floating point conditions can't be tested with a single condition
4846 // code. Construct an additional comparison in this case.
4847 if (ExtraCC != AArch64CC::AL) {
4848 MachineInstr *ExtraCmp;
4849 if (!CCOp)
4850 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC);
4851 else
4852 ExtraCmp =
4853 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB);
4854 CCOp = ExtraCmp->getOperand(0).getReg();
4855 Predicate = ExtraCC;
4856 }
4857 }
4858
4859 // Produce a normal comparison if we are first in the chain
4860 if (!CCOp) {
4861 if (isa<GICmp>(Cmp))
4862 return emitCMP(Cmp->getOperand(2), Cmp->getOperand(3), MIB);
4863 return emitFPCompare(Cmp->getOperand(2).getReg(),
4864 Cmp->getOperand(3).getReg(), MIB);
4865 }
4866 // Otherwise produce a ccmp.
4867 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB);
4868 }
4869 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree");
4870
4871 bool IsOR = Opcode == TargetOpcode::G_OR;
4872
4873 Register LHS = ValDef->getOperand(1).getReg();
4874 bool CanNegateL;
4875 bool MustBeFirstL;
4876 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI);
4877 assert(ValidL && "Valid conjunction/disjunction tree");
4878 (void)ValidL;
4879
4880 Register RHS = ValDef->getOperand(2).getReg();
4881 bool CanNegateR;
4882 bool MustBeFirstR;
4883 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI);
4884 assert(ValidR && "Valid conjunction/disjunction tree");
4885 (void)ValidR;
4886
4887 // Swap sub-tree that must come first to the right side.
4888 if (MustBeFirstL) {
4889 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
4890 std::swap(LHS, RHS);
4891 std::swap(CanNegateL, CanNegateR);
4892 std::swap(MustBeFirstL, MustBeFirstR);
4893 }
4894
4895 bool NegateR;
4896 bool NegateAfterR;
4897 bool NegateL;
4898 bool NegateAfterAll;
4899 if (Opcode == TargetOpcode::G_OR) {
4900 // Swap the sub-tree that we can negate naturally to the left.
4901 if (!CanNegateL) {
4902 assert(CanNegateR && "at least one side must be negatable");
4903 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
4904 assert(!Negate);
4905 std::swap(LHS, RHS);
4906 NegateR = false;
4907 NegateAfterR = true;
4908 } else {
4909 // Negate the left sub-tree if possible, otherwise negate the result.
4910 NegateR = CanNegateR;
4911 NegateAfterR = !CanNegateR;
4912 }
4913 NegateL = true;
4914 NegateAfterAll = !Negate;
4915 } else {
4916 assert(Opcode == TargetOpcode::G_AND &&
4917 "Valid conjunction/disjunction tree");
4918 assert(!Negate && "Valid conjunction/disjunction tree");
4919
4920 NegateL = false;
4921 NegateR = false;
4922 NegateAfterR = false;
4923 NegateAfterAll = false;
4924 }
4925
4926 // Emit sub-trees.
4927 AArch64CC::CondCode RHSCC;
4928 MachineInstr *CmpR =
4929 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB);
4930 if (NegateAfterR)
4931 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
4932 MachineInstr *CmpL = emitConjunctionRec(
4933 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB);
4934 if (NegateAfterAll)
4935 OutCC = AArch64CC::getInvertedCondCode(OutCC);
4936 return CmpL;
4937}
4938
4939MachineInstr *AArch64InstructionSelector::emitConjunction(
4940 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const {
4941 bool DummyCanNegate;
4942 bool DummyMustBeFirst;
4943 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false,
4944 *MIB.getMRI()))
4945 return nullptr;
4946 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB);
4947}
4948
4949bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI,
4950 MachineInstr &CondMI) {
4951 AArch64CC::CondCode AArch64CC;
4952 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB);
4953 if (!ConjMI)
4954 return false;
4955
4956 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB);
4957 SelI.eraseFromParent();
4958 return true;
4959}
4960
4961bool AArch64InstructionSelector::tryOptSelect(GSelect &I) {
4962 MachineRegisterInfo &MRI = *MIB.getMRI();
4963 // We want to recognize this pattern:
4964 //
4965 // $z = G_FCMP pred, $x, $y
4966 // ...
4967 // $w = G_SELECT $z, $a, $b
4968 //
4969 // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4970 // some copies/truncs in between.)
4971 //
4972 // If we see this, then we can emit something like this:
4973 //
4974 // fcmp $x, $y
4975 // fcsel $w, $a, $b, pred
4976 //
4977 // Rather than emitting both of the rather long sequences in the standard
4978 // G_FCMP/G_SELECT select methods.
4979
4980 // First, check if the condition is defined by a compare.
4981 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4982
4983 // We can only fold if all of the defs have one use.
4984 Register CondDefReg = CondDef->getOperand(0).getReg();
4985 if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4986 // Unless it's another select.
4987 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4988 if (CondDef == &UI)
4989 continue;
4990 if (UI.getOpcode() != TargetOpcode::G_SELECT)
4991 return false;
4992 }
4993 }
4994
4995 // Is the condition defined by a compare?
4996 unsigned CondOpc = CondDef->getOpcode();
4997 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) {
4998 if (tryOptSelectConjunction(I, *CondDef))
4999 return true;
5000 return false;
5001 }
5002
5004 if (CondOpc == TargetOpcode::G_ICMP) {
5005 auto &PredOp = CondDef->getOperand(1);
5006 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), PredOp,
5007 MIB);
5008 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
5009 CondCode =
5010 changeICMPPredToAArch64CC(Pred, CondDef->getOperand(3).getReg(), &MRI);
5011 } else {
5012 // Get the condition code for the select.
5013 auto Pred =
5014 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
5015 AArch64CC::CondCode CondCode2;
5016 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
5017
5018 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
5019 // instructions to emit the comparison.
5020 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
5021 // unnecessary.
5022 if (CondCode2 != AArch64CC::AL)
5023 return false;
5024
5025 if (!emitFPCompare(CondDef->getOperand(2).getReg(),
5026 CondDef->getOperand(3).getReg(), MIB)) {
5027 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
5028 return false;
5029 }
5030 }
5031
5032 // Emit the select.
5033 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
5034 I.getOperand(3).getReg(), CondCode, MIB);
5035 I.eraseFromParent();
5036 return true;
5037}
5038
5039MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
5040 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
5041 MachineIRBuilder &MIRBuilder) const {
5042 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
5043 "Unexpected MachineOperand");
5044 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5045 // We want to find this sort of thing:
5046 // x = G_SUB 0, y
5047 // G_ICMP z, x
5048 //
5049 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
5050 // e.g:
5051 //
5052 // cmn z, y
5053
5054 // Check if the RHS or LHS of the G_ICMP is defined by a SUB
5055 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5056 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
5057 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
5058
5059 // Given this:
5060 //
5061 // x = G_SUB 0, y
5062 // G_ICMP z, x
5063 //
5064 // Produce this:
5065 //
5066 // cmn z, y
5067 if (isCMN(RHSDef, P, MRI))
5068 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
5069
5070 // Same idea here, but with the LHS of the compare instead:
5071 //
5072 // Given this:
5073 //
5074 // x = G_SUB 0, y
5075 // G_ICMP x, z
5076 //
5077 // Produce this:
5078 //
5079 // cmn y, z
5080 //
5081 // But be careful! We need to swap the predicate!
5082 if (isCMN(LHSDef, P, MRI)) {
5083 if (!CmpInst::isEquality(P)) {
5086 }
5087 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
5088 }
5089
5090 // Given this:
5091 //
5092 // z = G_AND x, y
5093 // G_ICMP z, 0
5094 //
5095 // Produce this if the compare is signed:
5096 //
5097 // tst x, y
5098 if (!CmpInst::isUnsigned(P) && LHSDef &&
5099 LHSDef->getOpcode() == TargetOpcode::G_AND) {
5100 // Make sure that the RHS is 0.
5101 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
5102 if (!ValAndVReg || ValAndVReg->Value != 0)
5103 return nullptr;
5104
5105 return emitTST(LHSDef->getOperand(1),
5106 LHSDef->getOperand(2), MIRBuilder);
5107 }
5108
5109 return nullptr;
5110}
5111
5112bool AArch64InstructionSelector::selectShuffleVector(
5113 MachineInstr &I, MachineRegisterInfo &MRI) {
5114 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5115 Register Src1Reg = I.getOperand(1).getReg();
5116 Register Src2Reg = I.getOperand(2).getReg();
5117 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
5118
5119 MachineBasicBlock &MBB = *I.getParent();
5120 MachineFunction &MF = *MBB.getParent();
5121 LLVMContext &Ctx = MF.getFunction().getContext();
5122
5123 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
5124
5126 for (int Val : Mask) {
5127 // For now, any undef indexes we'll just assume to be 0. This should be
5128 // optimized in future, e.g. to select DUP etc.
5129 Val = Val < 0 ? 0 : Val;
5130 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5131 unsigned Offset = Byte + Val * BytesPerElt;
5132 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
5133 }
5134 }
5135
5136 // Use a constant pool to load the index vector for TBL.
5137 Constant *CPVal = ConstantVector::get(CstIdxs);
5138 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
5139 if (!IndexLoad) {
5140 LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
5141 return false;
5142 }
5143
5144 if (DstTy.getSizeInBits() != 128) {
5145 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
5146 // This case can be done with TBL1.
5147 MachineInstr *Concat =
5148 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB);
5149 if (!Concat) {
5150 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
5151 return false;
5152 }
5153
5154 // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
5155 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
5156 IndexLoad->getOperand(0).getReg(), MIB);
5157
5158 auto TBL1 = MIB.buildInstr(
5159 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
5160 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
5162
5163 auto Copy =
5164 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
5165 .addReg(TBL1.getReg(0), {}, AArch64::dsub);
5166 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
5167 I.eraseFromParent();
5168 return true;
5169 }
5170
5171 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
5172 // Q registers for regalloc.
5173 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
5174 auto RegSeq = createQTuple(Regs, MIB);
5175 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
5176 {RegSeq, IndexLoad->getOperand(0)});
5178 I.eraseFromParent();
5179 return true;
5180}
5181
5182MachineInstr *AArch64InstructionSelector::emitLaneInsert(
5183 std::optional<Register> DstReg, Register SrcReg, Register EltReg,
5184 unsigned LaneIdx, const RegisterBank &RB,
5185 MachineIRBuilder &MIRBuilder) const {
5186 MachineInstr *InsElt = nullptr;
5187 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5188 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
5189
5190 // Create a register to define with the insert if one wasn't passed in.
5191 if (!DstReg)
5192 DstReg = MRI.createVirtualRegister(DstRC);
5193
5194 unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
5195 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
5196
5197 if (RB.getID() == AArch64::FPRRegBankID) {
5198 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
5199 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5200 .addImm(LaneIdx)
5201 .addUse(InsSub->getOperand(0).getReg())
5202 .addImm(0);
5203 } else {
5204 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
5205 .addImm(LaneIdx)
5206 .addUse(EltReg);
5207 }
5208
5210 return InsElt;
5211}
5212
5213bool AArch64InstructionSelector::selectUSMovFromExtend(
5214 MachineInstr &MI, MachineRegisterInfo &MRI) {
5215 if (MI.getOpcode() != TargetOpcode::G_SEXT &&
5216 MI.getOpcode() != TargetOpcode::G_ZEXT &&
5217 MI.getOpcode() != TargetOpcode::G_ANYEXT)
5218 return false;
5219 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
5220 const Register DefReg = MI.getOperand(0).getReg();
5221 const LLT DstTy = MRI.getType(DefReg);
5222 unsigned DstSize = DstTy.getSizeInBits();
5223
5224 if (DstSize != 32 && DstSize != 64)
5225 return false;
5226
5227 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
5228 MI.getOperand(1).getReg(), MRI);
5229 int64_t Lane;
5230 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
5231 return false;
5232 Register Src0 = Extract->getOperand(1).getReg();
5233
5234 const LLT VecTy = MRI.getType(Src0);
5235 if (VecTy.isScalableVector())
5236 return false;
5237
5238 if (VecTy.getSizeInBits() != 128) {
5239 const MachineInstr *ScalarToVector = emitScalarToVector(
5240 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
5241 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
5242 Src0 = ScalarToVector->getOperand(0).getReg();
5243 }
5244
5245 unsigned Opcode;
5246 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
5247 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
5248 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
5249 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
5250 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
5251 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
5252 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
5253 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
5254 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
5255 Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
5256 else
5257 llvm_unreachable("Unexpected type combo for S/UMov!");
5258
5259 // We may need to generate one of these, depending on the type and sign of the
5260 // input:
5261 // DstReg = SMOV Src0, Lane;
5262 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
5263 MachineInstr *ExtI = nullptr;
5264 if (DstSize == 64 && !IsSigned) {
5265 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
5266 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
5267 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
5268 .addUse(NewReg)
5269 .addImm(AArch64::sub_32);
5270 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
5271 } else
5272 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
5273
5275 MI.eraseFromParent();
5276 return true;
5277}
5278
5279MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm8(
5280 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5281 unsigned int Op;
5282 if (DstSize == 128) {
5283 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5284 return nullptr;
5285 Op = AArch64::MOVIv16b_ns;
5286 } else {
5287 Op = AArch64::MOVIv8b_ns;
5288 }
5289
5290 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5291
5294 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5296 return &*Mov;
5297 }
5298 return nullptr;
5299}
5300
5301MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm16(
5302 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5303 bool Inv) {
5304
5305 unsigned int Op;
5306 if (DstSize == 128) {
5307 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5308 return nullptr;
5309 Op = Inv ? AArch64::MVNIv8i16 : AArch64::MOVIv8i16;
5310 } else {
5311 Op = Inv ? AArch64::MVNIv4i16 : AArch64::MOVIv4i16;
5312 }
5313
5314 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5315 uint64_t Shift;
5316
5319 Shift = 0;
5320 } else if (AArch64_AM::isAdvSIMDModImmType6(Val)) {
5322 Shift = 8;
5323 } else
5324 return nullptr;
5325
5326 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5328 return &*Mov;
5329}
5330
5331MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm32(
5332 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5333 bool Inv) {
5334
5335 unsigned int Op;
5336 if (DstSize == 128) {
5337 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5338 return nullptr;
5339 Op = Inv ? AArch64::MVNIv4i32 : AArch64::MOVIv4i32;
5340 } else {
5341 Op = Inv ? AArch64::MVNIv2i32 : AArch64::MOVIv2i32;
5342 }
5343
5344 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5345 uint64_t Shift;
5346
5349 Shift = 0;
5350 } else if ((AArch64_AM::isAdvSIMDModImmType2(Val))) {
5352 Shift = 8;
5353 } else if ((AArch64_AM::isAdvSIMDModImmType3(Val))) {
5355 Shift = 16;
5356 } else if ((AArch64_AM::isAdvSIMDModImmType4(Val))) {
5358 Shift = 24;
5359 } else
5360 return nullptr;
5361
5362 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5364 return &*Mov;
5365}
5366
5367MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm64(
5368 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5369
5370 unsigned int Op;
5371 if (DstSize == 128) {
5372 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5373 return nullptr;
5374 Op = AArch64::MOVIv2d_ns;
5375 } else {
5376 Op = AArch64::MOVID;
5377 }
5378
5379 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5382 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5384 return &*Mov;
5385 }
5386 return nullptr;
5387}
5388
5389MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImm321s(
5390 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder,
5391 bool Inv) {
5392
5393 unsigned int Op;
5394 if (DstSize == 128) {
5395 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5396 return nullptr;
5397 Op = Inv ? AArch64::MVNIv4s_msl : AArch64::MOVIv4s_msl;
5398 } else {
5399 Op = Inv ? AArch64::MVNIv2s_msl : AArch64::MOVIv2s_msl;
5400 }
5401
5402 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5403 uint64_t Shift;
5404
5407 Shift = 264;
5408 } else if (AArch64_AM::isAdvSIMDModImmType8(Val)) {
5410 Shift = 272;
5411 } else
5412 return nullptr;
5413
5414 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val).addImm(Shift);
5416 return &*Mov;
5417}
5418
5419MachineInstr *AArch64InstructionSelector::tryAdvSIMDModImmFP(
5420 Register Dst, unsigned DstSize, APInt Bits, MachineIRBuilder &Builder) {
5421
5422 unsigned int Op;
5423 bool IsWide = false;
5424 if (DstSize == 128) {
5425 if (Bits.getHiBits(64) != Bits.getLoBits(64))
5426 return nullptr;
5427 Op = AArch64::FMOVv4f32_ns;
5428 IsWide = true;
5429 } else {
5430 Op = AArch64::FMOVv2f32_ns;
5431 }
5432
5433 uint64_t Val = Bits.zextOrTrunc(64).getZExtValue();
5434
5437 } else if (IsWide && AArch64_AM::isAdvSIMDModImmType12(Val)) {
5439 Op = AArch64::FMOVv2f64_ns;
5440 } else
5441 return nullptr;
5442
5443 auto Mov = Builder.buildInstr(Op, {Dst}, {}).addImm(Val);
5445 return &*Mov;
5446}
5447
5448bool AArch64InstructionSelector::selectIndexedExtLoad(
5449 MachineInstr &MI, MachineRegisterInfo &MRI) {
5450 auto &ExtLd = cast<GIndexedAnyExtLoad>(MI);
5451 Register Dst = ExtLd.getDstReg();
5452 Register WriteBack = ExtLd.getWritebackReg();
5453 Register Base = ExtLd.getBaseReg();
5454 Register Offset = ExtLd.getOffsetReg();
5455 LLT Ty = MRI.getType(Dst);
5456 assert(Ty.getSizeInBits() <= 64); // Only for scalar GPRs.
5457 unsigned MemSizeBits = ExtLd.getMMO().getMemoryType().getSizeInBits();
5458 bool IsPre = ExtLd.isPre();
5459 bool IsSExt = isa<GIndexedSExtLoad>(ExtLd);
5460 unsigned InsertIntoSubReg = 0;
5461 bool IsDst64 = Ty.getSizeInBits() == 64;
5462
5463 // ZExt/SExt should be on gpr but can handle extload and zextload of fpr, so
5464 // long as they are scalar.
5465 bool IsFPR = RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID;
5466 if ((IsSExt && IsFPR) || Ty.isVector())
5467 return false;
5468
5469 unsigned Opc = 0;
5470 LLT NewLdDstTy;
5471 LLT s32 = LLT::scalar(32);
5472 LLT s64 = LLT::scalar(64);
5473
5474 if (MemSizeBits == 8) {
5475 if (IsSExt) {
5476 if (IsDst64)
5477 Opc = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
5478 else
5479 Opc = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
5480 NewLdDstTy = IsDst64 ? s64 : s32;
5481 } else if (IsFPR) {
5482 Opc = IsPre ? AArch64::LDRBpre : AArch64::LDRBpost;
5483 InsertIntoSubReg = AArch64::bsub;
5484 NewLdDstTy = LLT::scalar(MemSizeBits);
5485 } else {
5486 Opc = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
5487 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5488 NewLdDstTy = s32;
5489 }
5490 } else if (MemSizeBits == 16) {
5491 if (IsSExt) {
5492 if (IsDst64)
5493 Opc = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
5494 else
5495 Opc = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
5496 NewLdDstTy = IsDst64 ? s64 : s32;
5497 } else if (IsFPR) {
5498 Opc = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
5499 InsertIntoSubReg = AArch64::hsub;
5500 NewLdDstTy = LLT::scalar(MemSizeBits);
5501 } else {
5502 Opc = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
5503 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5504 NewLdDstTy = s32;
5505 }
5506 } else if (MemSizeBits == 32) {
5507 if (IsSExt) {
5508 Opc = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
5509 NewLdDstTy = s64;
5510 } else if (IsFPR) {
5511 Opc = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
5512 InsertIntoSubReg = AArch64::ssub;
5513 NewLdDstTy = LLT::scalar(MemSizeBits);
5514 } else {
5515 Opc = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
5516 InsertIntoSubReg = IsDst64 ? AArch64::sub_32 : 0;
5517 NewLdDstTy = s32;
5518 }
5519 } else {
5520 llvm_unreachable("Unexpected size for indexed load");
5521 }
5522
5523 auto Cst = getIConstantVRegVal(Offset, MRI);
5524 if (!Cst)
5525 return false; // Shouldn't happen, but just in case.
5526
5527 auto LdMI = MIB.buildInstr(Opc, {WriteBack, NewLdDstTy}, {Base})
5528 .addImm(Cst->getSExtValue());
5529 LdMI.cloneMemRefs(ExtLd);
5531 // Make sure to select the load with the MemTy as the dest type, and then
5532 // insert into a larger reg if needed.
5533 if (InsertIntoSubReg) {
5534 // Generate a SUBREG_TO_REG.
5535 auto SubToReg = MIB.buildInstr(TargetOpcode::SUBREG_TO_REG, {Dst}, {})
5536 .addUse(LdMI.getReg(1))
5537 .addImm(InsertIntoSubReg);
5539 SubToReg.getReg(0),
5540 *getRegClassForTypeOnBank(MRI.getType(Dst),
5541 *RBI.getRegBank(Dst, MRI, TRI)),
5542 MRI);
5543 } else {
5544 auto Copy = MIB.buildCopy(Dst, LdMI.getReg(1));
5545 selectCopy(*Copy, TII, MRI, TRI, RBI);
5546 }
5547 MI.eraseFromParent();
5548
5549 return true;
5550}
5551
5552bool AArch64InstructionSelector::selectIndexedLoad(MachineInstr &MI,
5553 MachineRegisterInfo &MRI) {
5554 auto &Ld = cast<GIndexedLoad>(MI);
5555 Register Dst = Ld.getDstReg();
5556 Register WriteBack = Ld.getWritebackReg();
5557 Register Base = Ld.getBaseReg();
5558 Register Offset = Ld.getOffsetReg();
5559 assert(MRI.getType(Dst).getSizeInBits() <= 128 &&
5560 "Unexpected type for indexed load");
5561 unsigned MemSize = Ld.getMMO().getMemoryType().getSizeInBytes();
5562
5563 if (MemSize < MRI.getType(Dst).getSizeInBytes())
5564 return selectIndexedExtLoad(MI, MRI);
5565
5566 unsigned Opc = 0;
5567 if (Ld.isPre()) {
5568 static constexpr unsigned GPROpcodes[] = {
5569 AArch64::LDRBBpre, AArch64::LDRHHpre, AArch64::LDRWpre,
5570 AArch64::LDRXpre};
5571 static constexpr unsigned FPROpcodes[] = {
5572 AArch64::LDRBpre, AArch64::LDRHpre, AArch64::LDRSpre, AArch64::LDRDpre,
5573 AArch64::LDRQpre};
5574 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5575 ? FPROpcodes[Log2_32(MemSize)]
5576 : GPROpcodes[Log2_32(MemSize)];
5577 ;
5578 } else {
5579 static constexpr unsigned GPROpcodes[] = {
5580 AArch64::LDRBBpost, AArch64::LDRHHpost, AArch64::LDRWpost,
5581 AArch64::LDRXpost};
5582 static constexpr unsigned FPROpcodes[] = {
5583 AArch64::LDRBpost, AArch64::LDRHpost, AArch64::LDRSpost,
5584 AArch64::LDRDpost, AArch64::LDRQpost};
5585 Opc = (RBI.getRegBank(Dst, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5586 ? FPROpcodes[Log2_32(MemSize)]
5587 : GPROpcodes[Log2_32(MemSize)];
5588 ;
5589 }
5590 auto Cst = getIConstantVRegVal(Offset, MRI);
5591 if (!Cst)
5592 return false; // Shouldn't happen, but just in case.
5593 auto LdMI =
5594 MIB.buildInstr(Opc, {WriteBack, Dst}, {Base}).addImm(Cst->getSExtValue());
5595 LdMI.cloneMemRefs(Ld);
5597 MI.eraseFromParent();
5598 return true;
5599}
5600
5601bool AArch64InstructionSelector::selectIndexedStore(GIndexedStore &I,
5602 MachineRegisterInfo &MRI) {
5603 Register Dst = I.getWritebackReg();
5604 Register Val = I.getValueReg();
5605 Register Base = I.getBaseReg();
5606 Register Offset = I.getOffsetReg();
5607 assert(MRI.getType(Val).getSizeInBits() <= 128 &&
5608 "Unexpected type for indexed store");
5609
5610 LocationSize MemSize = I.getMMO().getSize();
5611 unsigned MemSizeInBytes = MemSize.getValue();
5612
5613 assert(MemSizeInBytes && MemSizeInBytes <= 16 &&
5614 "Unexpected indexed store size");
5615 unsigned MemSizeLog2 = Log2_32(MemSizeInBytes);
5616
5617 unsigned Opc = 0;
5618 if (I.isPre()) {
5619 static constexpr unsigned GPROpcodes[] = {
5620 AArch64::STRBBpre, AArch64::STRHHpre, AArch64::STRWpre,
5621 AArch64::STRXpre};
5622 static constexpr unsigned FPROpcodes[] = {
5623 AArch64::STRBpre, AArch64::STRHpre, AArch64::STRSpre, AArch64::STRDpre,
5624 AArch64::STRQpre};
5625
5626 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5627 Opc = FPROpcodes[MemSizeLog2];
5628 else
5629 Opc = GPROpcodes[MemSizeLog2];
5630 } else {
5631 static constexpr unsigned GPROpcodes[] = {
5632 AArch64::STRBBpost, AArch64::STRHHpost, AArch64::STRWpost,
5633 AArch64::STRXpost};
5634 static constexpr unsigned FPROpcodes[] = {
5635 AArch64::STRBpost, AArch64::STRHpost, AArch64::STRSpost,
5636 AArch64::STRDpost, AArch64::STRQpost};
5637
5638 if (RBI.getRegBank(Val, MRI, TRI)->getID() == AArch64::FPRRegBankID)
5639 Opc = FPROpcodes[MemSizeLog2];
5640 else
5641 Opc = GPROpcodes[MemSizeLog2];
5642 }
5643
5644 auto Cst = getIConstantVRegVal(Offset, MRI);
5645 if (!Cst)
5646 return false; // Shouldn't happen, but just in case.
5647 auto Str =
5648 MIB.buildInstr(Opc, {Dst}, {Val, Base}).addImm(Cst->getSExtValue());
5649 Str.cloneMemRefs(I);
5651 I.eraseFromParent();
5652 return true;
5653}
5654
5655MachineInstr *
5656AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5657 MachineIRBuilder &MIRBuilder,
5658 MachineRegisterInfo &MRI) {
5659 LLT DstTy = MRI.getType(Dst);
5660 unsigned DstSize = DstTy.getSizeInBits();
5661 assert((DstSize == 64 || DstSize == 128) &&
5662 "Unexpected vector constant size");
5663
5664 if (CV->isNullValue()) {
5665 if (DstSize == 128) {
5666 auto Mov =
5667 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5669 return &*Mov;
5670 }
5671
5672 if (DstSize == 64) {
5673 auto Mov =
5674 MIRBuilder
5675 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5676 .addImm(0);
5677 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5678 .addReg(Mov.getReg(0), {}, AArch64::dsub);
5679 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5680 return &*Copy;
5681 }
5682 }
5683
5684 if (Constant *SplatValue = CV->getSplatValue()) {
5685 APInt SplatValueAsInt =
5686 isa<ConstantFP>(SplatValue)
5687 ? cast<ConstantFP>(SplatValue)->getValueAPF().bitcastToAPInt()
5688 : SplatValue->getUniqueInteger();
5689 APInt DefBits = APInt::getSplat(
5690 DstSize, SplatValueAsInt.trunc(DstTy.getScalarSizeInBits()));
5691 auto TryMOVIWithBits = [&](APInt DefBits) -> MachineInstr * {
5692 MachineInstr *NewOp;
5693 bool Inv = false;
5694 if ((NewOp = tryAdvSIMDModImm64(Dst, DstSize, DefBits, MIRBuilder)) ||
5695 (NewOp =
5696 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5697 (NewOp =
5698 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5699 (NewOp =
5700 tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5701 (NewOp = tryAdvSIMDModImm8(Dst, DstSize, DefBits, MIRBuilder)) ||
5702 (NewOp = tryAdvSIMDModImmFP(Dst, DstSize, DefBits, MIRBuilder)))
5703 return NewOp;
5704
5705 DefBits = ~DefBits;
5706 Inv = true;
5707 if ((NewOp =
5708 tryAdvSIMDModImm32(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5709 (NewOp =
5710 tryAdvSIMDModImm321s(Dst, DstSize, DefBits, MIRBuilder, Inv)) ||
5711 (NewOp = tryAdvSIMDModImm16(Dst, DstSize, DefBits, MIRBuilder, Inv)))
5712 return NewOp;
5713 return nullptr;
5714 };
5715
5716 if (auto *NewOp = TryMOVIWithBits(DefBits))
5717 return NewOp;
5718
5719 // See if a fneg of the constant can be materialized with a MOVI, etc
5720 auto TryWithFNeg = [&](APInt DefBits, int NumBits,
5721 unsigned NegOpc) -> MachineInstr * {
5722 // FNegate each sub-element of the constant
5723 APInt Neg = APInt::getHighBitsSet(NumBits, 1).zext(DstSize);
5724 APInt NegBits(DstSize, 0);
5725 unsigned NumElts = DstSize / NumBits;
5726 for (unsigned i = 0; i < NumElts; i++)
5727 NegBits |= Neg << (NumBits * i);
5728 NegBits = DefBits ^ NegBits;
5729
5730 // Try to create the new constants with MOVI, and if so generate a fneg
5731 // for it.
5732 if (auto *NewOp = TryMOVIWithBits(NegBits)) {
5733 Register NewDst = MRI.createVirtualRegister(
5734 DstSize == 64 ? &AArch64::FPR64RegClass : &AArch64::FPR128RegClass);
5735 NewOp->getOperand(0).setReg(NewDst);
5736 return MIRBuilder.buildInstr(NegOpc, {Dst}, {NewDst});
5737 }
5738 return nullptr;
5739 };
5740 MachineInstr *R;
5741 if ((R = TryWithFNeg(DefBits, 32,
5742 DstSize == 64 ? AArch64::FNEGv2f32
5743 : AArch64::FNEGv4f32)) ||
5744 (R = TryWithFNeg(DefBits, 64,
5745 DstSize == 64 ? AArch64::FNEGDr
5746 : AArch64::FNEGv2f64)) ||
5747 (STI.hasFullFP16() &&
5748 (R = TryWithFNeg(DefBits, 16,
5749 DstSize == 64 ? AArch64::FNEGv4f16
5750 : AArch64::FNEGv8f16))))
5751 return R;
5752 }
5753
5754 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5755 if (!CPLoad) {
5756 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5757 return nullptr;
5758 }
5759
5760 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5762 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5763 return &*Copy;
5764}
5765
5766bool AArch64InstructionSelector::tryOptConstantBuildVec(
5767 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5768 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5769 unsigned DstSize = DstTy.getSizeInBits();
5770 assert(DstSize <= 128 && "Unexpected build_vec type!");
5771 if (DstSize < 32)
5772 return false;
5773 // Check if we're building a constant vector, in which case we want to
5774 // generate a constant pool load instead of a vector insert sequence.
5776 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5777 Register OpReg = I.getOperand(Idx).getReg();
5778 if (auto AnyConst = getAnyConstantVRegValWithLookThrough(
5779 OpReg, MRI, /*LookThroughInstrs=*/true,
5780 /*LookThroughAnyExt=*/true)) {
5781 MachineInstr *DefMI = MRI.getVRegDef(AnyConst->VReg);
5782
5783 if (DefMI->getOpcode() == TargetOpcode::G_CONSTANT) {
5784 Csts.emplace_back(
5785 ConstantInt::get(MIB.getMF().getFunction().getContext(),
5786 std::move(AnyConst->Value)));
5787 continue;
5788 }
5789
5790 if (DefMI->getOpcode() == TargetOpcode::G_FCONSTANT) {
5791 Csts.emplace_back(
5792 const_cast<ConstantFP *>(DefMI->getOperand(1).getFPImm()));
5793 continue;
5794 }
5795 }
5796 return false;
5797 }
5798 Constant *CV = ConstantVector::get(Csts);
5799 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5800 return false;
5801 I.eraseFromParent();
5802 return true;
5803}
5804
5805bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5806 MachineInstr &I, MachineRegisterInfo &MRI) {
5807 // Given:
5808 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5809 //
5810 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
5811 Register Dst = I.getOperand(0).getReg();
5812 Register EltReg = I.getOperand(1).getReg();
5813 LLT EltTy = MRI.getType(EltReg);
5814 // If the index isn't on the same bank as its elements, then this can't be a
5815 // SUBREG_TO_REG.
5816 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5817 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5818 if (EltRB != DstRB)
5819 return false;
5820 if (any_of(drop_begin(I.operands(), 2), [&MRI](const MachineOperand &Op) {
5821 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), MRI);
5822 }))
5823 return false;
5824 unsigned SubReg;
5825 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB);
5826 if (!EltRC)
5827 return false;
5828 const TargetRegisterClass *DstRC =
5829 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB);
5830 if (!DstRC)
5831 return false;
5832 if (!getSubRegForClass(EltRC, TRI, SubReg))
5833 return false;
5834 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5835 .addUse(EltReg)
5836 .addImm(SubReg);
5837 I.eraseFromParent();
5838 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5839 return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5840}
5841
5842bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5843 MachineRegisterInfo &MRI) {
5844 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5845 // Until we port more of the optimized selections, for now just use a vector
5846 // insert sequence.
5847 const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5848 const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5849 unsigned EltSize = EltTy.getSizeInBits();
5850
5851 if (tryOptConstantBuildVec(I, DstTy, MRI))
5852 return true;
5853 if (tryOptBuildVecToSubregToReg(I, MRI))
5854 return true;
5855
5856 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
5857 return false; // Don't support all element types yet.
5858 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5859
5860 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5861 MachineInstr *ScalarToVec =
5862 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5863 I.getOperand(1).getReg(), MIB);
5864 if (!ScalarToVec)
5865 return false;
5866
5867 Register DstVec = ScalarToVec->getOperand(0).getReg();
5868 unsigned DstSize = DstTy.getSizeInBits();
5869
5870 // Keep track of the last MI we inserted. Later on, we might be able to save
5871 // a copy using it.
5872 MachineInstr *PrevMI = ScalarToVec;
5873 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5874 // Note that if we don't do a subregister copy, we can end up making an
5875 // extra register.
5876 Register OpReg = I.getOperand(i).getReg();
5877 // Do not emit inserts for undefs
5878 if (!getOpcodeDef<GImplicitDef>(OpReg, MRI)) {
5879 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, OpReg, i - 1, RB, MIB);
5880 DstVec = PrevMI->getOperand(0).getReg();
5881 }
5882 }
5883
5884 // If DstTy's size in bits is less than 128, then emit a subregister copy
5885 // from DstVec to the last register we've defined.
5886 if (DstSize < 128) {
5887 // Force this to be FPR using the destination vector.
5888 const TargetRegisterClass *RC =
5889 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5890 if (!RC)
5891 return false;
5892 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5893 LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5894 return false;
5895 }
5896
5897 unsigned SubReg = 0;
5898 if (!getSubRegForClass(RC, TRI, SubReg))
5899 return false;
5900 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5901 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5902 << "\n");
5903 return false;
5904 }
5905
5907 Register DstReg = I.getOperand(0).getReg();
5908
5909 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, {}, SubReg);
5910 MachineOperand &RegOp = I.getOperand(1);
5911 RegOp.setReg(Reg);
5912 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5913 } else {
5914 // We either have a vector with all elements (except the first one) undef or
5915 // at least one non-undef non-first element. In the first case, we need to
5916 // constrain the output register ourselves as we may have generated an
5917 // INSERT_SUBREG operation which is a generic operation for which the
5918 // output regclass cannot be automatically chosen.
5919 //
5920 // In the second case, there is no need to do this as it may generate an
5921 // instruction like INSvi32gpr where the regclass can be automatically
5922 // chosen.
5923 //
5924 // Also, we save a copy by re-using the destination register on the final
5925 // insert.
5926 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5928
5929 Register DstReg = PrevMI->getOperand(0).getReg();
5930 if (PrevMI == ScalarToVec && DstReg.isVirtual()) {
5931 const TargetRegisterClass *RC =
5932 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI));
5933 RBI.constrainGenericRegister(DstReg, *RC, MRI);
5934 }
5935 }
5936
5938 return true;
5939}
5940
5941bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5942 unsigned NumVecs,
5943 MachineInstr &I) {
5944 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5945 assert(Opc && "Expected an opcode?");
5946 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5947 auto &MRI = *MIB.getMRI();
5948 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5949 unsigned Size = Ty.getSizeInBits();
5950 assert((Size == 64 || Size == 128) &&
5951 "Destination must be 64 bits or 128 bits?");
5952 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
5953 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5954 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5955 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5956 Load.cloneMemRefs(I);
5958 Register SelectedLoadDst = Load->getOperand(0).getReg();
5959 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5960 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5961 .addReg(SelectedLoadDst, {}, SubReg + Idx);
5962 // Emit the subreg copies and immediately select them.
5963 // FIXME: We should refactor our copy code into an emitCopy helper and
5964 // clean up uses of this pattern elsewhere in the selector.
5965 selectCopy(*Vec, TII, MRI, TRI, RBI);
5966 }
5967 return true;
5968}
5969
5970bool AArch64InstructionSelector::selectVectorLoadLaneIntrinsic(
5971 unsigned Opc, unsigned NumVecs, MachineInstr &I) {
5972 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5973 assert(Opc && "Expected an opcode?");
5974 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5975 auto &MRI = *MIB.getMRI();
5976 LLT Ty = MRI.getType(I.getOperand(0).getReg());
5977 bool Narrow = Ty.getSizeInBits() == 64;
5978
5979 auto FirstSrcRegIt = I.operands_begin() + NumVecs + 1;
5980 SmallVector<Register, 4> Regs(NumVecs);
5981 std::transform(FirstSrcRegIt, FirstSrcRegIt + NumVecs, Regs.begin(),
5982 [](auto MO) { return MO.getReg(); });
5983
5984 if (Narrow) {
5985 transform(Regs, Regs.begin(), [this](Register Reg) {
5986 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
5987 ->getOperand(0)
5988 .getReg();
5989 });
5990 Ty = Ty.multiplyElements(2);
5991 }
5992
5993 Register Tuple = createQTuple(Regs, MIB);
5994 auto LaneNo = getIConstantVRegVal((FirstSrcRegIt + NumVecs)->getReg(), MRI);
5995 if (!LaneNo)
5996 return false;
5997
5998 Register Ptr = (FirstSrcRegIt + NumVecs + 1)->getReg();
5999 auto Load = MIB.buildInstr(Opc, {Ty}, {})
6000 .addReg(Tuple)
6001 .addImm(LaneNo->getZExtValue())
6002 .addReg(Ptr);
6003 Load.cloneMemRefs(I);
6005 Register SelectedLoadDst = Load->getOperand(0).getReg();
6006 unsigned SubReg = AArch64::qsub0;
6007 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
6008 auto Vec = MIB.buildInstr(TargetOpcode::COPY,
6009 {Narrow ? DstOp(&AArch64::FPR128RegClass)
6010 : DstOp(I.getOperand(Idx).getReg())},
6011 {})
6012 .addReg(SelectedLoadDst, {}, SubReg + Idx);
6013 Register WideReg = Vec.getReg(0);
6014 // Emit the subreg copies and immediately select them.
6015 selectCopy(*Vec, TII, MRI, TRI, RBI);
6016 if (Narrow &&
6017 !emitNarrowVector(I.getOperand(Idx).getReg(), WideReg, MIB, MRI))
6018 return false;
6019 }
6020 return true;
6021}
6022
6023void AArch64InstructionSelector::selectVectorStoreIntrinsic(MachineInstr &I,
6024 unsigned NumVecs,
6025 unsigned Opc) {
6026 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6027 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6028 Register Ptr = I.getOperand(1 + NumVecs).getReg();
6029
6030 SmallVector<Register, 2> Regs(NumVecs);
6031 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6032 Regs.begin(), [](auto MO) { return MO.getReg(); });
6033
6034 Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
6035 : createDTuple(Regs, MIB);
6036 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
6037 Store.cloneMemRefs(I);
6039}
6040
6041bool AArch64InstructionSelector::selectVectorStoreLaneIntrinsic(
6042 MachineInstr &I, unsigned NumVecs, unsigned Opc) {
6043 MachineRegisterInfo &MRI = I.getParent()->getParent()->getRegInfo();
6044 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6045 bool Narrow = Ty.getSizeInBits() == 64;
6046
6047 SmallVector<Register, 2> Regs(NumVecs);
6048 std::transform(I.operands_begin() + 1, I.operands_begin() + 1 + NumVecs,
6049 Regs.begin(), [](auto MO) { return MO.getReg(); });
6050
6051 if (Narrow)
6052 transform(Regs, Regs.begin(), [this](Register Reg) {
6053 return emitScalarToVector(64, &AArch64::FPR128RegClass, Reg, MIB)
6054 ->getOperand(0)
6055 .getReg();
6056 });
6057
6058 Register Tuple = createQTuple(Regs, MIB);
6059
6060 auto LaneNo = getIConstantVRegVal(I.getOperand(1 + NumVecs).getReg(), MRI);
6061 if (!LaneNo)
6062 return false;
6063 Register Ptr = I.getOperand(1 + NumVecs + 1).getReg();
6064 auto Store = MIB.buildInstr(Opc, {}, {})
6065 .addReg(Tuple)
6066 .addImm(LaneNo->getZExtValue())
6067 .addReg(Ptr);
6068 Store.cloneMemRefs(I);
6070 return true;
6071}
6072
6073bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
6074 MachineInstr &I, MachineRegisterInfo &MRI) {
6075 // Find the intrinsic ID.
6076 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
6077
6078 const LLT S8 = LLT::scalar(8);
6079 const LLT S16 = LLT::scalar(16);
6080 const LLT S32 = LLT::scalar(32);
6081 const LLT S64 = LLT::scalar(64);
6082 const LLT P0 = LLT::pointer(0, 64);
6083 // Select the instruction.
6084 switch (IntrinID) {
6085 default:
6086 return false;
6087 case Intrinsic::aarch64_ldxp:
6088 case Intrinsic::aarch64_ldaxp: {
6089 auto NewI = MIB.buildInstr(
6090 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
6091 {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
6092 {I.getOperand(3)});
6093 NewI.cloneMemRefs(I);
6095 break;
6096 }
6097 case Intrinsic::aarch64_neon_ld1x2: {
6098 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6099 unsigned Opc = 0;
6100 if (Ty == LLT::fixed_vector(8, S8))
6101 Opc = AArch64::LD1Twov8b;
6102 else if (Ty == LLT::fixed_vector(16, S8))
6103 Opc = AArch64::LD1Twov16b;
6104 else if (Ty == LLT::fixed_vector(4, S16))
6105 Opc = AArch64::LD1Twov4h;
6106 else if (Ty == LLT::fixed_vector(8, S16))
6107 Opc = AArch64::LD1Twov8h;
6108 else if (Ty == LLT::fixed_vector(2, S32))
6109 Opc = AArch64::LD1Twov2s;
6110 else if (Ty == LLT::fixed_vector(4, S32))
6111 Opc = AArch64::LD1Twov4s;
6112 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6113 Opc = AArch64::LD1Twov2d;
6114 else if (Ty == S64 || Ty == P0)
6115 Opc = AArch64::LD1Twov1d;
6116 else
6117 llvm_unreachable("Unexpected type for ld1x2!");
6118 selectVectorLoadIntrinsic(Opc, 2, I);
6119 break;
6120 }
6121 case Intrinsic::aarch64_neon_ld1x3: {
6122 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6123 unsigned Opc = 0;
6124 if (Ty == LLT::fixed_vector(8, S8))
6125 Opc = AArch64::LD1Threev8b;
6126 else if (Ty == LLT::fixed_vector(16, S8))
6127 Opc = AArch64::LD1Threev16b;
6128 else if (Ty == LLT::fixed_vector(4, S16))
6129 Opc = AArch64::LD1Threev4h;
6130 else if (Ty == LLT::fixed_vector(8, S16))
6131 Opc = AArch64::LD1Threev8h;
6132 else if (Ty == LLT::fixed_vector(2, S32))
6133 Opc = AArch64::LD1Threev2s;
6134 else if (Ty == LLT::fixed_vector(4, S32))
6135 Opc = AArch64::LD1Threev4s;
6136 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6137 Opc = AArch64::LD1Threev2d;
6138 else if (Ty == S64 || Ty == P0)
6139 Opc = AArch64::LD1Threev1d;
6140 else
6141 llvm_unreachable("Unexpected type for ld1x3!");
6142 selectVectorLoadIntrinsic(Opc, 3, I);
6143 break;
6144 }
6145 case Intrinsic::aarch64_neon_ld1x4: {
6146 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6147 unsigned Opc = 0;
6148 if (Ty == LLT::fixed_vector(8, S8))
6149 Opc = AArch64::LD1Fourv8b;
6150 else if (Ty == LLT::fixed_vector(16, S8))
6151 Opc = AArch64::LD1Fourv16b;
6152 else if (Ty == LLT::fixed_vector(4, S16))
6153 Opc = AArch64::LD1Fourv4h;
6154 else if (Ty == LLT::fixed_vector(8, S16))
6155 Opc = AArch64::LD1Fourv8h;
6156 else if (Ty == LLT::fixed_vector(2, S32))
6157 Opc = AArch64::LD1Fourv2s;
6158 else if (Ty == LLT::fixed_vector(4, S32))
6159 Opc = AArch64::LD1Fourv4s;
6160 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6161 Opc = AArch64::LD1Fourv2d;
6162 else if (Ty == S64 || Ty == P0)
6163 Opc = AArch64::LD1Fourv1d;
6164 else
6165 llvm_unreachable("Unexpected type for ld1x4!");
6166 selectVectorLoadIntrinsic(Opc, 4, I);
6167 break;
6168 }
6169 case Intrinsic::aarch64_neon_ld2: {
6170 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6171 unsigned Opc = 0;
6172 if (Ty == LLT::fixed_vector(8, S8))
6173 Opc = AArch64::LD2Twov8b;
6174 else if (Ty == LLT::fixed_vector(16, S8))
6175 Opc = AArch64::LD2Twov16b;
6176 else if (Ty == LLT::fixed_vector(4, S16))
6177 Opc = AArch64::LD2Twov4h;
6178 else if (Ty == LLT::fixed_vector(8, S16))
6179 Opc = AArch64::LD2Twov8h;
6180 else if (Ty == LLT::fixed_vector(2, S32))
6181 Opc = AArch64::LD2Twov2s;
6182 else if (Ty == LLT::fixed_vector(4, S32))
6183 Opc = AArch64::LD2Twov4s;
6184 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6185 Opc = AArch64::LD2Twov2d;
6186 else if (Ty == S64 || Ty == P0)
6187 Opc = AArch64::LD1Twov1d;
6188 else
6189 llvm_unreachable("Unexpected type for ld2!");
6190 selectVectorLoadIntrinsic(Opc, 2, I);
6191 break;
6192 }
6193 case Intrinsic::aarch64_neon_ld2lane: {
6194 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6195 unsigned Opc;
6196 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6197 Opc = AArch64::LD2i8;
6198 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6199 Opc = AArch64::LD2i16;
6200 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6201 Opc = AArch64::LD2i32;
6202 else if (Ty == LLT::fixed_vector(2, S64) ||
6203 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6204 Opc = AArch64::LD2i64;
6205 else
6206 llvm_unreachable("Unexpected type for st2lane!");
6207 if (!selectVectorLoadLaneIntrinsic(Opc, 2, I))
6208 return false;
6209 break;
6210 }
6211 case Intrinsic::aarch64_neon_ld2r: {
6212 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6213 unsigned Opc = 0;
6214 if (Ty == LLT::fixed_vector(8, S8))
6215 Opc = AArch64::LD2Rv8b;
6216 else if (Ty == LLT::fixed_vector(16, S8))
6217 Opc = AArch64::LD2Rv16b;
6218 else if (Ty == LLT::fixed_vector(4, S16))
6219 Opc = AArch64::LD2Rv4h;
6220 else if (Ty == LLT::fixed_vector(8, S16))
6221 Opc = AArch64::LD2Rv8h;
6222 else if (Ty == LLT::fixed_vector(2, S32))
6223 Opc = AArch64::LD2Rv2s;
6224 else if (Ty == LLT::fixed_vector(4, S32))
6225 Opc = AArch64::LD2Rv4s;
6226 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6227 Opc = AArch64::LD2Rv2d;
6228 else if (Ty == S64 || Ty == P0)
6229 Opc = AArch64::LD2Rv1d;
6230 else
6231 llvm_unreachable("Unexpected type for ld2r!");
6232 selectVectorLoadIntrinsic(Opc, 2, I);
6233 break;
6234 }
6235 case Intrinsic::aarch64_neon_ld3: {
6236 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6237 unsigned Opc = 0;
6238 if (Ty == LLT::fixed_vector(8, S8))
6239 Opc = AArch64::LD3Threev8b;
6240 else if (Ty == LLT::fixed_vector(16, S8))
6241 Opc = AArch64::LD3Threev16b;
6242 else if (Ty == LLT::fixed_vector(4, S16))
6243 Opc = AArch64::LD3Threev4h;
6244 else if (Ty == LLT::fixed_vector(8, S16))
6245 Opc = AArch64::LD3Threev8h;
6246 else if (Ty == LLT::fixed_vector(2, S32))
6247 Opc = AArch64::LD3Threev2s;
6248 else if (Ty == LLT::fixed_vector(4, S32))
6249 Opc = AArch64::LD3Threev4s;
6250 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6251 Opc = AArch64::LD3Threev2d;
6252 else if (Ty == S64 || Ty == P0)
6253 Opc = AArch64::LD1Threev1d;
6254 else
6255 llvm_unreachable("Unexpected type for ld3!");
6256 selectVectorLoadIntrinsic(Opc, 3, I);
6257 break;
6258 }
6259 case Intrinsic::aarch64_neon_ld3lane: {
6260 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6261 unsigned Opc;
6262 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6263 Opc = AArch64::LD3i8;
6264 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6265 Opc = AArch64::LD3i16;
6266 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6267 Opc = AArch64::LD3i32;
6268 else if (Ty == LLT::fixed_vector(2, S64) ||
6269 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6270 Opc = AArch64::LD3i64;
6271 else
6272 llvm_unreachable("Unexpected type for st3lane!");
6273 if (!selectVectorLoadLaneIntrinsic(Opc, 3, I))
6274 return false;
6275 break;
6276 }
6277 case Intrinsic::aarch64_neon_ld3r: {
6278 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6279 unsigned Opc = 0;
6280 if (Ty == LLT::fixed_vector(8, S8))
6281 Opc = AArch64::LD3Rv8b;
6282 else if (Ty == LLT::fixed_vector(16, S8))
6283 Opc = AArch64::LD3Rv16b;
6284 else if (Ty == LLT::fixed_vector(4, S16))
6285 Opc = AArch64::LD3Rv4h;
6286 else if (Ty == LLT::fixed_vector(8, S16))
6287 Opc = AArch64::LD3Rv8h;
6288 else if (Ty == LLT::fixed_vector(2, S32))
6289 Opc = AArch64::LD3Rv2s;
6290 else if (Ty == LLT::fixed_vector(4, S32))
6291 Opc = AArch64::LD3Rv4s;
6292 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6293 Opc = AArch64::LD3Rv2d;
6294 else if (Ty == S64 || Ty == P0)
6295 Opc = AArch64::LD3Rv1d;
6296 else
6297 llvm_unreachable("Unexpected type for ld3r!");
6298 selectVectorLoadIntrinsic(Opc, 3, I);
6299 break;
6300 }
6301 case Intrinsic::aarch64_neon_ld4: {
6302 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6303 unsigned Opc = 0;
6304 if (Ty == LLT::fixed_vector(8, S8))
6305 Opc = AArch64::LD4Fourv8b;
6306 else if (Ty == LLT::fixed_vector(16, S8))
6307 Opc = AArch64::LD4Fourv16b;
6308 else if (Ty == LLT::fixed_vector(4, S16))
6309 Opc = AArch64::LD4Fourv4h;
6310 else if (Ty == LLT::fixed_vector(8, S16))
6311 Opc = AArch64::LD4Fourv8h;
6312 else if (Ty == LLT::fixed_vector(2, S32))
6313 Opc = AArch64::LD4Fourv2s;
6314 else if (Ty == LLT::fixed_vector(4, S32))
6315 Opc = AArch64::LD4Fourv4s;
6316 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6317 Opc = AArch64::LD4Fourv2d;
6318 else if (Ty == S64 || Ty == P0)
6319 Opc = AArch64::LD1Fourv1d;
6320 else
6321 llvm_unreachable("Unexpected type for ld4!");
6322 selectVectorLoadIntrinsic(Opc, 4, I);
6323 break;
6324 }
6325 case Intrinsic::aarch64_neon_ld4lane: {
6326 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6327 unsigned Opc;
6328 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6329 Opc = AArch64::LD4i8;
6330 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6331 Opc = AArch64::LD4i16;
6332 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6333 Opc = AArch64::LD4i32;
6334 else if (Ty == LLT::fixed_vector(2, S64) ||
6335 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6336 Opc = AArch64::LD4i64;
6337 else
6338 llvm_unreachable("Unexpected type for st4lane!");
6339 if (!selectVectorLoadLaneIntrinsic(Opc, 4, I))
6340 return false;
6341 break;
6342 }
6343 case Intrinsic::aarch64_neon_ld4r: {
6344 LLT Ty = MRI.getType(I.getOperand(0).getReg());
6345 unsigned Opc = 0;
6346 if (Ty == LLT::fixed_vector(8, S8))
6347 Opc = AArch64::LD4Rv8b;
6348 else if (Ty == LLT::fixed_vector(16, S8))
6349 Opc = AArch64::LD4Rv16b;
6350 else if (Ty == LLT::fixed_vector(4, S16))
6351 Opc = AArch64::LD4Rv4h;
6352 else if (Ty == LLT::fixed_vector(8, S16))
6353 Opc = AArch64::LD4Rv8h;
6354 else if (Ty == LLT::fixed_vector(2, S32))
6355 Opc = AArch64::LD4Rv2s;
6356 else if (Ty == LLT::fixed_vector(4, S32))
6357 Opc = AArch64::LD4Rv4s;
6358 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6359 Opc = AArch64::LD4Rv2d;
6360 else if (Ty == S64 || Ty == P0)
6361 Opc = AArch64::LD4Rv1d;
6362 else
6363 llvm_unreachable("Unexpected type for ld4r!");
6364 selectVectorLoadIntrinsic(Opc, 4, I);
6365 break;
6366 }
6367 case Intrinsic::aarch64_neon_st1x2: {
6368 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6369 unsigned Opc;
6370 if (Ty == LLT::fixed_vector(8, S8))
6371 Opc = AArch64::ST1Twov8b;
6372 else if (Ty == LLT::fixed_vector(16, S8))
6373 Opc = AArch64::ST1Twov16b;
6374 else if (Ty == LLT::fixed_vector(4, S16))
6375 Opc = AArch64::ST1Twov4h;
6376 else if (Ty == LLT::fixed_vector(8, S16))
6377 Opc = AArch64::ST1Twov8h;
6378 else if (Ty == LLT::fixed_vector(2, S32))
6379 Opc = AArch64::ST1Twov2s;
6380 else if (Ty == LLT::fixed_vector(4, S32))
6381 Opc = AArch64::ST1Twov4s;
6382 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6383 Opc = AArch64::ST1Twov2d;
6384 else if (Ty == S64 || Ty == P0)
6385 Opc = AArch64::ST1Twov1d;
6386 else
6387 llvm_unreachable("Unexpected type for st1x2!");
6388 selectVectorStoreIntrinsic(I, 2, Opc);
6389 break;
6390 }
6391 case Intrinsic::aarch64_neon_st1x3: {
6392 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6393 unsigned Opc;
6394 if (Ty == LLT::fixed_vector(8, S8))
6395 Opc = AArch64::ST1Threev8b;
6396 else if (Ty == LLT::fixed_vector(16, S8))
6397 Opc = AArch64::ST1Threev16b;
6398 else if (Ty == LLT::fixed_vector(4, S16))
6399 Opc = AArch64::ST1Threev4h;
6400 else if (Ty == LLT::fixed_vector(8, S16))
6401 Opc = AArch64::ST1Threev8h;
6402 else if (Ty == LLT::fixed_vector(2, S32))
6403 Opc = AArch64::ST1Threev2s;
6404 else if (Ty == LLT::fixed_vector(4, S32))
6405 Opc = AArch64::ST1Threev4s;
6406 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6407 Opc = AArch64::ST1Threev2d;
6408 else if (Ty == S64 || Ty == P0)
6409 Opc = AArch64::ST1Threev1d;
6410 else
6411 llvm_unreachable("Unexpected type for st1x3!");
6412 selectVectorStoreIntrinsic(I, 3, Opc);
6413 break;
6414 }
6415 case Intrinsic::aarch64_neon_st1x4: {
6416 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6417 unsigned Opc;
6418 if (Ty == LLT::fixed_vector(8, S8))
6419 Opc = AArch64::ST1Fourv8b;
6420 else if (Ty == LLT::fixed_vector(16, S8))
6421 Opc = AArch64::ST1Fourv16b;
6422 else if (Ty == LLT::fixed_vector(4, S16))
6423 Opc = AArch64::ST1Fourv4h;
6424 else if (Ty == LLT::fixed_vector(8, S16))
6425 Opc = AArch64::ST1Fourv8h;
6426 else if (Ty == LLT::fixed_vector(2, S32))
6427 Opc = AArch64::ST1Fourv2s;
6428 else if (Ty == LLT::fixed_vector(4, S32))
6429 Opc = AArch64::ST1Fourv4s;
6430 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6431 Opc = AArch64::ST1Fourv2d;
6432 else if (Ty == S64 || Ty == P0)
6433 Opc = AArch64::ST1Fourv1d;
6434 else
6435 llvm_unreachable("Unexpected type for st1x4!");
6436 selectVectorStoreIntrinsic(I, 4, Opc);
6437 break;
6438 }
6439 case Intrinsic::aarch64_neon_st2: {
6440 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6441 unsigned Opc;
6442 if (Ty == LLT::fixed_vector(8, S8))
6443 Opc = AArch64::ST2Twov8b;
6444 else if (Ty == LLT::fixed_vector(16, S8))
6445 Opc = AArch64::ST2Twov16b;
6446 else if (Ty == LLT::fixed_vector(4, S16))
6447 Opc = AArch64::ST2Twov4h;
6448 else if (Ty == LLT::fixed_vector(8, S16))
6449 Opc = AArch64::ST2Twov8h;
6450 else if (Ty == LLT::fixed_vector(2, S32))
6451 Opc = AArch64::ST2Twov2s;
6452 else if (Ty == LLT::fixed_vector(4, S32))
6453 Opc = AArch64::ST2Twov4s;
6454 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6455 Opc = AArch64::ST2Twov2d;
6456 else if (Ty == S64 || Ty == P0)
6457 Opc = AArch64::ST1Twov1d;
6458 else
6459 llvm_unreachable("Unexpected type for st2!");
6460 selectVectorStoreIntrinsic(I, 2, Opc);
6461 break;
6462 }
6463 case Intrinsic::aarch64_neon_st3: {
6464 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6465 unsigned Opc;
6466 if (Ty == LLT::fixed_vector(8, S8))
6467 Opc = AArch64::ST3Threev8b;
6468 else if (Ty == LLT::fixed_vector(16, S8))
6469 Opc = AArch64::ST3Threev16b;
6470 else if (Ty == LLT::fixed_vector(4, S16))
6471 Opc = AArch64::ST3Threev4h;
6472 else if (Ty == LLT::fixed_vector(8, S16))
6473 Opc = AArch64::ST3Threev8h;
6474 else if (Ty == LLT::fixed_vector(2, S32))
6475 Opc = AArch64::ST3Threev2s;
6476 else if (Ty == LLT::fixed_vector(4, S32))
6477 Opc = AArch64::ST3Threev4s;
6478 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6479 Opc = AArch64::ST3Threev2d;
6480 else if (Ty == S64 || Ty == P0)
6481 Opc = AArch64::ST1Threev1d;
6482 else
6483 llvm_unreachable("Unexpected type for st3!");
6484 selectVectorStoreIntrinsic(I, 3, Opc);
6485 break;
6486 }
6487 case Intrinsic::aarch64_neon_st4: {
6488 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6489 unsigned Opc;
6490 if (Ty == LLT::fixed_vector(8, S8))
6491 Opc = AArch64::ST4Fourv8b;
6492 else if (Ty == LLT::fixed_vector(16, S8))
6493 Opc = AArch64::ST4Fourv16b;
6494 else if (Ty == LLT::fixed_vector(4, S16))
6495 Opc = AArch64::ST4Fourv4h;
6496 else if (Ty == LLT::fixed_vector(8, S16))
6497 Opc = AArch64::ST4Fourv8h;
6498 else if (Ty == LLT::fixed_vector(2, S32))
6499 Opc = AArch64::ST4Fourv2s;
6500 else if (Ty == LLT::fixed_vector(4, S32))
6501 Opc = AArch64::ST4Fourv4s;
6502 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
6503 Opc = AArch64::ST4Fourv2d;
6504 else if (Ty == S64 || Ty == P0)
6505 Opc = AArch64::ST1Fourv1d;
6506 else
6507 llvm_unreachable("Unexpected type for st4!");
6508 selectVectorStoreIntrinsic(I, 4, Opc);
6509 break;
6510 }
6511 case Intrinsic::aarch64_neon_st2lane: {
6512 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6513 unsigned Opc;
6514 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6515 Opc = AArch64::ST2i8;
6516 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6517 Opc = AArch64::ST2i16;
6518 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6519 Opc = AArch64::ST2i32;
6520 else if (Ty == LLT::fixed_vector(2, S64) ||
6521 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6522 Opc = AArch64::ST2i64;
6523 else
6524 llvm_unreachable("Unexpected type for st2lane!");
6525 if (!selectVectorStoreLaneIntrinsic(I, 2, Opc))
6526 return false;
6527 break;
6528 }
6529 case Intrinsic::aarch64_neon_st3lane: {
6530 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6531 unsigned Opc;
6532 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6533 Opc = AArch64::ST3i8;
6534 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6535 Opc = AArch64::ST3i16;
6536 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6537 Opc = AArch64::ST3i32;
6538 else if (Ty == LLT::fixed_vector(2, S64) ||
6539 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6540 Opc = AArch64::ST3i64;
6541 else
6542 llvm_unreachable("Unexpected type for st3lane!");
6543 if (!selectVectorStoreLaneIntrinsic(I, 3, Opc))
6544 return false;
6545 break;
6546 }
6547 case Intrinsic::aarch64_neon_st4lane: {
6548 LLT Ty = MRI.getType(I.getOperand(1).getReg());
6549 unsigned Opc;
6550 if (Ty == LLT::fixed_vector(8, S8) || Ty == LLT::fixed_vector(16, S8))
6551 Opc = AArch64::ST4i8;
6552 else if (Ty == LLT::fixed_vector(4, S16) || Ty == LLT::fixed_vector(8, S16))
6553 Opc = AArch64::ST4i16;
6554 else if (Ty == LLT::fixed_vector(2, S32) || Ty == LLT::fixed_vector(4, S32))
6555 Opc = AArch64::ST4i32;
6556 else if (Ty == LLT::fixed_vector(2, S64) ||
6557 Ty == LLT::fixed_vector(2, P0) || Ty == S64 || Ty == P0)
6558 Opc = AArch64::ST4i64;
6559 else
6560 llvm_unreachable("Unexpected type for st4lane!");
6561 if (!selectVectorStoreLaneIntrinsic(I, 4, Opc))
6562 return false;
6563 break;
6564 }
6565 case Intrinsic::aarch64_mops_memset_tag: {
6566 // Transform
6567 // %dst:gpr(p0) = \
6568 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
6569 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
6570 // where %dst is updated, into
6571 // %Rd:GPR64common, %Rn:GPR64) = \
6572 // MOPSMemorySetTaggingPseudo \
6573 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
6574 // where Rd and Rn are tied.
6575 // It is expected that %val has been extended to s64 in legalization.
6576 // Note that the order of the size/value operands are swapped.
6577
6578 Register DstDef = I.getOperand(0).getReg();
6579 // I.getOperand(1) is the intrinsic function
6580 Register DstUse = I.getOperand(2).getReg();
6581 Register ValUse = I.getOperand(3).getReg();
6582 Register SizeUse = I.getOperand(4).getReg();
6583
6584 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
6585 // Therefore an additional virtual register is required for the updated size
6586 // operand. This value is not accessible via the semantics of the intrinsic.
6588
6589 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
6590 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
6591 Memset.cloneMemRefs(I);
6593 break;
6594 }
6595 case Intrinsic::ptrauth_resign_load_relative: {
6596 Register DstReg = I.getOperand(0).getReg();
6597 Register ValReg = I.getOperand(2).getReg();
6598 uint64_t AUTKey = I.getOperand(3).getImm();
6599 Register AUTDisc = I.getOperand(4).getReg();
6600 uint64_t PACKey = I.getOperand(5).getImm();
6601 Register PACDisc = I.getOperand(6).getReg();
6602 int64_t Addend = I.getOperand(7).getImm();
6603
6604 Register AUTAddrDisc = AUTDisc;
6605 uint16_t AUTConstDiscC = 0;
6606 std::tie(AUTConstDiscC, AUTAddrDisc) =
6608
6609 Register PACAddrDisc = PACDisc;
6610 uint16_t PACConstDiscC = 0;
6611 std::tie(PACConstDiscC, PACAddrDisc) =
6613
6614 MIB.buildCopy({AArch64::X16}, {ValReg});
6615
6616 MIB.buildInstr(AArch64::AUTRELLOADPAC)
6617 .addImm(AUTKey)
6618 .addImm(AUTConstDiscC)
6619 .addUse(AUTAddrDisc)
6620 .addImm(PACKey)
6621 .addImm(PACConstDiscC)
6622 .addUse(PACAddrDisc)
6623 .addImm(Addend)
6624 .constrainAllUses(TII, TRI, RBI);
6625 MIB.buildCopy({DstReg}, Register(AArch64::X16));
6626
6627 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
6628 I.eraseFromParent();
6629 return true;
6630 }
6631 }
6632
6633 I.eraseFromParent();
6634 return true;
6635}
6636
/// Custom (C++) selection for no-side-effect intrinsics that TableGen does
/// not handle: pointer-authentication ops, frame/return address queries,
/// NEON table lookups (tbl/tbx), and the Swift async context address.
/// Returns true when \p I has been fully selected and erased; false lets
/// the generic selection paths try.
///
/// NOTE(review): several continuation lines in this listing were stripped
/// by the HTML extraction (shown below as bare original line numbers, e.g.
/// "6656", "6688", "6800"). They presumably held the calls completing the
/// std::tie(...) = extractPtrauthBlendDiscriminators(...) statements and a
/// constrainSelectedInstRegOperands(...) call — confirm against the
/// upstream file before relying on this listing.
6637bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
 6638 MachineRegisterInfo &MRI) {
 6639 unsigned IntrinID = cast<GIntrinsic>(I).getIntrinsicID();
 6640
 6641 switch (IntrinID) {
 6642 default:
 6643 break;
 // ptrauth.resign: authenticate the value with (AUTKey, AUT discriminator)
 // and re-sign it with (PACKey, PAC discriminator). The value is routed
 // through the fixed register X16; X17 is marked defined as scratch for the
 // AUTPAC pseudo expansion.
 6644 case Intrinsic::ptrauth_resign: {
 6645 Register DstReg = I.getOperand(0).getReg();
 6646 Register ValReg = I.getOperand(2).getReg();
 6647 uint64_t AUTKey = I.getOperand(3).getImm();
 6648 Register AUTDisc = I.getOperand(4).getReg();
 6649 uint64_t PACKey = I.getOperand(5).getImm();
 6650 Register PACDisc = I.getOperand(6).getReg();
 6651
 // Split each discriminator into a constant 16-bit part and an address
 // part (callee line stripped by extraction — presumably
 // extractPtrauthBlendDiscriminators(AUTDisc, MRI); TODO confirm).
 6652 Register AUTAddrDisc = AUTDisc;
 6653 uint16_t AUTConstDiscC = 0;
 6654 std::tie(AUTConstDiscC, AUTAddrDisc) =
 6656
 6657 Register PACAddrDisc = PACDisc;
 6658 uint16_t PACConstDiscC = 0;
 6659 std::tie(PACConstDiscC, PACAddrDisc) =
 6661
 6662 MIB.buildCopy({AArch64::X16}, {ValReg});
 6663 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
 6664 MIB.buildInstr(AArch64::AUTPAC)
 6665 .addImm(AUTKey)
 6666 .addImm(AUTConstDiscC)
 6667 .addUse(AUTAddrDisc)
 6668 .addImm(PACKey)
 6669 .addImm(PACConstDiscC)
 6670 .addUse(PACAddrDisc)
 6671 .constrainAllUses(TII, TRI, RBI);
 6672 MIB.buildCopy({DstReg}, Register(AArch64::X16));
 6673
 6674 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
 6675 I.eraseFromParent();
 6676 return true;
 6677 }
 // ptrauth.auth: authenticate a signed pointer. Two lowerings: through the
 // fixed X16/X17 pair when the subtarget reports that safer, otherwise via
 // the AUTxMxN pseudo on ordinary virtual registers (with a scratch def).
 6678 case Intrinsic::ptrauth_auth: {
 6679 Register DstReg = I.getOperand(0).getReg();
 6680 Register ValReg = I.getOperand(2).getReg();
 6681 uint64_t AUTKey = I.getOperand(3).getImm();
 6682 Register AUTDisc = I.getOperand(4).getReg();
 6683
 // Split discriminator into const + address parts (callee line stripped
 // by extraction — presumably extractPtrauthBlendDiscriminators).
 6684 Register AUTAddrDisc = AUTDisc;
 6685 uint16_t AUTConstDiscC = 0;
 6686 std::tie(AUTConstDiscC, AUTAddrDisc) =
 6688
 6689 if (STI.isX16X17Safer()) {
 6690 MIB.buildCopy({AArch64::X16}, {ValReg});
 6691 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
 6692 MIB.buildInstr(AArch64::AUTx16x17)
 6693 .addImm(AUTKey)
 6694 .addImm(AUTConstDiscC)
 6695 .addUse(AUTAddrDisc)
 6696 .constrainAllUses(TII, TRI, RBI);
 6697 MIB.buildCopy({DstReg}, Register(AArch64::X16));
 6698 } else {
 6699 Register ScratchReg =
 6700 MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
 6701 MIB.buildInstr(AArch64::AUTxMxN)
 6702 .addDef(DstReg)
 6703 .addDef(ScratchReg)
 6704 .addUse(ValReg)
 6705 .addImm(AUTKey)
 6706 .addImm(AUTConstDiscC)
 6707 .addUse(AUTAddrDisc)
 6708 .constrainAllUses(TII, TRI, RBI);
 6709 }
 6710
 6711 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
 6712 I.eraseFromParent();
 6713 return true;
 6714 }
 // llvm.frameaddress / llvm.returnaddress: walk `Depth` frame records via
 // the saved FP chain; return addresses are stripped of any pointer-auth
 // signature (XPACI with PAuth, otherwise the XPACLRI sequence through LR).
 6715 case Intrinsic::frameaddress:
 6716 case Intrinsic::returnaddress: {
 6717 MachineFunction &MF = *I.getParent()->getParent();
 6718 MachineFrameInfo &MFI = MF.getFrameInfo();
 6719
 6720 unsigned Depth = I.getOperand(2).getImm();
 6721 Register DstReg = I.getOperand(0).getReg();
 6722 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
 6723
 // Fast path: returnaddress(0) reads this function's own return address,
 // captured from LR in the entry block and cached per-function.
 6724 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
 6725 if (!MFReturnAddr) {
 6726 // Insert the copy from LR/X30 into the entry block, before it can be
 6727 // clobbered by anything.
 6728 MFI.setReturnAddressIsTaken(true);
 6729 MFReturnAddr = getFunctionLiveInPhysReg(
 6730 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc())&#x3B;
 6731 }
 6732
 6733 if (STI.hasPAuth()) {
 6734 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
 6735 } else {
 6736 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
 6737 MIB.buildInstr(AArch64::XPACLRI);
 6738 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
 6739 }
 6740
 6741 I.eraseFromParent();
 6742 return true;
 6743 }
 6744
 // General case: follow the frame-pointer chain Depth times (each frame
 // record's slot 0 holds the previous FP).
 6745 MFI.setFrameAddressIsTaken(true);
 6746 Register FrameAddr(AArch64::FP);
 6747 while (Depth--) {
 6748 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
 6749 auto Ldr =
 6750 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
 // NOTE(review): original line 6751 stripped — presumably
 // constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); confirm upstream.
 6752 FrameAddr = NextFrame;
 6753 }
 6754
 6755 if (IntrinID == Intrinsic::frameaddress)
 6756 MIB.buildCopy({DstReg}, {FrameAddr});
 6757 else {
 6758 MFI.setReturnAddressIsTaken(true);
 6759
 // The return address lives in slot 1 of the frame record (FP + 8,
 // LDRXui immediates are scaled by 8).
 6760 if (STI.hasPAuth()) {
 6761 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 6762 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
 6763 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
 6764 } else {
 6765 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
 6766 .addImm(1);
 6767 MIB.buildInstr(AArch64::XPACLRI);
 6768 MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
 6769 }
 6770 }
 6771
 6772 I.eraseFromParent();
 6773 return true;
 6774 }
 // NEON table lookups: tblN uses zero for out-of-range indices, tbxN (last
 // argument true) additionally reads a fallback operand. First opcode is
 // the 64-bit-result form, second the 128-bit form.
 6775 case Intrinsic::aarch64_neon_tbl2:
 6776 SelectTable(I, MRI, 2, AArch64::TBLv8i8Two, AArch64::TBLv16i8Two, false);
 6777 return true;
 6778 case Intrinsic::aarch64_neon_tbl3:
 6779 SelectTable(I, MRI, 3, AArch64::TBLv8i8Three, AArch64::TBLv16i8Three,
 6780 false);
 6781 return true;
 6782 case Intrinsic::aarch64_neon_tbl4:
 6783 SelectTable(I, MRI, 4, AArch64::TBLv8i8Four, AArch64::TBLv16i8Four, false);
 6784 return true;
 6785 case Intrinsic::aarch64_neon_tbx2:
 6786 SelectTable(I, MRI, 2, AArch64::TBXv8i8Two, AArch64::TBXv16i8Two, true);
 6787 return true;
 6788 case Intrinsic::aarch64_neon_tbx3:
 6789 SelectTable(I, MRI, 3, AArch64::TBXv8i8Three, AArch64::TBXv16i8Three, true);
 6790 return true;
 6791 case Intrinsic::aarch64_neon_tbx4:
 6792 SelectTable(I, MRI, 4, AArch64::TBXv8i8Four, AArch64::TBXv16i8Four, true);
 6793 return true;
 // swift.async.context.addr: the async context pointer is at FP - 8
 // (SUBXri FP, #8, lsl #0).
 6794 case Intrinsic::swift_async_context_addr:
 6795 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
 6796 {Register(AArch64::FP)})
 6797 .addImm(8)
 6798 .addImm(0);
 // NOTE(review): original lines 6799/6801 stripped — presumably the
 // constrainSelectedInstRegOperands(*Sub, ...) call and a blank line.
 6800
 6802 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
 6803 I.eraseFromParent();
 6804 return true;
 6805 }
 6806 return false;
 6807}
6808
6809// G_PTRAUTH_GLOBAL_VALUE lowering
6810//
6811// We have 3 lowering alternatives to choose from:
6812// - MOVaddrPAC: similar to MOVaddr, with added PAC.
6813// If the GV doesn't need a GOT load (i.e., is locally defined)
6814// materialize the pointer using adrp+add+pac. See LowerMOVaddrPAC.
6815//
6816// - LOADgotPAC: similar to LOADgot, with added PAC.
6817// If the GV needs a GOT load, materialize the pointer using the usual
6818// GOT adrp+ldr, +pac. Pointers in GOT are assumed to be not signed, the GOT
6819// section is assumed to be read-only (for example, via relro mechanism). See
6820// LowerMOVaddrPAC.
6821//
6822// - LOADauthptrstatic: similar to LOADgot, but use a
6823// special stub slot instead of a GOT slot.
6824// Load a signed pointer for symbol 'sym' from a stub slot named
6825// 'sym$auth_ptr$key$disc' filled by dynamic linker during relocation
6826// resolving. This usually lowers to adrp+ldr, but also emits an entry into
6827// .data with an
6828// @AUTH relocation. See LowerLOADauthptrstatic.
6829//
 6830// All 3 are pseudos that are expanded late to longer sequences: this lets us
6831// provide integrity guarantees on the to-be-signed intermediate values.
6832//
6833// LOADauthptrstatic is undesirable because it requires a large section filled
6834// with often similarly-signed pointers, making it a good harvesting target.
6835// Thus, it's only used for ptrauth references to extern_weak to avoid null
6836// checks.
6837
/// Select G_PTRAUTH_GLOBAL_VALUE: materialize a signed pointer to a global
/// using one of MOVaddrPAC / LOADgotPAC / LOADauthptrstatic (see the
/// comment block above for the trade-offs between the three).
///
/// NOTE(review): a few lines in this listing were stripped by the HTML
/// extraction (bare original line numbers below) — presumably the guard
/// conditions of the report_fatal_error calls, the Offset adjustments read
/// from the G_GLOBAL_VALUE/G_ADD_LOW operands, and the .addGlobalAddress
/// operand of the MOVaddrPAC/LOADgotPAC build. Confirm against upstream.
6838bool AArch64InstructionSelector::selectPtrAuthGlobalValue(
 6839 MachineInstr &I, MachineRegisterInfo &MRI) const {
 6840 Register DefReg = I.getOperand(0).getReg();
 6841 Register Addr = I.getOperand(1).getReg();
 6842 uint64_t Key = I.getOperand(2).getImm();
 6843 Register AddrDisc = I.getOperand(3).getReg();
 6844 uint64_t Disc = I.getOperand(4).getImm();
 // Constant offset folded in from a feeding G_PTR_ADD (see below).
 6845 int64_t Offset = 0;
 6846
 // NOTE(review): guard condition stripped here — presumably
 // `if (Key > AArch64PACKey::LAST)`.
 6848 report_fatal_error("key in ptrauth global out of range [0, " +
 6849 Twine((int)AArch64PACKey::LAST) + "]");
 6850
 6851 // Blend only works if the integer discriminator is 16-bit wide.
 6852 if (!isUInt<16>(Disc))
 // NOTE(review): the `report_fatal_error(` opener appears stripped here.
 6854 "constant discriminator in ptrauth global out of range [0, 0xffff]");
 6855
 6856 // Choosing between 3 lowering alternatives is target-specific.
 6857 if (!STI.isTargetELF() && !STI.isTargetMachO())
 6858 report_fatal_error("ptrauth global lowering only supported on MachO/ELF");
 6859
 6860 if (!MRI.hasOneDef(Addr))
 6861 return false;
 6862
 6863 // First match any offset we take from the real global.
 // Only a G_PTR_ADD whose offset is a literal G_CONSTANT, with single
 // defs throughout, is peeled off; anything else bails out.
 6864 const MachineInstr *DefMI = &*MRI.def_instr_begin(Addr);
 6865 if (DefMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
 6866 Register OffsetReg = DefMI->getOperand(2).getReg();
 6867 if (!MRI.hasOneDef(OffsetReg))
 6868 return false;
 6869 const MachineInstr &OffsetMI = *MRI.def_instr_begin(OffsetReg);
 6870 if (OffsetMI.getOpcode() != TargetOpcode::G_CONSTANT)
 6871 return false;
 6872
 6873 Addr = DefMI->getOperand(1).getReg();
 6874 if (!MRI.hasOneDef(Addr))
 6875 return false;
 6876
 6877 DefMI = &*MRI.def_instr_begin(Addr);
 6878 Offset = OffsetMI.getOperand(1).getCImm()->getSExtValue();
 6879 }
 6880
 6881 // We should be left with a genuine unauthenticated GlobalValue.
 6882 const GlobalValue *GV;
 6883 if (DefMI->getOpcode() == TargetOpcode::G_GLOBAL_VALUE) {
 6884 GV = DefMI->getOperand(1).getGlobal();
 // NOTE(review): line 6885 stripped — presumably folds the operand's own
 // offset into Offset.
 6886 } else if (DefMI->getOpcode() == AArch64::G_ADD_LOW) {
 6887 GV = DefMI->getOperand(2).getGlobal();
 // NOTE(review): line 6888 stripped — presumably the matching Offset fold.
 6889 } else {
 6890 return false;
 6891 }
 6892
 6893 MachineIRBuilder MIB(I);
 6894
 6895 // Classify the reference to determine whether it needs a GOT load.
 6896 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
 6897 const bool NeedsGOTLoad = ((OpFlags & AArch64II::MO_GOT) != 0);
 6898 assert(((OpFlags & (~AArch64II::MO_GOT)) == 0) &&
 6899 "unsupported non-GOT op flags on ptrauth global reference");
 6900 assert((!GV->hasExternalWeakLinkage() || NeedsGOTLoad) &&
 6901 "unsupported non-GOT reference to weak ptrauth global");
 6902
 // The address discriminator counts as present unless it is a literal 0.
 6903 std::optional<APInt> AddrDiscVal = getIConstantVRegVal(AddrDisc, MRI);
 6904 bool HasAddrDisc = !AddrDiscVal || *AddrDiscVal != 0;
 6905
 6906 // Non-extern_weak:
 6907 // - No GOT load needed -> MOVaddrPAC
 6908 // - GOT load for non-extern_weak -> LOADgotPAC
 6909 // Note that we disallow extern_weak refs to avoid null checks later.
 6910 if (!GV->hasExternalWeakLinkage()) {
 // Both pseudos produce the signed pointer in X16 and may clobber X17.
 6911 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X16}, {});
 6912 MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {AArch64::X17}, {});
 6913 MIB.buildInstr(NeedsGOTLoad ? AArch64::LOADgotPAC : AArch64::MOVaddrPAC)
 // NOTE(review): line 6914 stripped — presumably
 // .addGlobalAddress(GV, Offset).
 6915 .addImm(Key)
 6916 .addReg(HasAddrDisc ? AddrDisc : AArch64::XZR)
 6917 .addImm(Disc)
 6918 .constrainAllUses(TII, TRI, RBI);
 6919 MIB.buildCopy(DefReg, Register(AArch64::X16));
 6920 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
 6921 I.eraseFromParent();
 6922 return true;
 6923 }
 6924
 6925 // extern_weak -> LOADauthptrstatic
 6926
 6927 // Offsets and extern_weak don't mix well: ptrauth aside, you'd get the
 6928 // offset alone as a pointer if the symbol wasn't available, which would
 6929 // probably break null checks in users. Ptrauth complicates things further:
 6930 // error out.
 6931 if (Offset != 0)
 // NOTE(review): the `report_fatal_error(` opener appears stripped here.
 6933 "unsupported non-zero offset in weak ptrauth global reference");
 6934
 6935 if (HasAddrDisc)
 6936 report_fatal_error("unsupported weak addr-div ptrauth global");
 6937
 6938 MIB.buildInstr(AArch64::LOADauthptrstatic, {DefReg}, {})
 6939 .addGlobalAddress(GV, Offset)
 6940 .addImm(Key)
 6941 .addImm(Disc);
 6942 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
 6943
 6944 I.eraseFromParent();
 6945 return true;
 6946}
6947
/// Select a NEON TBL/TBX table-lookup intrinsic. \p NumVec is the number of
/// table registers, \p Opc1 / \p Opc2 the opcodes for a 64-bit (v8i8) vs
/// any other destination, and \p isExt selects the TBX form, which carries
/// one extra source operand ahead of the table tuple.
///
/// NOTE(review): two lines in this listing were stripped by the HTML
/// extraction — the declaration of `Regs` (original line 6956, likely
/// `SmallVector<Register, 4> Regs;`) and original line 6968 (likely
/// `constrainSelectedInstRegOperands(*Instr, TII, TRI, RBI);`). Confirm
/// against the upstream file.
6948void AArch64InstructionSelector::SelectTable(MachineInstr &I,
 6949 MachineRegisterInfo &MRI,
 6950 unsigned NumVec, unsigned Opc1,
 6951 unsigned Opc2, bool isExt) {
 6952 Register DstReg = I.getOperand(0).getReg();
 // A v8i8 destination takes the first opcode; everything else the second.
 6953 unsigned Opc = MRI.getType(DstReg) == LLT::fixed_vector(8, 8) ? Opc1 : Opc2;
 6954
 6955 // Create the REG_SEQUENCE
 // Table operands start at index 2, shifted by one more for TBX.
 6957 for (unsigned i = 0; i < NumVec; i++)
 6958 Regs.push_back(I.getOperand(i + 2 + isExt).getReg());
 6959 Register RegSeq = createQTuple(Regs, MIB);
 6960
 // The lookup-index vector follows the table operands.
 6961 Register IdxReg = I.getOperand(2 + NumVec + isExt).getReg();
 6962 MachineInstrBuilder Instr;
 6963 if (isExt) {
 // TBX: operand 2 is the extra source passed ahead of the table tuple.
 6964 Register Reg = I.getOperand(2).getReg();
 6965 Instr = MIB.buildInstr(Opc, {DstReg}, {Reg, RegSeq, IdxReg});
 6966 } else
 6967 Instr = MIB.buildInstr(Opc, {DstReg}, {RegSeq, IdxReg});
 6969 I.eraseFromParent();
 6970}
6971
6972InstructionSelector::ComplexRendererFns
6973AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
6974 auto MaybeImmed = getImmedFromMO(Root);
6975 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6976 return std::nullopt;
6977 uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
6978 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6979}
6980
6981InstructionSelector::ComplexRendererFns
6982AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
6983 auto MaybeImmed = getImmedFromMO(Root);
6984 if (MaybeImmed == std::nullopt || *MaybeImmed > 31)
6985 return std::nullopt;
6986 uint64_t Enc = 31 - *MaybeImmed;
6987 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6988}
6989
6990InstructionSelector::ComplexRendererFns
6991AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
6992 auto MaybeImmed = getImmedFromMO(Root);
6993 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
6994 return std::nullopt;
6995 uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
6996 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
6997}
6998
6999InstructionSelector::ComplexRendererFns
7000AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
7001 auto MaybeImmed = getImmedFromMO(Root);
7002 if (MaybeImmed == std::nullopt || *MaybeImmed > 63)
7003 return std::nullopt;
7004 uint64_t Enc = 63 - *MaybeImmed;
7005 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
7006}
7007
7008/// Helper to select an immediate value that can be represented as a 12-bit
7009/// value shifted left by either 0 or 12. If it is possible to do so, return
7010/// the immediate and shift value. If not, return std::nullopt.
7011///
7012/// Used by selectArithImmed and selectNegArithImmed.
7013InstructionSelector::ComplexRendererFns
7014AArch64InstructionSelector::select12BitValueWithLeftShift(
7015 uint64_t Immed) const {
7016 unsigned ShiftAmt;
7017 if (Immed >> 12 == 0) {
7018 ShiftAmt = 0;
7019 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
7020 ShiftAmt = 12;
7021 Immed = Immed >> 12;
7022 } else
7023 return std::nullopt;
7024
7025 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
7026 return {{
7027 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
7028 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
7029 }};
7030}
7031
7032/// SelectArithImmed - Select an immediate value that can be represented as
7033/// a 12-bit value shifted left by either 0 or 12. If so, return true with
7034/// Val set to the 12-bit value and Shift set to the shifter operand.
7035InstructionSelector::ComplexRendererFns
7036AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
7037 // This function is called from the addsub_shifted_imm ComplexPattern,
7038 // which lists [imm] as the list of opcode it's interested in, however
7039 // we still need to check whether the operand is actually an immediate
7040 // here because the ComplexPattern opcode list is only used in
7041 // root-level opcode matching.
7042 auto MaybeImmed = getImmedFromMO(Root);
7043 if (MaybeImmed == std::nullopt)
7044 return std::nullopt;
7045 return select12BitValueWithLeftShift(*MaybeImmed);
7046}
7047
7048/// SelectNegArithImmed - As above, but negates the value before trying to
7049/// select it.
7050InstructionSelector::ComplexRendererFns
7051AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
7052 // We need a register here, because we need to know if we have a 64 or 32
7053 // bit immediate.
7054 if (!Root.isReg())
7055 return std::nullopt;
7056 auto MaybeImmed = getImmedFromMO(Root);
7057 if (MaybeImmed == std::nullopt)
7058 return std::nullopt;
7059 uint64_t Immed = *MaybeImmed;
7060
7061 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
7062 // have the opposite effect on the C flag, so this pattern mustn't match under
7063 // those circumstances.
7064 if (Immed == 0)
7065 return std::nullopt;
7066
7067 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
7068 // the root.
7069 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7070 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
7071 Immed = ~((uint32_t)Immed) + 1;
7072 else
7073 Immed = ~Immed + 1ULL;
7074
7075 if (Immed & 0xFFFFFFFFFF000000ULL)
7076 return std::nullopt;
7077
7078 Immed &= 0xFFFFFFULL;
7079 return select12BitValueWithLeftShift(Immed);
7080}
7081
7082/// Checks if we are sure that folding MI into load/store addressing mode is
7083/// beneficial or not.
7084///
7085/// Returns:
7086/// - true if folding MI would be beneficial.
7087/// - false if folding MI would be bad.
7088/// - std::nullopt if it is not sure whether folding MI is beneficial.
7089///
7090/// \p MI can be the offset operand of G_PTR_ADD, e.g. G_SHL in the example:
7091///
7092/// %13:gpr(s64) = G_CONSTANT i64 1
7093/// %8:gpr(s64) = G_SHL %6, %13(s64)
7094/// %9:gpr(p0) = G_PTR_ADD %0, %8(s64)
7095/// %12:gpr(s32) = G_LOAD %9(p0) :: (load (s16))
7096std::optional<bool> AArch64InstructionSelector::isWorthFoldingIntoAddrMode(
7097 const MachineInstr &MI, const MachineRegisterInfo &MRI) const {
7098 if (MI.getOpcode() == AArch64::G_SHL) {
7099 // Address operands with shifts are free, except for running on subtargets
7100 // with AddrLSLSlow14.
7101 if (const auto ValAndVeg = getIConstantVRegValWithLookThrough(
7102 MI.getOperand(2).getReg(), MRI)) {
7103 const APInt ShiftVal = ValAndVeg->Value;
7104
7105 // Don't fold if we know this will be slow.
7106 return !(STI.hasAddrLSLSlow14() && (ShiftVal == 1 || ShiftVal == 4));
7107 }
7108 }
7109 return std::nullopt;
7110}
7111
7112/// Return true if it is worth folding MI into an extended register. That is,
7113/// if it's safe to pull it into the addressing mode of a load or store as a
7114/// shift.
7115/// \p IsAddrOperand whether the def of MI is used as an address operand
7116/// (e.g. feeding into an LDR/STR).
7117bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
7118 const MachineInstr &MI, const MachineRegisterInfo &MRI,
7119 bool IsAddrOperand) const {
7120
7121 // Always fold if there is one use, or if we're optimizing for size.
7122 Register DefReg = MI.getOperand(0).getReg();
7123 if (MRI.hasOneNonDBGUse(DefReg) ||
7124 MI.getParent()->getParent()->getFunction().hasOptSize())
7125 return true;
7126
7127 if (IsAddrOperand) {
7128 // If we are already sure that folding MI is good or bad, return the result.
7129 if (const auto Worth = isWorthFoldingIntoAddrMode(MI, MRI))
7130 return *Worth;
7131
7132 // Fold G_PTR_ADD if its offset operand can be folded
7133 if (MI.getOpcode() == AArch64::G_PTR_ADD) {
7134 MachineInstr *OffsetInst =
7135 getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
7136
7137 // Note, we already know G_PTR_ADD is used by at least two instructions.
7138 // If we are also sure about whether folding is beneficial or not,
7139 // return the result.
7140 if (const auto Worth = isWorthFoldingIntoAddrMode(*OffsetInst, MRI))
7141 return *Worth;
7142 }
7143 }
7144
7145 // FIXME: Consider checking HasALULSLFast as appropriate.
7146
7147 // We have a fastpath, so folding a shift in and potentially computing it
7148 // many times may be beneficial. Check if this is only used in memory ops.
7149 // If it is, then we should fold.
7150 return all_of(MRI.use_nodbg_instructions(DefReg),
7151 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
7152}
7153
/// Select base/offset/shift renderers for an extended-register shifted
/// addressing mode (e.g. [base, off, lsl #N] or [base, off, sxtw #N]).
/// \p SizeInBytes is the width of the memory access; the folded shift amount
/// must equal log2 of it. \p WantsExt allows looking through / matching an
/// extend of the offset.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtendedSHL(
    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
    unsigned SizeInBytes, bool WantsExt) const {
  assert(Base.isReg() && "Expected base to be a register operand");
  assert(Offset.isReg() && "Expected offset to be a register operand");

  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());

  unsigned OffsetOpc = OffsetInst->getOpcode();
  bool LookedThroughZExt = false;
  if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
    // Try to look through a ZEXT.
    if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
      return std::nullopt;

    OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
    OffsetOpc = OffsetInst->getOpcode();
    LookedThroughZExt = true;

    // After the ZEXT we still need a shift or multiply to fold.
    if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
      return std::nullopt;
  }
  // Make sure that the memory op is a valid size.
  int64_t LegalShiftVal = Log2_32(SizeInBytes);
  if (LegalShiftVal == 0)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Now, try to find the specific G_CONSTANT. Start by assuming that the
  // register we will offset is the LHS, and the register containing the
  // constant is the RHS.
  Register OffsetReg = OffsetInst->getOperand(1).getReg();
  Register ConstantReg = OffsetInst->getOperand(2).getReg();
  auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
  if (!ValAndVReg) {
    // We didn't get a constant on the RHS. If the opcode is a shift, then
    // we're done.
    if (OffsetOpc == TargetOpcode::G_SHL)
      return std::nullopt;

    // If we have a G_MUL, we can use either register. Try looking at the RHS.
    std::swap(OffsetReg, ConstantReg);
    ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
    if (!ValAndVReg)
      return std::nullopt;
  }

  // The value must fit into 3 bits, and must be positive. Make sure that is
  // true.
  int64_t ImmVal = ValAndVReg->Value.getSExtValue();

  // Since we're going to pull this into a shift, the constant value must be
  // a power of 2. If we got a multiply, then we need to check this.
  if (OffsetOpc == TargetOpcode::G_MUL) {
    if (!llvm::has_single_bit<uint32_t>(ImmVal))
      return std::nullopt;

    // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      // NOTE(review): this excerpt appears to be missing the guard preceding
      // this return (likely "if (Ext == AArch64_AM::InvalidShiftExtend)") —
      // confirm against upstream LLVM sources.
        return std::nullopt;

      SignExtend = AArch64_AM::isSignExtendShiftType(Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are both
             // added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}
7259
7260/// This is used for computing addresses like this:
7261///
7262/// ldr x1, [x2, x3, lsl #3]
7263///
7264/// Where x2 is the base register, and x3 is an offset register. The shift-left
7265/// is a constant value specific to this load instruction. That is, we'll never
7266/// see anything other than a 3 here (which corresponds to the size of the
7267/// element being loaded.)
7268InstructionSelector::ComplexRendererFns
7269AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
7270 MachineOperand &Root, unsigned SizeInBytes) const {
7271 if (!Root.isReg())
7272 return std::nullopt;
7273 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7274
7275 // We want to find something like this:
7276 //
7277 // val = G_CONSTANT LegalShiftVal
7278 // shift = G_SHL off_reg val
7279 // ptr = G_PTR_ADD base_reg shift
7280 // x = G_LOAD ptr
7281 //
7282 // And fold it into this addressing mode:
7283 //
7284 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
7285
7286 // Check if we can find the G_PTR_ADD.
7287 MachineInstr *PtrAdd =
7288 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
7289 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
7290 return std::nullopt;
7291
7292 // Now, try to match an opcode which will match our specific offset.
7293 // We want a G_SHL or a G_MUL.
7294 MachineInstr *OffsetInst =
7295 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
7296 return selectExtendedSHL(Root, PtrAdd->getOperand(1),
7297 OffsetInst->getOperand(0), SizeInBytes,
7298 /*WantsExt=*/false);
7299}
7300
7301/// This is used for computing addresses like this:
7302///
7303/// ldr x1, [x2, x3]
7304///
7305/// Where x2 is the base register, and x3 is an offset register.
7306///
7307/// When possible (or profitable) to fold a G_PTR_ADD into the address
7308/// calculation, this will do so. Otherwise, it will return std::nullopt.
7309InstructionSelector::ComplexRendererFns
7310AArch64InstructionSelector::selectAddrModeRegisterOffset(
7311 MachineOperand &Root) const {
7312 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
7313
7314 // We need a GEP.
7315 MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
7316 if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
7317 return std::nullopt;
7318
7319 // If this is used more than once, let's not bother folding.
7320 // TODO: Check if they are memory ops. If they are, then we can still fold
7321 // without having to recompute anything.
7322 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
7323 return std::nullopt;
7324
7325 // Base is the GEP's LHS, offset is its RHS.
7326 return {{[=](MachineInstrBuilder &MIB) {
7327 MIB.addUse(Gep->getOperand(1).getReg());
7328 },
7329 [=](MachineInstrBuilder &MIB) {
7330 MIB.addUse(Gep->getOperand(2).getReg());
7331 },
7332 [=](MachineInstrBuilder &MIB) {
7333 // Need to add both immediates here to make sure that they are both
7334 // added to the instruction.
7335 MIB.addImm(0);
7336 MIB.addImm(0);
7337 }}};
7338}
7339
/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  // Without a G_PTR_ADD there is nothing to fold.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for an immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
  // end up with code like:
  //
  // mov x0, wide
  // add x1 base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
  // NOTE(review): the initializer for ValAndVReg is missing from this excerpt
  // — presumably a getIConstantVRegValWithLookThrough call on PtrAdd's offset
  // operand; confirm against upstream LLVM sources.
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}
7409
/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a G_PTR_ADD that is profitable to fold.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI, true))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI, true))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  // NOTE(review): this excerpt is missing the line binding the result of the
  // call below (likely "AArch64_AM::ShiftExtendType Ext =") and the guard
  // comparing Ext to AArch64_AM::InvalidShiftExtend — confirm upstream.
    getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
    return std::nullopt;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}
7477
7478/// Select a "register plus unscaled signed 9-bit immediate" address. This
7479/// should only match when there is an offset that is not valid for a scaled
7480/// immediate addressing mode. The "Size" argument is the size in bytes of the
7481/// memory reference, which is needed here to know what is valid for a scaled
7482/// immediate.
7483InstructionSelector::ComplexRendererFns
7484AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
7485 unsigned Size) const {
7486 MachineRegisterInfo &MRI =
7487 Root.getParent()->getParent()->getParent()->getRegInfo();
7488
7489 if (!Root.isReg())
7490 return std::nullopt;
7491
7492 if (!isBaseWithConstantOffset(Root, MRI))
7493 return std::nullopt;
7494
7495 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
7496
7497 MachineOperand &OffImm = RootDef->getOperand(2);
7498 if (!OffImm.isReg())
7499 return std::nullopt;
7500 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
7501 if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
7502 return std::nullopt;
7503 int64_t RHSC;
7504 MachineOperand &RHSOp1 = RHS->getOperand(1);
7505 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
7506 return std::nullopt;
7507 RHSC = RHSOp1.getCImm()->getSExtValue();
7508
7509 if (RHSC >= -256 && RHSC < 256) {
7510 MachineOperand &Base = RootDef->getOperand(1);
7511 return {{
7512 [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
7513 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
7514 }};
7515 }
7516 return std::nullopt;
7517}
7518
/// Try to fold an ADRP + G_ADD_LOW pair into an [adrp-result + page-offset]
/// operand pair for a load/store of \p Size bytes. Returns std::nullopt when
/// the pattern does not match or folding is not legal here.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  // Only a G_ADD_LOW fed by an ADRP is a candidate.
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return std::nullopt;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  // The offset must be a multiple of the access size.
  auto Offset = Adrp.getOperand(1).getOffset();
  if (Offset % Size != 0)
    return std::nullopt;

  // Thread-local globals are not folded here.
  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return std::nullopt;

  // The global must be at least as aligned as the access size.
  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  // NOTE(review): MIRBuilder appears unused below — confirm against upstream.
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
             // NOTE(review): the end of this call (likely
             // "AArch64II::MO_NC);") is missing from this excerpt —
             // confirm against upstream LLVM sources.
           }}};
}
7552
/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  // A frame index can be used directly with an offset of 0.
  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  // NOTE(review): this excerpt is missing the declaration of CM used below
  // (presumably the code model queried from the target machine) — confirm
  // against upstream LLVM sources.
  // Check if we can fold in the ADD of small code model ADRP + ADD address.
  // HACK: ld64 on Darwin doesn't support relocations on PRFM, so we can't fold
  // globals into the offset.
  MachineInstr *RootParent = Root.getParent();
  if (CM == CodeModel::Small &&
      !(RootParent->getOpcode() == AArch64::G_AARCH64_PREFETCH &&
        STI.isTargetDarwin())) {
    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());

    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // The offset must be a non-negative multiple of Size that fits in the
    // scaled unsigned 12-bit immediate field.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  // Fall back to [reg + 0].
  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}
7618
/// Given a shift instruction, return the correct shift type for that
/// instruction.
// NOTE(review): this excerpt is missing the function signature line (a helper
// returning AArch64_AM::ShiftExtendType and taking MachineInstr &MI) —
// confirm against upstream LLVM sources.
  switch (MI.getOpcode()) {
  default:
  // NOTE(review): the default-case return (likely
  // AArch64_AM::InvalidShiftExtend) is missing from this excerpt.
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}
7635
/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  // NOTE(review): this excerpt is missing the line initializing ShType
  // (likely via getShiftTypeForInst(*ShiftInst)) — confirm upstream.
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  // ROR is only legal for logical (not arithmetic) shifted-register forms.
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI, false))
    return std::nullopt;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return std::nullopt;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  // Mask the shift amount to the register width, then encode type + amount.
  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
7675
/// Determine the extend kind represented by \p MI (explicit G_SEXT/G_ZEXT/
/// G_ANYEXT/G_SEXT_INREG, or a G_AND with a 0xFF/0xFFFF/0xFFFFFFFF mask).
/// When \p IsLoadStore is true, byte and halfword extends are rejected
/// (returned as InvalidShiftExtend) since load/store addressing modes only
/// take word extends.
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
    // NOTE(review): the default-case statement is missing from this excerpt —
    // confirm against upstream LLVM sources.
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
    // NOTE(review): the default-case statement is missing from this excerpt.
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
  // NOTE(review): the early-return statement that should follow this guard is
  // missing from this excerpt — confirm upstream.

  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
  // NOTE(review): the early-return statement that should follow this guard is
  // missing from this excerpt — confirm upstream.
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
  // NOTE(review): the default-case return is missing from this excerpt.
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}
7735
7736Register AArch64InstructionSelector::moveScalarRegClass(
7737 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
7738 MachineRegisterInfo &MRI = *MIB.getMRI();
7739 auto Ty = MRI.getType(Reg);
7740 assert(!Ty.isVector() && "Expected scalars only!");
7741 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
7742 return Reg;
7743
7744 // Create a copy and immediately select it.
7745 // FIXME: We should have an emitCopy function?
7746 auto Copy = MIB.buildCopy({&RC}, {Reg});
7747 selectCopy(*Copy, TII, MRI, TRI, RBI);
7748 return Copy.getReg(0);
7749}
7750
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  // NOTE(review): this excerpt is missing the declaration of Ext (likely
  // "AArch64_AM::ShiftExtendType Ext;") — confirm against upstream.
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI, false))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    // The arithmetic extended-register form allows a left shift of at most 4.
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    // NOTE(review): the guard preceding this return (likely comparing Ext to
    // AArch64_AM::InvalidShiftExtend) is missing from this excerpt.
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    // NOTE(review): the guard preceding this return (likely comparing Ext to
    // AArch64_AM::InvalidShiftExtend) is missing from this excerpt.
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}
7818
/// Complex-pattern selector that tries to render the register holding the
/// high half of a vector value (via G_UNMERGE_VALUES or an extract of lane 1
/// from a <2 x s64> vector), looking through bitcasts on little-endian.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Look through G_BITCASTs, but only on little-endian targets.
  auto Extract = getDefSrcRegIgnoringCopies(Root.getReg(), MRI);
  while (Extract && Extract->MI->getOpcode() == TargetOpcode::G_BITCAST &&
         STI.isLittleEndian())
    Extract =
        getDefSrcRegIgnoringCopies(Extract->MI->getOperand(1).getReg(), MRI);
  if (!Extract)
    return std::nullopt;

  // G_UNMERGE_VALUES case: if the root is the result in operand 1, render the
  // register in operand 2 — presumably selecting the half adjacent to it;
  // confirm exact lane semantics against upstream.
  if (Extract->MI->getOpcode() == TargetOpcode::G_UNMERGE_VALUES) {
    if (Extract->Reg == Extract->MI->getOperand(1).getReg()) {
      Register ExtReg = Extract->MI->getOperand(2).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }
  // G_EXTRACT_VECTOR_ELT case: lane 1 of a <2 x s64> source.
  if (Extract->MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) {
    LLT SrcTy = MRI.getType(Extract->MI->getOperand(1).getReg());
    // NOTE(review): the start of this constant lookup (likely
    // "auto LaneIdx = getIConstantVRegValWithLookThrough(") is missing from
    // this excerpt — confirm against upstream LLVM sources.
        Extract->MI->getOperand(2).getReg(), MRI);
    if (LaneIdx && SrcTy == LLT::fixed_vector(2, 64) &&
        LaneIdx->Value.getSExtValue() == 1) {
      Register ExtReg = Extract->MI->getOperand(1).getReg();
      return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
    }
  }

  return std::nullopt;
}
7853
/// Shared helper for selecting the fixed-point scale operand of a vector
/// conversion: matches a G_DUP of a constant, reinterprets the constant as a
/// float of the element width, and renders the fractional-bit count computed
/// by CheckFixedPointOperandConstant (if any).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectCVTFixedPointVecBase(
    const MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  const MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // The operand must be a splat (G_DUP) of a constant.
  MachineInstr *Dup = getDefIgnoringCopies(Root.getReg(), MRI);
  if (Dup->getOpcode() != AArch64::G_DUP)
    return std::nullopt;
  std::optional<ValueAndVReg> CstVal =
  // NOTE(review): the initializer for CstVal (presumably a constant lookup on
  // Dup's source operand) is missing from this excerpt — confirm upstream.
  if (!CstVal)
    return std::nullopt;

  // Reinterpret the constant's bits as a float of the element width.
  unsigned RegWidth = MRI.getType(Root.getReg()).getScalarSizeInBits();
  APFloat FVal(0.0);
  switch (RegWidth) {
  case 16:
    FVal = APFloat(APFloat::IEEEhalf(), CstVal->Value);
    break;
  case 32:
    FVal = APFloat(APFloat::IEEEsingle(), CstVal->Value);
    break;
  case 64:
    FVal = APFloat(APFloat::IEEEdouble(), CstVal->Value);
    break;
  default:
    return std::nullopt;
  };
  // A nonzero FBits means the constant encodes a valid fixed-point scale.
  if (unsigned FBits = CheckFixedPointOperandConstant(FVal, RegWidth,
                                                      /*isReciprocal*/ false))
    return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(FBits); }}};

  return std::nullopt;
}
7891
7892InstructionSelector::ComplexRendererFns
7893AArch64InstructionSelector::selectCVTFixedPointVec(MachineOperand &Root) const {
7894 return selectCVTFixedPointVecBase(Root);
7895}
7896
7897void AArch64InstructionSelector::renderFixedPointXForm(MachineInstrBuilder &MIB,
7898 const MachineInstr &MI,
7899 int OpIdx) const {
7900 // FIXME: This is only needed to satisfy the type checking in tablegen, and
7901 // should be able to reuse the Renderers already calculated by
7902 // selectCVTFixedPointVecBase.
7903 InstructionSelector::ComplexRendererFns Renderer =
7904 selectCVTFixedPointVecBase(MI.getOperand(2));
7905 assert((Renderer && Renderer->size() == 1) &&
7906 "Expected selectCVTFixedPointVec to provide a function\n");
7907 (Renderer->front())(MIB);
7908}
7909
7910void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
7911 const MachineInstr &MI,
7912 int OpIdx) const {
7913 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7914 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7915 "Expected G_CONSTANT");
7916 std::optional<int64_t> CstVal =
7917 getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
7918 assert(CstVal && "Expected constant value");
7919 MIB.addImm(*CstVal);
7920}
7921
7922void AArch64InstructionSelector::renderLogicalImm32(
7923 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7924 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7925 "Expected G_CONSTANT");
7926 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7927 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
7928 MIB.addImm(Enc);
7929}
7930
7931void AArch64InstructionSelector::renderLogicalImm64(
7932 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
7933 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
7934 "Expected G_CONSTANT");
7935 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
7936 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
7937 MIB.addImm(Enc);
7938}
7939
7940void AArch64InstructionSelector::renderUbsanTrap(MachineInstrBuilder &MIB,
7941 const MachineInstr &MI,
7942 int OpIdx) const {
7943 assert(MI.getOpcode() == TargetOpcode::G_UBSANTRAP && OpIdx == 0 &&
7944 "Expected G_UBSANTRAP");
7945 MIB.addImm(MI.getOperand(0).getImm() | ('U' << 8));
7946}
7947
7948void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
7949 const MachineInstr &MI,
7950 int OpIdx) const {
7951 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7952 "Expected G_FCONSTANT");
7953 MIB.addImm(
7954 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7955}
7956
7957void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
7958 const MachineInstr &MI,
7959 int OpIdx) const {
7960 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7961 "Expected G_FCONSTANT");
7962 MIB.addImm(
7963 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7964}
7965
7966void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
7967 const MachineInstr &MI,
7968 int OpIdx) const {
7969 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7970 "Expected G_FCONSTANT");
7971 MIB.addImm(
7972 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
7973}
7974
7975void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
7976 MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
7977 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
7978 "Expected G_FCONSTANT");
7980 .getFPImm()
7981 ->getValueAPF()
7982 .bitcastToAPInt()
7983 .getZExtValue()));
7984}
7985
7986bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
7987 const MachineInstr &MI, unsigned NumBytes) const {
7988 if (!MI.mayLoadOrStore())
7989 return false;
7990 assert(MI.hasOneMemOperand() &&
7991 "Expected load/store to have only one mem op!");
7992 return (*MI.memoperands_begin())->getSize() == NumBytes;
7993}
7994
7995bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
7996 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
7997 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
7998 return false;
7999
8000 // Only return true if we know the operation will zero-out the high half of
8001 // the 64-bit register. Truncates can be subregister copies, which don't
8002 // zero out the high bits. Copies and other copy-like instructions can be
8003 // fed by truncates, or could be lowered as subregister copies.
8004 switch (MI.getOpcode()) {
8005 default:
8006 return true;
8007 case TargetOpcode::COPY:
8008 case TargetOpcode::G_BITCAST:
8009 case TargetOpcode::G_TRUNC:
8010 case TargetOpcode::G_PHI:
8011 return false;
8012 }
8013}
8014
8015
8016// Perform fixups on the given PHI instruction's operands to force them all
8017// to be the same as the destination regbank.
8019 const AArch64RegisterBankInfo &RBI) {
8020 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
8021 Register DstReg = MI.getOperand(0).getReg();
8022 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
8023 assert(DstRB && "Expected PHI dst to have regbank assigned");
8024 MachineIRBuilder MIB(MI);
8025
8026 // Go through each operand and ensure it has the same regbank.
8027 for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
8028 if (!MO.isReg())
8029 continue;
8030 Register OpReg = MO.getReg();
8031 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
8032 if (RB != DstRB) {
8033 // Insert a cross-bank copy.
8034 auto *OpDef = MRI.getVRegDef(OpReg);
8035 const LLT &Ty = MRI.getType(OpReg);
8036 MachineBasicBlock &OpDefBB = *OpDef->getParent();
8037
8038 // Any instruction we insert must appear after all PHIs in the block
8039 // for the block to be valid MIR.
8040 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
8041 if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
8042 InsertPt = OpDefBB.getFirstNonPHI();
8043 MIB.setInsertPt(*OpDef->getParent(), InsertPt);
8044 auto Copy = MIB.buildCopy(Ty, OpReg);
8045 MRI.setRegBank(Copy.getReg(0), *DstRB);
8046 MO.setReg(Copy.getReg(0));
8047 }
8048 }
8049}
8050
8051void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
8052 // We're looking for PHIs, build a list so we don't invalidate iterators.
8053 MachineRegisterInfo &MRI = MF.getRegInfo();
8055 for (auto &BB : MF) {
8056 for (auto &MI : BB) {
8057 if (MI.getOpcode() == TargetOpcode::G_PHI)
8058 Phis.emplace_back(&MI);
8059 }
8060 }
8061
8062 for (auto *MI : Phis) {
8063 // We need to do some work here if the operand types are < 16 bit and they
8064 // are split across fpr/gpr banks. Since all types <32b on gpr
8065 // end up being assigned gpr32 regclasses, we can end up with PHIs here
8066 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
8067 // be selecting heterogenous regbanks for operands if possible, but we
8068 // still need to be able to deal with it here.
8069 //
8070 // To fix this, if we have a gpr-bank operand < 32b in size and at least
8071 // one other operand is on the fpr bank, then we add cross-bank copies
8072 // to homogenize the operand banks. For simplicity the bank that we choose
8073 // to settle on is whatever bank the def operand has. For example:
8074 //
8075 // %endbb:
8076 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
8077 // =>
8078 // %bb2:
8079 // ...
8080 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
8081 // ...
8082 // %endbb:
8083 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
8084 bool HasGPROp = false, HasFPROp = false;
8085 for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
8086 if (!MO.isReg())
8087 continue;
8088 const LLT &Ty = MRI.getType(MO.getReg());
8089 if (!Ty.isValid() || !Ty.isScalar())
8090 break;
8091 if (Ty.getSizeInBits() >= 32)
8092 break;
8093 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
8094 // If for some reason we don't have a regbank yet. Don't try anything.
8095 if (!RB)
8096 break;
8097
8098 if (RB->getID() == AArch64::GPRRegBankID)
8099 HasGPROp = true;
8100 else
8101 HasFPROp = true;
8102 }
8103 // We have heterogenous regbanks, need to fixup.
8104 if (HasGPROp && HasFPROp)
8105 fixupPHIOpBanks(*MI, MRI, RBI);
8106 }
8107}
8108
8109namespace llvm {
8110InstructionSelector *
8112 const AArch64Subtarget &Subtarget,
8113 const AArch64RegisterBankInfo &RBI) {
8114 return new AArch64InstructionSelector(TM, Subtarget, RBI);
8115}
8116}
#define Success
MachineInstrBuilder MachineInstrBuilder & DefMI
static std::tuple< SDValue, SDValue > extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG)
static bool isPreferredADD(int64_t ImmOff)
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue CCOp, AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC, const SDLoc &DL, SelectionDAG &DAG)
can be transformed to: not (and (not (and (setCC (cmp C)) (setCD (cmp D)))) (and (not (setCA (cmp A))...
static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG)
static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp, AArch64CC::CondCode Predicate)
Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain of CCMP/CFCMP ops.
static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits)
static void changeFPCCToANDAArch64CC(ISD::CondCode CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Convert a DAG fp condition code to an AArch64 CC.
static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val, bool &CanNegate, bool &MustBeFirst, bool &PreferFirst, bool WillNegate, unsigned Depth=0)
Returns true if Val is a tree of AND/OR/SETCC operations that can be expressed as a conjunction.
static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG, const APInt &Bits, const SDValue *LHS=nullptr)
static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val, AArch64CC::CondCode &OutCC)
Emit expression as a conjunction (a series of CCMP/CFCMP ops).
#define GET_GLOBALISEL_PREDICATES_INIT
static std::pair< const TargetRegisterClass *, const TargetRegisterClass * > getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Helper function to get the source and destination register classes for a copy.
#define GET_GLOBALISEL_TEMPORARIES_INIT
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, MachineRegisterInfo &MRI)
Return a register which can be used as a bit to test in a TB(N)Z.
static unsigned getMinSizeForRegBank(const RegisterBank &RB)
Returns the minimum size the given register bank can hold.
static std::optional< int64_t > getVectorShiftImm(Register Reg, MachineRegisterInfo &MRI)
Returns the element immediate value of a vector shift operand if found.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the G_LOAD or G_STORE operation GenericOpc, appropriate for the (value)...
static const TargetRegisterClass * getMinClassForRegBank(const RegisterBank &RB, TypeSize SizeInBits, bool GetAllRegSet=false)
Given a register bank, and size in bits, return the smallest register class that can represent that c...
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, unsigned OpSize)
Select the AArch64 opcode for the basic binary operation GenericOpc (such as G_OR or G_SDIV),...
static bool getSubRegForClass(const TargetRegisterClass *RC, const TargetRegisterInfo &TRI, unsigned &SubReg)
Returns the correct subregister to use for a given register class.
static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register SrcReg, const TargetRegisterClass *To, unsigned SubReg)
Helper function for selectCopy.
static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P, Register RHS={}, MachineRegisterInfo *MRI=nullptr)
static Register createDTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of D-registers using the registers in Regs.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, const AArch64RegisterBankInfo &RBI)
static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, const RegisterBankInfo &RBI)
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI)
Given a shift instruction, return the correct shift type for that instruction.
static bool unsupportedBinOp(const MachineInstr &I, const AArch64RegisterBankInfo &RBI, const MachineRegisterInfo &MRI, const AArch64RegisterInfo &TRI)
Check whether I is a currently unsupported binary operation:
static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, const unsigned EltSize)
static Register createQTuple(ArrayRef< Register > Regs, MachineIRBuilder &MIB)
Create a tuple of Q-registers using the registers in Regs.
static std::optional< uint64_t > getImmedFromMO(const MachineOperand &Root)
static std::pair< unsigned, unsigned > getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize)
Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given size and RB.
static Register createTuple(ArrayRef< Register > Regs, const unsigned RegClassIDs[], const unsigned SubRegs[], MachineIRBuilder &MIB)
Create a REG_SEQUENCE instruction using the registers in Regs.
static std::optional< int64_t > getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI)
Matches and returns the shift immediate value for a SHL instruction given a shift operand.
static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file declares the targeting of the RegisterBankInfo class for AArch64.
constexpr LLT S16
constexpr LLT S32
constexpr LLT S64
constexpr LLT S8
static bool isStore(int Opcode)
static bool selectMergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
static bool selectUnmergeValues(MachineInstrBuilder &MIB, const ARMBaseInstrInfo &TII, MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
#define P(N)
if(PassOpts->AAPipeline)
static StringRef getName(Value *V)
#define LLVM_DEBUG(...)
Definition Debug.h:114
static constexpr int Concat[]
Value * RHS
Value * LHS
This class provides the information for the target register banks.
std::optional< uint16_t > getPtrAuthBlockAddressDiscriminatorIfEnabled(const Function &ParentFn) const
Compute the integer discriminator for a given BlockAddress constant, if blockaddress signing is enabl...
const AArch64TargetLowering * getTargetLowering() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
bool isX16X17Safer() const
Returns whether the operating system makes it safer to store sensitive values in x16 and x17 as oppos...
bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
bool isIntPredicate() const
Definition InstrTypes.h:783
bool isUnsigned() const
Definition InstrTypes.h:936
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
const APFloat & getValueAPF() const
Definition Constants.h:325
bool isNegative() const
Return true if the sign bit is set.
Definition Constants.h:332
bool isZero() const
Return true if the value is positive or negative zero.
Definition Constants.h:329
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
LLVM_ABI bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constants.cpp:74
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition DataLayout.h:568
LLVM_ABI Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
virtual void setupMF(MachineFunction &mf, GISelValueTracking *vt, CodeGenCoverage *covinfo=nullptr, ProfileSummaryInfo *psi=nullptr, BlockFrequencyInfo *bfi=nullptr)
Setup per-MF executor state.
Represents indexed stores.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
Represents a G_SELECT.
Register getCondReg() const
Register getFalseReg() const
Register getTrueReg() const
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
bool hasExternalWeakLinkage() const
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
constexpr LLT multiplyElements(int Factor) const
Produce a vector type that is Factor times bigger, preserving the element type.
constexpr bool isPointerVector() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
TypeSize getValue() const
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineIRBuilderState & getState()
Getter for the State.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
void setState(const MachineIRBuilderState &NewState)
Setter for the State.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
Register getReg(unsigned Idx) const
Get the register for the operand index.
void constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addBlockAddress(const BlockAddress *BA, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI void addMemOperand(MachineFunction &MF, MachineMemOperand *MO)
Add a MachineMemOperand to the machine instruction.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
AtomicOrdering getSuccessOrdering() const
Return the atomic ordering requirements for this memory operation.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
const ConstantInt * getCImm() const
bool isCImm() const
isCImm - Test if this is a MO_CImmediate operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreatePredicate(unsigned Pred)
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
static MachineOperand CreateBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
const ConstantFP * getFPImm() const
unsigned getPredicate() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
const RegClassOrRegBank & getRegClassOrRegBank(Register Reg) const
Return the register bank or register class of Reg.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
def_instr_iterator def_instr_begin(Register RegNo) const
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
const RegisterBank * getRegBankOrNull(Register Reg) const
Return the register bank of Reg, or null if Reg has not been assigned a register bank or has been ass...
LLVM_ABI void setRegBank(Register Reg, const RegisterBank &RegBank)
Set the register bank to RegBank for Reg.
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI void setType(Register VReg, LLT Ty)
Set the low-level type of VReg to Ty.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
LLVM_ABI void setRegClass(Register Reg, const TargetRegisterClass *RC)
setRegClass - Set the register class of the specified virtual register.
LLVM_ABI Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
Analysis providing profile information.
Holds all the information related to register banks.
static const TargetRegisterClass * constrainGenericRegister(Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI)
Constrain the (possibly generic) virtual register Reg to RC.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
TypeSize getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
This class implements the register bank concept.
unsigned getID() const
Get the identifier of this register bank.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
TargetInstrInfo - Interface to description of machine instruction set.
bool isPositionIndependent() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
void changeFCMPPredToAArch64CC(const CmpInst::Predicate P, AArch64CC::CondCode &CondCode, AArch64CC::CondCode &CondCode2)
Find the AArch64 condition codes necessary to represent P for a scalar floating point comparison.
std::optional< int64_t > getAArch64VectorSplatScalar(const MachineInstr &MI, const MachineRegisterInfo &MRI)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the symbol itself.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction.
static bool isLogicalImmediate(uint64_t imm, unsigned regSize)
isLogicalImmediate - Return true if the immediate is valid for a logical immediate instruction of the given register size.
static uint8_t encodeAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType9(uint64_t Imm)
static bool isAdvSIMDModImmType4(uint64_t Imm)
static bool isAdvSIMDModImmType5(uint64_t Imm)
static int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType10(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType9(uint64_t Imm)
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static bool isAdvSIMDModImmType7(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType5(uint64_t Imm)
static int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
static bool isAdvSIMDModImmType10(uint64_t Imm)
static int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
static uint8_t encodeAdvSIMDModImmType8(uint64_t Imm)
static bool isAdvSIMDModImmType12(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType11(uint64_t Imm)
static bool isAdvSIMDModImmType11(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType6(uint64_t Imm)
static bool isAdvSIMDModImmType8(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType4(uint64_t Imm)
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==> lsr 010 ==> asr 011 ==> ror.
static bool isAdvSIMDModImmType6(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType1(uint64_t Imm)
static uint8_t encodeAdvSIMDModImmType3(uint64_t Imm)
static bool isAdvSIMDModImmType2(uint64_t Imm)
static bool isAdvSIMDModImmType3(uint64_t Imm)
static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type)
isSignExtendShiftType - Returns true if Type is sign extending.
static bool isAdvSIMDModImmType1(uint64_t Imm)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
constexpr double e
NodeAddr< InstrNode * > Instr
Definition RDFGraph.h:389
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI Register getFunctionLiveInPhysReg(MachineFunction &MF, const TargetInstrInfo &TII, MCRegister PhysReg, const TargetRegisterClass &RC, const DebugLoc &DL, LLT RegTy=LLT())
Return a virtual register corresponding to the incoming argument register PhysReg.
Definition Utils.cpp:917
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI Register constrainOperandRegClass(const MachineFunction &MF, const TargetRegisterInfo &TRI, MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, MachineOperand &RegMO)
Constrain the Register operand OpIdx, so that it is now constrained to the TargetRegisterClass passed...
Definition Utils.cpp:56
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition Utils.cpp:653
PointerUnion< const TargetRegisterClass *, const RegisterBank * > RegClassOrRegBank
Convenient type to represent either a register class or a register bank.
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
unsigned CheckFixedPointOperandConstant(APFloat &FVal, unsigned RegWidth, bool isReciprocal)
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool isStrongerThanMonotonic(AtomicOrdering AO)
LLVM_ABI void constrainSelectedInstRegOperands(MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI)
Mutate the newly-selected instruction I to constrain its (possibly generic) virtual register operands to the instruction's register class.
Definition Utils.cpp:155
bool isPreISelGenericOpcode(unsigned Opcode)
Check whether the given Opcode is a generic opcode that is not supposed to appear after ISel.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI std::optional< int64_t > getIConstantVRegSExtVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT fits in int64_t returns it.
Definition Utils.cpp:314
constexpr bool isShiftedMask_64(uint64_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (64 bit version).
Definition MathExtras.h:273
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, const AArch64Subtarget &, const AArch64RegisterBankInfo &)
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
Definition STLExtras.h:2026
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI std::optional< ValueAndVReg > getAnyConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true, bool LookThroughAnyExt=false)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT or G_FCONSTANT, returns its value as an APInt and its defining register.
Definition Utils.cpp:439
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT, returns its value as an APInt and its defining register.
Definition Utils.cpp:433
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:469
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.