LLVM 23.0.0git
AArch64PreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64.h"
34#include <memory>
35
36#define GET_GICOMBINER_DEPS
37#include "AArch64GenPreLegalizeGICombiner.inc"
38#undef GET_GICOMBINER_DEPS
39
40#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
41
42using namespace llvm;
43using namespace MIPatternMatch;
44
45#define GET_GICOMBINER_TYPES
46#include "AArch64GenPreLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_TYPES
48
49namespace {
50
51/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
52/// are sign bits. In this case, we can transform the G_ICMP to directly compare
53/// the wide value with a zero.
54bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
55 GISelValueTracking *VT, Register &MatchInfo) {
56 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
57
58 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
59 if (!ICmpInst::isEquality(Pred))
60 return false;
61
62 Register LHS = MI.getOperand(2).getReg();
63 LLT LHSTy = MRI.getType(LHS);
64 if (!LHSTy.isScalar())
65 return false;
66
67 Register RHS = MI.getOperand(3).getReg();
68 Register WideReg;
69
70 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
71 !mi_match(RHS, MRI, m_SpecificICst(0)))
72 return false;
73
74 LLT WideTy = MRI.getType(WideReg);
75 if (VT->computeNumSignBits(WideReg) <=
76 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
77 return false;
78
79 MatchInfo = WideReg;
80 return true;
81}
82
83void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
84 MachineIRBuilder &Builder,
85 GISelChangeObserver &Observer, Register &WideReg) {
86 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
87
88 LLT WideTy = MRI.getType(WideReg);
89 // We're going to directly use the wide register as the LHS, and then use an
90 // equivalent size zero for RHS.
91 Builder.setInstrAndDebugLoc(MI);
92 auto WideZero = Builder.buildConstant(WideTy, 0);
93 Observer.changingInstr(MI);
94 MI.getOperand(2).setReg(WideReg);
95 MI.getOperand(3).setReg(WideZero.getReg(0));
96 Observer.changedInstr(MI);
97}
98
// Decides whether the smallest constant G_PTR_ADD offset applied to a
// G_GLOBAL_VALUE can be folded into the global operand itself.
//
// Every non-debug user of the global's result must be a G_PTR_ADD with a
// constant second operand; otherwise the match fails. On success MatchInfo is
// set to {NewOffset, MinOffset}, i.e. the offset to install on the global
// operand and the smallest constant found among the users.
99/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
100///
101/// e.g.
102///
103/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
104bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
105 std::pair<uint64_t, uint64_t> &MatchInfo) {
106 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
107 MachineFunction &MF = *MI.getMF();
108 auto &GlobalOp = MI.getOperand(1);
109 auto *GV = GlobalOp.getGlobal();
110 if (GV->isThreadLocal())
111 return false;
112
113 // Don't allow anything that could represent offsets etc.
// NOTE(review): the line that opens this condition appears to be missing from
// this listing; what remains compares some classification of GV against
// AArch64II::MO_NO_FLAG and bails out when they differ.
115 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
116 return false;
117
118 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
119 //
120 // %g = G_GLOBAL_VALUE @x
121 // %ptr1 = G_PTR_ADD %g, cst1
122 // %ptr2 = G_PTR_ADD %g, cst2
123 // ...
124 // %ptrN = G_PTR_ADD %g, cstN
125 //
126 // Identify the *smallest* constant. We want to be able to form this:
127 //
128 // %offset_g = G_GLOBAL_VALUE @x + min_cst
129 // %g = G_PTR_ADD %offset_g, -min_cst
130 // %ptr1 = G_PTR_ADD %g, cst1
131 // ...
132 Register Dst = MI.getOperand(0).getReg();
// Start from the largest uint64_t so that any real constant is smaller.
133 uint64_t MinOffset = -1ull;
134 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
135 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
136 return false;
// NOTE(review): the line declaring `Cst` appears to be missing from this
// listing; the surviving text is the argument list of the call that
// produces it from the G_PTR_ADD's second source operand.
138 UseInstr.getOperand(2).getReg(), MRI);
139 if (!Cst)
140 return false;
141 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
142 }
143
144 // Require that the new offset is larger than the existing one to avoid
145 // infinite loops.
146 uint64_t CurrOffset = GlobalOp.getOffset();
147 uint64_t NewOffset = MinOffset + CurrOffset;
// This also rejects the no-users case (MinOffset still all-ones) when
// CurrOffset is non-zero, because the unsigned sum wraps to CurrOffset - 1.
148 if (NewOffset <= CurrOffset)
149 return false;
150
151 // Check whether folding this offset is legal. It must not go out of bounds of
152 // the referenced object to avoid violating the code model, and must be
153 // smaller than 2^20 because this is the largest offset expressible in all
154 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
155 // stores an immediate signed 21 bit offset.)
156 //
157 // This check also prevents us from folding negative offsets, which will end
158 // up being treated in the same way as large positive ones. They could also
159 // cause code model violations, and aren't really common enough to matter.
160 if (NewOffset >= (1 << 20))
161 return false;
162
// The folded offset must also stay within the alloc size of the global's
// value type; unsized types are rejected outright.
163 Type *T = GV->getValueType();
164 if (!T->isSized() ||
165 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
166 return false;
167 MatchInfo = std::make_pair(NewOffset, MinOffset);
168 return true;
169}
170
// Applies the fold recorded by matchFoldGlobalOffset: the G_GLOBAL_VALUE gets
// the new offset and a fresh destination register, and a G_PTR_ADD of
// -MinOffset re-creates the original destination so existing users keep
// working.
//
// MatchInfo is {Offset, MinOffset} as unpacked below.
// NOTE(review): `B` and `Observer` are used in the body but are not declared
// in the visible signature; a parameter line appears to be missing from this
// listing.
171void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
173 std::pair<uint64_t, uint64_t> &MatchInfo) {
174 // Change:
175 //
176 // %g = G_GLOBAL_VALUE @x
177 // %ptr1 = G_PTR_ADD %g, cst1
178 // %ptr2 = G_PTR_ADD %g, cst2
179 // ...
180 // %ptrN = G_PTR_ADD %g, cstN
181 //
182 // To:
183 //
184 // %offset_g = G_GLOBAL_VALUE @x + min_cst
185 // %g = G_PTR_ADD %offset_g, -min_cst
186 // %ptr1 = G_PTR_ADD %g, cst1
187 // ...
188 // %ptrN = G_PTR_ADD %g, cstN
189 //
190 // Then, the original G_PTR_ADDs should be folded later on so that they look
191 // like this:
192 //
193 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
194 uint64_t Offset, MinOffset;
195 std::tie(Offset, MinOffset) = MatchInfo;
// New instructions are inserted at the instruction following MI, so the
// compensating G_PTR_ADD is placed after the redefined global.
196 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
197 Observer.changingInstr(MI);
198 auto &GlobalOp = MI.getOperand(1);
199 auto *GV = GlobalOp.getGlobal();
// Same global and target flags, only the offset changes.
200 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
201 Register Dst = MI.getOperand(0).getReg();
202 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
203 MI.getOperand(0).setReg(NewGVDst);
204 Observer.changedInstr(MI);
// Re-define the original destination as (offset global) + (-MinOffset).
205 B.buildPtrAdd(
206 Dst, NewGVDst,
207 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
208}
209
210// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
211// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
212// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
213// Similar to performVecReduceAddCombine in SelectionDAG
214bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
215 const AArch64Subtarget &STI,
216 std::tuple<Register, Register, bool> &MatchInfo) {
217 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
218 "Expected a G_VECREDUCE_ADD instruction");
219 assert(STI.hasDotProd() && "Target should have Dot Product feature");
220
221 MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
222 Register DstReg = MI.getOperand(0).getReg();
223 Register MidReg = I1->getOperand(0).getReg();
224 LLT DstTy = MRI.getType(DstReg);
225 LLT MidTy = MRI.getType(MidReg);
226 if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
227 return false;
228
229 // Detect mul(ext, ext) with symmetric ext's. If I1Opc is G_ZEXT or G_SEXT
230 // then the ext's must match the same opcode. It is set to the ext opcode on
231 // output.
232 auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
233 Register &Out2, unsigned &I1Opc) {
234 // If result of this has more than 1 use, then there is no point in creating
235 // a dot instruction
236 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
237 return false;
238
239 MachineInstr *ExtMI1 =
240 getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
241 MachineInstr *ExtMI2 =
242 getDefIgnoringCopies(MI->getOperand(2).getReg(), MRI);
243 LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
244 LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());
245
246 if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
247 return false;
248 if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
249 I1Opc != ExtMI1->getOpcode())
250 return false;
251 Out1 = ExtMI1->getOperand(1).getReg();
252 Out2 = ExtMI2->getOperand(1).getReg();
253 I1Opc = ExtMI1->getOpcode();
254 return true;
255 };
256
257 LLT SrcTy;
258 unsigned I1Opc = I1->getOpcode();
259 if (I1Opc == TargetOpcode::G_MUL) {
260 Register Out1, Out2;
261 if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
262 return false;
263 SrcTy = MRI.getType(Out1);
264 std::get<0>(MatchInfo) = Out1;
265 std::get<1>(MatchInfo) = Out2;
266 } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
267 Register I1Op = I1->getOperand(1).getReg();
268 MachineInstr *M = getDefIgnoringCopies(I1Op, MRI);
269 Register Out1, Out2;
270 if (M->getOpcode() == TargetOpcode::G_MUL &&
271 tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
272 SrcTy = MRI.getType(Out1);
273 std::get<0>(MatchInfo) = Out1;
274 std::get<1>(MatchInfo) = Out2;
275 } else {
276 SrcTy = MRI.getType(I1Op);
277 std::get<0>(MatchInfo) = I1Op;
278 std::get<1>(MatchInfo) = 0;
279 }
280 } else {
281 return false;
282 }
283
284 if (I1Opc == TargetOpcode::G_ZEXT)
285 std::get<2>(MatchInfo) = 0;
286 else if (I1Opc == TargetOpcode::G_SEXT)
287 std::get<2>(MatchInfo) = 1;
288 else
289 return false;
290
291 if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
292 return false;
293
294 return true;
295}
296
// Replaces the matched G_VECREDUCE_ADD with one or more G_UDOT/G_SDOT
// instructions (chosen by the signedness flag in MatchInfo) followed by a new
// G_VECREDUCE_ADD over their result, then erases MI.
//
// MatchInfo is {first source, second source or register 0, is-signed}. When
// the second source is register 0 a constant of the first source's type with
// value 1 is used in its place.
297void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
298 MachineIRBuilder &Builder,
299 GISelChangeObserver &Observer,
300 const AArch64Subtarget &STI,
301 std::tuple<Register, Register, bool> &MatchInfo) {
302 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
303 "Expected a G_VECREDUCE_ADD instruction");
304 assert(STI.hasDotProd() && "Target should have Dot Product feature");
305
306 // Initialise the variables
307 unsigned DotOpcode =
308 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
309 Register Ext1SrcReg = std::get<0>(MatchInfo);
310
311 // If there is one source register, create a vector of 1s as the second
312 // source register
313 Register Ext2SrcReg;
314 if (std::get<1>(MatchInfo) == 0)
315 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
316 ->getOperand(0)
317 .getReg();
318 else
319 Ext2SrcReg = std::get<1>(MatchInfo);
320
321 // Find out how many DOT instructions are needed
322 LLT SrcTy = MRI.getType(Ext1SrcReg);
323 LLT MidTy;
324 unsigned NumOfDotMI;
325 if (SrcTy.getNumElements() % 16 == 0) {
326 NumOfDotMI = SrcTy.getNumElements() / 16;
327 MidTy = LLT::fixed_vector(4, LLT::integer(32));
328 } else if (SrcTy.getNumElements() % 8 == 0) {
329 NumOfDotMI = SrcTy.getNumElements() / 8;
330 MidTy = LLT::fixed_vector(2, LLT::integer(32));
331 } else {
332 llvm_unreachable("Source type number of elements is not multiple of 8");
333 }
334
335 // Handle case where one DOT instruction is needed
336 if (NumOfDotMI == 1) {
// The dot instruction takes a zero vector as its first (accumulator-like)
// operand, followed by the two sources.
337 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
338 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
339 {Zeroes, Ext1SrcReg, Ext2SrcReg});
340 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
341 } else {
342 // If not pad the last v8 element with 0s to a v16
343 SmallVector<Register, 4> Ext1UnmergeReg;
344 SmallVector<Register, 4> Ext2UnmergeReg;
345 if (SrcTy.getNumElements() % 16 != 0) {
346 SmallVector<Register> Leftover1;
347 SmallVector<Register> Leftover2;
348
349 // Split the elements into v16i8 and v8i8
350 LLT MainTy = LLT::fixed_vector(16, LLT::integer(8));
351 LLT LeftoverTy1, LeftoverTy2;
352 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
353 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
354 MRI)) ||
355 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
356 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
357 MRI))) {
358 llvm_unreachable("Unable to split this vector properly");
359 }
360
361 // Pad the leftover v8i8 vector with register of 0s of type v8i8
362 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
363 ->getOperand(0)
364 .getReg();
365
// Only the first leftover piece of each source is merged with the zero
// padding and appended as a final 16-element part.
366 Ext1UnmergeReg.push_back(
367 Builder
368 .buildMergeLikeInstr(LLT::fixed_vector(16, LLT::integer(8)),
369 {Leftover1[0], v8Zeroes})
370 .getReg(0));
371 Ext2UnmergeReg.push_back(
372 Builder
373 .buildMergeLikeInstr(LLT::fixed_vector(16, LLT::integer(8)),
374 {Leftover2[0], v8Zeroes})
375 .getReg(0));
376
377 } else {
378 // Unmerge the source vectors to v16i8
379 unsigned SrcNumElts = SrcTy.getNumElements();
380 extractParts(Ext1SrcReg, LLT::fixed_vector(16, LLT::integer(8)),
381 SrcNumElts / 16, Ext1UnmergeReg, Builder, MRI);
382 extractParts(Ext2SrcReg, LLT::fixed_vector(16, LLT::integer(8)),
383 SrcNumElts / 16, Ext2UnmergeReg, Builder, MRI);
384 }
385
386 // Build the [US]DOT instructions
// NOTE(review): `DotReg` is used below but its declaration is not visible;
// a line appears to be missing from this listing at this point.
388 unsigned NumElements = 0;
389 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
390 LLT ZeroesLLT;
391 // Check if it is 16 or 8 elements. Set Zeroes to the according size
392 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
393 ZeroesLLT = LLT::fixed_vector(4, LLT::integer(32));
394 NumElements += 4;
395 } else {
396 ZeroesLLT = LLT::fixed_vector(2, LLT::integer(32));
397 NumElements += 2;
398 }
399 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
400 DotReg.push_back(
401 Builder
402 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
403 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
404 .getReg(0));
405 }
406
407 // Merge the output
// NumElements is the total number of 32-bit lanes produced by all dots.
408 auto ConcatMI = Builder.buildConcatVectors(
409 LLT::fixed_vector(NumElements, LLT::integer(32)), DotReg);
410
411 // Put it through a vector reduction
412 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
413 ConcatMI->getOperand(0).getReg());
414 }
415
416 // Erase the dead instructions
417 MI.eraseFromParent();
418}
419
420// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
421// Ensure that the type coming from the extend instruction is the right size
422bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
423 std::pair<Register, bool> &MatchInfo) {
424 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
425 "Expected G_VECREDUCE_ADD Opcode");
426
427 // Check if the last instruction is an extend
428 MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
429 auto ExtOpc = ExtMI->getOpcode();
430
431 if (ExtOpc == TargetOpcode::G_ZEXT)
432 std::get<1>(MatchInfo) = 0;
433 else if (ExtOpc == TargetOpcode::G_SEXT)
434 std::get<1>(MatchInfo) = 1;
435 else
436 return false;
437
438 // Check if the source register is a valid type
439 Register ExtSrcReg = ExtMI->getOperand(1).getReg();
440 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
441 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
442 if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
443 return false;
444 if ((DstTy.getScalarSizeInBits() == 16 &&
445 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
446 (DstTy.getScalarSizeInBits() == 32 &&
447 ExtSrcTy.getNumElements() % 4 == 0) ||
448 (DstTy.getScalarSizeInBits() == 64 &&
449 ExtSrcTy.getNumElements() % 4 == 0)) {
450 std::get<0>(MatchInfo) = ExtSrcReg;
451 return true;
452 }
453 return false;
454}
455
// Replaces the matched G_VECREDUCE_ADD of an extend with G_UADDLV/G_SADDLV
// (chosen by MatchInfo's signedness flag) on the un-extended source, splitting
// the source into several parts and adding the partial results when it has
// more elements than a single instruction handles. MI is erased at the end.
//
// MatchInfo is {source register, is-signed}.
// NOTE(review): `B` is used throughout the body but is not declared in the
// visible signature; a parameter line appears to be missing from this listing.
456void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
458 std::pair<Register, bool> &MatchInfo) {
459 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
460 "Expected G_VECREDUCE_ADD Opcode");
461
462 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
463 Register SrcReg = std::get<0>(MatchInfo);
464 Register DstReg = MI.getOperand(0).getReg();
465 LLT SrcTy = MRI.getType(SrcReg);
466 LLT DstTy = MRI.getType(DstReg);
467
468 // If SrcTy has more elements than expected, split them into multiple
469 // instructions and sum the results
470 LLT MainTy;
471 SmallVector<Register, 1> WorkingRegisters;
472 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
473 unsigned SrcNumElem = SrcTy.getNumElements();
474 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
475 (SrcScalSize == 16 && SrcNumElem > 8) ||
476 (SrcScalSize == 32 && SrcNumElem > 4)) {
477
478 LLT LeftoverTy;
479 SmallVector<Register, 4> LeftoverRegs;
// Each MainTy below is 128 bits wide: 16 x 8, 8 x 16 or 4 x 32.
480 if (SrcScalSize == 8)
481 MainTy = LLT::fixed_vector(16, LLT::integer(8));
482 else if (SrcScalSize == 16)
483 MainTy = LLT::fixed_vector(8, LLT::integer(16));
484 else if (SrcScalSize == 32)
485 MainTy = LLT::fixed_vector(4, LLT::integer(32));
486 else
487 llvm_unreachable("Source's Scalar Size not supported");
488
489 // Extract the parts and put each extracted sources through U/SADDLV and put
490 // the values inside a small vec
491 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
492 LeftoverRegs, B, MRI);
493 llvm::append_range(WorkingRegisters, LeftoverRegs);
494 } else {
495 WorkingRegisters.push_back(SrcReg);
496 MainTy = SrcTy;
497 }
498
// The per-part result type has twice the source element width.
499 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
500 LLT MidScalarLLT = LLT::integer(MidScalarSize);
// Index operand (0) used for every lane extraction below.
501 Register ZeroReg = B.buildConstant(LLT::integer(64), 0).getReg(0);
502 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
503 // If the number of elements is too small to build an instruction, extend
504 // its size before applying addlv
505 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
506 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
507 (WorkingRegTy.getNumElements() == 4)) {
// NOTE(review): the destination-type argument of this buildInstr call is
// not visible; a line appears to be missing from this listing.
508 WorkingRegisters[I] =
509 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
510 : TargetOpcode::G_ZEXT,
512 {WorkingRegisters[I]})
513 .getReg(0);
514 }
515
516 // Generate the {U/S}ADDLV instruction, whose output is always double of the
517 // Src's Scalar size
// NOTE(review): the alternative arm of this conditional expression is not
// visible; a line appears to be missing from this listing.
518 LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, LLT::integer(32))
520 Register AddlvReg =
521 B.buildInstr(Opc, {AddlvTy}, {WorkingRegisters[I]}).getReg(0);
522
523 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
524 // v2i64 register.
525 // i16, i32 results uses v4i32 registers
526 // i64 results uses v2i64 registers
527 // Therefore we have to extract/truncate the value to the right type
528 if (MidScalarSize == 32 || MidScalarSize == 64) {
529 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
530 {MidScalarLLT}, {AddlvReg, ZeroReg})
531 .getReg(0);
532 } else {
// Narrower results are extracted as 32 bits and then truncated.
533 Register ExtractReg =
534 B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {LLT::integer(32)},
535 {AddlvReg, ZeroReg})
536 .getReg(0);
537 WorkingRegisters[I] =
538 B.buildTrunc({MidScalarLLT}, {ExtractReg}).getReg(0);
539 }
540 }
541
// Sum the partial results left to right when there is more than one.
542 Register OutReg;
543 if (WorkingRegisters.size() > 1) {
544 OutReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
545 .getReg(0);
546 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
547 OutReg = B.buildAdd(MidScalarLLT, OutReg, WorkingRegisters[I]).getReg(0);
548 }
549 } else {
550 OutReg = WorkingRegisters[0];
551 }
552
553 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
554 // Handle the scalar value if the DstTy's Scalar Size is more than double
555 // Src's ScalarType
556 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
557 : TargetOpcode::G_ZEXT,
558 {DstReg}, {OutReg});
559 } else {
560 B.buildCopy(DstReg, OutReg);
561 }
562
563 MI.eraseFromParent();
564}
565
566// Pushes ADD/SUB/MUL through extend instructions to decrease the number of
567// extend instruction at the end by allowing selection of {s|u}addl sooner
568// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
569bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
570 Register DstReg, Register SrcReg1, Register SrcReg2) {
571 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
572 MI.getOpcode() == TargetOpcode::G_SUB ||
573 MI.getOpcode() == TargetOpcode::G_MUL) &&
574 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
575
576 // Deal with vector types only
577 LLT DstTy = MRI.getType(DstReg);
578 if (!DstTy.isVector())
579 return false;
580
581 // Return true if G_{S|Z}EXT instruction is more than 2* source
582 Register ExtDstReg = MI.getOperand(1).getReg();
583 LLT Ext1SrcTy = MRI.getType(SrcReg1);
584 LLT Ext2SrcTy = MRI.getType(SrcReg2);
585 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
586 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
587 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
588 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
589 Ext1SrcTy == Ext2SrcTy)
590 return true;
591
592 return false;
593}
594
595void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
596 MachineIRBuilder &B, bool isSExt, Register DstReg,
597 Register SrcReg1, Register SrcReg2) {
598 LLT SrcTy = MRI.getType(SrcReg1);
599 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
600 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
601 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
602 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
603 Register AddReg =
604 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
605
606 // G_SUB has to sign-extend the result.
607 // G_ADD needs to sext from sext and can sext or zext from zext, and G_MUL
608 // needs to use the original opcode so the original opcode is used for both.
609 if (MI.getOpcode() == TargetOpcode::G_ADD ||
610 MI.getOpcode() == TargetOpcode::G_MUL)
611 B.buildInstr(Opc, {DstReg}, {AddReg});
612 else
613 B.buildSExt(DstReg, AddReg);
614
615 MI.eraseFromParent();
616}
617
618bool matchSimplifyUADDO(MachineInstr &MI, MachineRegisterInfo &MRI,
619 std::pair<Register, Register> &MatchInfo) {
620 // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
621 // result is only used in the no-overflow case. It is restricted to cases
622 // where we know that the high-bits of the operands are 0. If there's an
623 // overflow, then the 9th or 17th bit must be set, which can be checked
624 // using TBNZ.
625 //
626 // Change (for UADDOs on 8 and 16 bits):
627 //
628 // %z0 = G_ASSERT_ZEXT _
629 // %op0 = G_TRUNC %z0
630 // %z1 = G_ASSERT_ZEXT _
631 // %op1 = G_TRUNC %z1
632 // %val, %cond = G_UADDO %op0, %op1
633 // G_BRCOND %cond, %error.bb
634 //
635 // error.bb:
636 // (no successors and no uses of %val)
637 //
638 // To:
639 //
640 // %z0 = G_ASSERT_ZEXT _
641 // %z1 = G_ASSERT_ZEXT _
642 // %add = G_ADD %z0, %z1
643 // %val = G_TRUNC %add
644 // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
645 // %cond = G_ICMP NE, %bit, 0
646 // G_BRCOND %cond, %error.bb
647
648 MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
649 MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
650 Register Op0Wide;
651 Register Op1Wide;
652 if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
653 !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
654 return false;
655 LLT WideTy0 = MRI.getType(Op0Wide);
656 LLT WideTy1 = MRI.getType(Op1Wide);
657 Register ResVal = MI.getOperand(0).getReg();
658 LLT OpTy = MRI.getType(ResVal);
659 MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
660 MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);
661
662 unsigned OpTySize = OpTy.getScalarSizeInBits();
663 // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
664 // inputs have been zero-extended.
665 if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
666 Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
667 OpTySize != Op0WideDef->getOperand(2).getImm() ||
668 OpTySize != Op1WideDef->getOperand(2).getImm())
669 return false;
670
671 // Only scalar UADDO with either 8 or 16 bit operands are handled.
672 if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
673 OpTySize >= WideTy0.getScalarSizeInBits() ||
674 (OpTySize != 8 && OpTySize != 16))
675 return false;
676
677 // The overflow-status result must be used by a branch only.
678 Register ResStatus = MI.getOperand(1).getReg();
679 if (!MRI.hasOneNonDBGUse(ResStatus))
680 return false;
681 MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
682 if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
683 return false;
684
685 // Make sure the computed result is only used in the no-overflow blocks.
686 MachineBasicBlock *CurrentMBB = MI.getParent();
687 MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
688 if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
689 return false;
690 if (any_of(MRI.use_nodbg_instructions(ResVal),
691 [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
692 return &MI != &I &&
693 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
694 }))
695 return false;
696
697 MatchInfo = {Op0Wide, Op1Wide};
698 return true;
699}
700
// Rewrites a matched G_UADDO as a wide G_ADD plus a single-bit overflow test:
// the G_UADDO is erased, the add is performed on the wide registers from
// MatchInfo, the overflow flag is recomputed as (add & (1 << OpTySize)) != 0,
// and the narrow result is re-created from the wide sum.
//
// NOTE(review): `B` and `Observer` are used in the body but are not declared
// in the visible signature; a parameter line appears to be missing from this
// listing.
701void applySimplifyUADDO(MachineInstr &MI, MachineRegisterInfo &MRI,
703 const CombinerHelper &Helper,
704 const std::pair<Register, Register> &MatchInfo) {
705 Register Op0Wide = MatchInfo.first;
706 Register Op1Wide = MatchInfo.second;
707 Register ResVal = MI.getOperand(0).getReg();
708 Register ResStatus = MI.getOperand(1).getReg();
709 unsigned OpTySize = MRI.getType(ResVal).getScalarSizeInBits();
710
711 // Remove G_UADDO.
// The insertion point is taken from the following instruction before MI is
// erased, so new instructions land where the G_UADDO used to be.
712 B.setInstrAndDebugLoc(*MI.getNextNode());
713 MI.eraseFromParent();
714
715 // Emit wide add.
716 Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
717 B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
718
719 // Emit check of the 9th or 17th bit and update users (the branch). This will
720 // later be folded to TBNZ.
// NOTE(review): CondBit is cloned from the wide operand, whereas the mask and
// zero constants are built as 32-bit scalars; this presumably relies on the
// wide type being 32 bits -- confirm against the matcher's constraints.
721 Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
722 B.buildAnd(
723 CondBit, AddDst,
724 B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
725 B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
726 B.buildConstant(LLT::scalar(32), 0));
727
728 // Update ZEXts users of the result value. Because all uses are in the
729 // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
730 B.buildZExtOrTrunc(ResVal, AddDst);
// Early-increment iteration is required because matching users are erased
// while walking the use list.
731 for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
732 Register WideReg;
733 if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
734 auto OldR = U.getParent()->getOperand(0).getReg();
735 Observer.erasingInstr(*U.getParent());
736 U.getParent()->eraseFromParent();
737 Helper.replaceRegWith(MRI, OldR, AddDst);
738 }
739 }
740}
741
// Combiner implementation for the AArch64 pre-legalizer combine pass. It
// derives from Combiner, keeps a CombinerHelper plus references to the rule
// configuration, subtarget and libcall lowering info, and pulls its remaining
// members from the TableGen-generated AArch64GenPreLegalizeGICombiner.inc.
742class AArch64PreLegalizerCombinerImpl : public Combiner {
743protected:
744 const CombinerHelper Helper;
745 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
746 const AArch64Subtarget &STI;
747 const LibcallLoweringInfo &Libcalls;
748
749public:
// NOTE(review): the first line of this constructor's parameter list appears
// to be missing from this listing.
750 AArch64PreLegalizerCombinerImpl(
752 GISelCSEInfo *CSEInfo,
753 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
754 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
755 MachineDominatorTree *MDT, const LegalizerInfo *LI);
756
// NOTE(review): the returned string reads "AArch6400..." while the legacy
// pass elsewhere in this file reports "AArch64PreLegalizerCombiner"; this
// looks like a typo -- confirm nothing depends on the exact spelling before
// changing it.
757 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
758
// Entry point invoked per instruction; defined out of line below.
759 bool tryCombineAll(MachineInstr &I) const override;
760
// Declared here; tryCombineAll calls it first.
761 bool tryCombineAllImpl(MachineInstr &I) const;
762
763private:
764#define GET_GICOMBINER_CLASS_MEMBERS
765#include "AArch64GenPreLegalizeGICombiner.inc"
766#undef GET_GICOMBINER_CLASS_MEMBERS
767};
768
769#define GET_GICOMBINER_IMPL
770#include "AArch64GenPreLegalizeGICombiner.inc"
771#undef GET_GICOMBINER_IMPL
772
// Constructor: forwards MF, CInfo, &VT and CSEInfo to the Combiner base,
// builds the CombinerHelper in pre-legalize mode, stores the references, and
// includes initializers from the generated .inc file.
// NOTE(review): several lines appear to be missing from this listing (the
// first parameter line declaring MF/CInfo/VT, and the lines surrounding the
// #include in the initializer list).
773AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
775 GISelCSEInfo *CSEInfo,
776 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
777 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
778 MachineDominatorTree *MDT, const LegalizerInfo *LI)
779 : Combiner(MF, CInfo, &VT, CSEInfo),
780 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
781 RuleConfig(RuleConfig), STI(STI), Libcalls(Libcalls),
783#include "AArch64GenPreLegalizeGICombiner.inc"
785{
786}
787
// Tries the generated combines first (tryCombineAllImpl); if none applies,
// falls back to hand-written handling of the memcpy-family opcodes through
// the CombinerHelper. Returns true if MI was combined.
788bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
789 if (tryCombineAllImpl(MI))
790 return true;
791
792 unsigned Opc = MI.getOpcode();
793 switch (Opc) {
794 case TargetOpcode::G_MEMCPY_INLINE:
795 return Helper.tryEmitMemcpyInline(MI);
796 case TargetOpcode::G_MEMCPY:
797 case TargetOpcode::G_MEMMOVE:
798 case TargetOpcode::G_MEMSET: {
799 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
800 // heuristics decide.
801 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
802 // Try to inline memcpy type calls; MaxLen carries the -O0 limit chosen
// above.
803 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
804 return true;
// NOTE(review): the statement guarded by this `if` is only partially
// visible; the line starting the call appears to be missing from this
// listing. What remains is the trailing CInfo.EnableMinSize argument.
805 if (Opc == TargetOpcode::G_MEMSET)
807 CInfo.EnableMinSize);
808 return false;
809 }
810 }
811
812 return false;
813}
814
815bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
816 GISelValueTracking *VT, MachineDominatorTree *MDT,
817 const LibcallLoweringInfo &Libcalls,
818 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
819 bool EnableOpt) {
820 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
821 const auto *LI = ST.getLegalizerInfo();
822
823 const Function &F = MF.getFunction();
824
825 CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
826 /*LegalizerInfo=*/nullptr, EnableOpt, F.hasOptSize(),
827 F.hasMinSize());
828 // Disable fixed-point iteration to reduce compile-time
829 CInfo.MaxIterations = 1;
830 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
831 // This is the first Combiner, so the input IR might contain dead
832 // instructions.
833 CInfo.EnableFullDCE = true;
834 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
835 Libcalls, MDT, LI);
836 return Impl.combineMachineInstrs();
837}
838
839// Pass boilerplate
840// ================
841
842class AArch64PreLegalizerCombinerLegacy : public MachineFunctionPass {
843public:
844 static char ID;
845
846 AArch64PreLegalizerCombinerLegacy();
847
848 StringRef getPassName() const override {
849 return "AArch64PreLegalizerCombiner";
850 }
851
852 bool runOnMachineFunction(MachineFunction &MF) override;
853
854 void getAnalysisUsage(AnalysisUsage &AU) const override;
855
856private:
857 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
858};
859} // end anonymous namespace
860
// Declares the pass's analysis dependencies: it preserves the CFG, requires
// and preserves value tracking, the machine dominator tree and GISel CSE
// info, and requires libcall lowering info.
// NOTE(review): two lines appear to be missing from this listing (one after
// setPreservesCFG() and one before the closing brace).
861void AArch64PreLegalizerCombinerLegacy::getAnalysisUsage(
862 AnalysisUsage &AU) const {
863 AU.setPreservesCFG();
865 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
866 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
867 AU.addRequired<MachineDominatorTreeWrapperPass>();
868 AU.addPreserved<MachineDominatorTreeWrapperPass>();
869 AU.addRequired<GISelCSEAnalysisWrapperPass>();
870 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
871 AU.addRequired<LibcallLoweringInfoWrapper>();
873}
874
875AArch64PreLegalizerCombinerLegacy::AArch64PreLegalizerCombinerLegacy()
876 : MachineFunctionPass(ID) {
877 if (!RuleConfig.parseCommandLineOption())
878 report_fatal_error("Invalid rule identifier");
879}
880
// Legacy-PM entry point: skips functions that already failed instruction
// selection, gathers the CSE info, libcall lowering, value tracking and
// dominator tree analyses, and delegates to runCombiner. Returns
// runCombiner's result.
881bool AArch64PreLegalizerCombinerLegacy::runOnMachineFunction(
882 MachineFunction &MF) {
883 if (MF.getProperties().hasFailedISel())
884 return false;
885 // Enable CSE.
886 GISelCSEAnalysisWrapper &Wrapper =
887 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
// NOTE(review): the initializer of CSEInfo is not visible; a line appears to
// be missing from this listing.
888 auto *CSEInfo =
890
891 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
892 const LibcallLoweringInfo &Libcalls =
893 getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
894 *MF.getFunction().getParent(), ST);
895
896 GISelValueTracking *VT =
897 &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
898 MachineDominatorTree *MDT =
899 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
// Optimisations are enabled only above -O0 and when skipFunction() does not
// ask for this function to be skipped.
900 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
901 !skipFunction(MF.getFunction());
902 return runCombiner(MF, CSEInfo, VT, MDT, Libcalls, RuleConfig, EnableOpt);
903}
904
905char AArch64PreLegalizerCombinerLegacy::ID = 0;
906INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
907 "Combine AArch64 machine instrs before legalization",
908 false, false)
912INITIALIZE_PASS_END(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
913 "Combine AArch64 machine instrs before legalization", false,
914 false)
915
917 : RuleConfig(
918 std::make_unique<AArch64PreLegalizerCombinerImplRuleConfig>()) {
919 if (!RuleConfig->parseCommandLineOption())
920 reportFatalUsageError("invalid rule identifier");
921}
922
925
927
931 if (MF.getProperties().hasFailedISel())
932 return PreservedAnalyses::all();
933
934 auto *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(MF).get();
937
939 auto &MAMProxy =
941 const LibcallLoweringModuleAnalysisResult *LibcallResult =
942 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(
943 *MF.getFunction().getParent());
944 if (!LibcallResult)
945 reportFatalUsageError("LibcallLoweringModuleAnalysis result not available");
946
947 const LibcallLoweringInfo &Libcalls = LibcallResult->getLibcallLowering(ST);
948
949 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None;
950
951 if (!runCombiner(MF, CSEInfo, &VT, &MDT, Libcalls, *RuleConfig, EnableOpt))
952 return PreservedAnalyses::all();
953
959 return PA;
960}
961
962namespace llvm {
964 return new AArch64PreLegalizerCombinerLegacy();
965}
966} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Value * RHS
Value * LHS
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_NE
not equal
Definition InstrTypes.h:762
LLVM_ABI void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
LLVM_ABI bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
LLVM_ABI bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The actual analysis pass wrapper.
Definition CSEInfo.h:242
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Module * getParent()
Get the module that this global value is contained inside of...
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
iterator_range< use_iterator > use_operands(Register Reg) const
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, const LibcallLoweringInfo &Libcalls, bool MinSize)
Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is supported and beneficial to ...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64PreLegalizerCombiner()
@ Offset
Definition DWP.cpp:558
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition CSEInfo.cpp:85
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:508
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1147
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:861