LLVM 23.0.0git
CombinerHelper.cpp
Go to the documentation of this file.
1//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
9#include "llvm/ADT/APFloat.h"
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SetVector.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/InstrTypes.h"
42#include <cmath>
43#include <optional>
44#include <tuple>
45
46#define DEBUG_TYPE "gi-combiner"
47
48using namespace llvm;
49using namespace MIPatternMatch;
50
51// Option to allow testing of the combiner while no targets know about indexed
52// addressing.
// Boolean command-line flag "force-legal-indexing": declared cl::Hidden and
// initialised to false, so it only takes effect when passed explicitly.
53static cl::opt<bool>
54 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
55 cl::desc("Force all indexed operations to be "
56 "legal for the GlobalISel combiner"));
57
62 const LegalizerInfo *LI)
63 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), VT(VT),
65 RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
66 TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
67 (void)this->VT;
68}
69
71 return *Builder.getMF().getSubtarget().getTargetLowering();
72}
73
75 return Builder.getMF();
76}
77
81
82LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
83
/// Maps byte \p I of a \p ByteWidth bytes wide value to its in-memory
/// position under a little endian layout.
///
/// The position equals the byte index, e.g. for a 4-byte type x,
/// x[0] -> byte 0 and x[3] -> byte 3.
///
/// \param ByteWidth width of the type in bytes; only used to validate \p I.
/// \param I byte index, must satisfy I < ByteWidth.
/// \returns the little endian in-memory byte position of byte \p I.
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // Little endian: the memory position is the byte index itself.
  const unsigned Position = I;
  return Position;
}
92
93/// Determines the LogBase2 value for a non-null input value using the
94/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
96 auto &MRI = *MIB.getMRI();
97 LLT Ty = MRI.getType(V);
98 auto Ctlz = MIB.buildCTLZ(Ty, V);
99 auto Base = MIB.buildConstant(Ty, Ty.getScalarSizeInBits() - 1);
100 return MIB.buildSub(Ty, Base, Ctlz).getReg(0);
101}
102
/// Maps byte \p I of a \p ByteWidth bytes wide value to its in-memory
/// position under a big endian layout.
///
/// The position is mirrored within the type, e.g. for a 4-byte type x,
/// x[0] -> byte 3 and x[3] -> byte 0.
///
/// \param ByteWidth width of the type in bytes.
/// \param I byte index, must satisfy I < ByteWidth.
/// \returns the big endian in-memory byte position of byte \p I.
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
  assert(I < ByteWidth && "I must be in [0, ByteWidth)");
  // Big endian: count the position back from the last byte of the type.
  const unsigned LastByte = ByteWidth - 1;
  return LastByte - I;
}
111
112/// Given a map from byte offsets in memory to indices in a load/store,
113/// determine if that map corresponds to a little or big endian byte pattern.
114///
115/// \param MemOffset2Idx maps memory offsets to address offsets.
116/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
117///
118/// \returns true if the map corresponds to a big endian byte pattern, false if
119/// it corresponds to a little endian byte pattern, and std::nullopt otherwise.
120///
121/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
122/// are as follows:
123///
124/// AddrOffset Little endian Big endian
125/// 0 0 3
126/// 1 1 2
127/// 2 2 1
128/// 3 3 0
129static std::optional<bool>
131 int64_t LowestIdx) {
132 // Need at least two byte positions to decide on endianness.
133 unsigned Width = MemOffset2Idx.size();
134 if (Width < 2)
135 return std::nullopt;
136 bool BigEndian = true, LittleEndian = true;
137 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
138 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
139 if (MemOffsetAndIdx == MemOffset2Idx.end())
140 return std::nullopt;
141 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
142 assert(Idx >= 0 && "Expected non-negative byte offset?");
143 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
144 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
145 if (!BigEndian && !LittleEndian)
146 return std::nullopt;
147 }
148
149 assert((BigEndian != LittleEndian) &&
150 "Pattern cannot be both big and little endian!");
151 return BigEndian;
152}
153
155
156bool CombinerHelper::isLegal(const LegalityQuery &Query) const {
157 assert(LI && "Must have LegalizerInfo to query isLegal!");
158 return LI->getAction(Query).Action == LegalizeActions::Legal;
159}
160
162 const LegalityQuery &Query) const {
163 return isPreLegalize() || isLegal(Query);
164}
165
167 return isLegal(Query) ||
168 LI->getAction(Query).Action == LegalizeActions::WidenScalar;
169}
170
172 const LegalityQuery &Query) const {
173 LegalizeAction Action = LI->getAction(Query).Action;
174 return Action == LegalizeActions::Legal ||
176}
177
179 if (!Ty.isVector())
180 return isLegalOrBeforeLegalizer({TargetOpcode::G_CONSTANT, {Ty}});
181 // Vector constants are represented as a G_BUILD_VECTOR of scalar G_CONSTANTs.
182 if (isPreLegalize())
183 return true;
184 LLT EltTy = Ty.getElementType();
185 return isLegal({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}) &&
186 isLegal({TargetOpcode::G_CONSTANT, {EltTy}});
187}
188
190 Register ToReg) const {
191 Observer.changingAllUsesOfReg(MRI, FromReg);
192
193 if (MRI.constrainRegAttrs(ToReg, FromReg))
194 MRI.replaceRegWith(FromReg, ToReg);
195 else
196 Builder.buildCopy(FromReg, ToReg);
197
198 Observer.finishedChangingAllUsesOfReg();
199}
200
202 MachineOperand &FromRegOp,
203 Register ToReg) const {
204 assert(FromRegOp.getParent() && "Expected an operand in an MI");
205 Observer.changingInstr(*FromRegOp.getParent());
206
207 FromRegOp.setReg(ToReg);
208
209 Observer.changedInstr(*FromRegOp.getParent());
210}
211
213 unsigned ToOpcode) const {
214 Observer.changingInstr(FromMI);
215
216 FromMI.setDesc(Builder.getTII().get(ToOpcode));
217
218 Observer.changedInstr(FromMI);
219}
220
222 return RBI->getRegBank(Reg, MRI, *TRI);
223}
224
226 const RegisterBank *RegBank) const {
227 if (RegBank)
228 MRI.setRegBank(Reg, *RegBank);
229}
230
232 if (matchCombineCopy(MI)) {
234 return true;
235 }
236 return false;
237}
239 if (MI.getOpcode() != TargetOpcode::COPY)
240 return false;
241 Register DstReg = MI.getOperand(0).getReg();
242 Register SrcReg = MI.getOperand(1).getReg();
243 return canReplaceReg(DstReg, SrcReg, MRI);
244}
246 Register DstReg = MI.getOperand(0).getReg();
247 Register SrcReg = MI.getOperand(1).getReg();
248 replaceRegWith(MRI, DstReg, SrcReg);
249 MI.eraseFromParent();
250}
251
253 MachineInstr &MI, BuildFnTy &MatchInfo) const {
254 // Ported from InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating.
255 Register DstOp = MI.getOperand(0).getReg();
256 Register OrigOp = MI.getOperand(1).getReg();
257
258 if (!MRI.hasOneNonDBGUse(OrigOp))
259 return false;
260
261 MachineInstr *OrigDef = MRI.getUniqueVRegDef(OrigOp);
262 // Even if only a single operand of the PHI is not guaranteed non-poison,
263 // moving freeze() backwards across a PHI can cause optimization issues for
264 // other users of that operand.
265 //
266 // Moving freeze() from one of the output registers of a G_UNMERGE_VALUES to
267 // the source register is unprofitable because it makes the freeze() more
268 // strict than is necessary (it would affect the whole register instead of
269 // just the subreg being frozen).
270 if (OrigDef->isPHI() || isa<GUnmerge>(OrigDef))
271 return false;
272
273 if (canCreateUndefOrPoison(OrigOp, MRI,
274 /*ConsiderFlagsAndMetadata=*/false))
275 return false;
276
277 std::optional<MachineOperand> MaybePoisonOperand;
278 for (MachineOperand &Operand : OrigDef->uses()) {
279 if (!Operand.isReg())
280 return false;
281
282 if (isGuaranteedNotToBeUndefOrPoison(Operand.getReg(), MRI))
283 continue;
284
285 if (!MaybePoisonOperand)
286 MaybePoisonOperand = Operand;
287 else {
288 // We have more than one maybe-poison operand. Moving the freeze is
289 // unsafe.
290 return false;
291 }
292 }
293
294 // Eliminate freeze if all operands are guaranteed non-poison.
295 if (!MaybePoisonOperand) {
296 MatchInfo = [=](MachineIRBuilder &B) {
297 Observer.changingInstr(*OrigDef);
298 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
299 Observer.changedInstr(*OrigDef);
300 B.buildCopy(DstOp, OrigOp);
301 };
302 return true;
303 }
304
305 Register MaybePoisonOperandReg = MaybePoisonOperand->getReg();
306 LLT MaybePoisonOperandRegTy = MRI.getType(MaybePoisonOperandReg);
307
308 MatchInfo = [=](MachineIRBuilder &B) mutable {
309 Observer.changingInstr(*OrigDef);
310 cast<GenericMachineInstr>(OrigDef)->dropPoisonGeneratingFlags();
311 Observer.changedInstr(*OrigDef);
312 B.setInsertPt(*OrigDef->getParent(), OrigDef->getIterator());
313 auto Freeze = B.buildFreeze(MaybePoisonOperandRegTy, MaybePoisonOperandReg);
315 MRI, *OrigDef->findRegisterUseOperand(MaybePoisonOperandReg, TRI),
316 Freeze.getReg(0));
317 replaceRegWith(MRI, DstOp, OrigOp);
318 };
319 return true;
320}
321
324 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
325 "Invalid instruction");
326 bool IsUndef = true;
327 MachineInstr *Undef = nullptr;
328
329 // Walk over all the operands of concat vectors and check if they are
330 // build_vector themselves or undef.
331 // Then collect their operands in Ops.
332 for (const MachineOperand &MO : MI.uses()) {
333 Register Reg = MO.getReg();
334 MachineInstr *Def = MRI.getVRegDef(Reg);
335 assert(Def && "Operand not defined");
336 if (!MRI.hasOneNonDBGUse(Reg))
337 return false;
338 switch (Def->getOpcode()) {
339 case TargetOpcode::G_BUILD_VECTOR:
340 IsUndef = false;
341 // Remember the operands of the build_vector to fold
342 // them into the yet-to-build flattened concat vectors.
343 for (const MachineOperand &BuildVecMO : Def->uses())
344 Ops.push_back(BuildVecMO.getReg());
345 break;
346 case TargetOpcode::G_IMPLICIT_DEF: {
347 LLT OpType = MRI.getType(Reg);
348 // Keep one undef value for all the undef operands.
349 if (!Undef) {
350 Builder.setInsertPt(*MI.getParent(), MI);
351 Undef = Builder.buildUndef(OpType.getScalarType());
352 }
353 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
354 OpType.getScalarType() &&
355 "All undefs should have the same type");
356 // Break the undef vector in as many scalar elements as needed
357 // for the flattening.
358 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
359 EltIdx != EltEnd; ++EltIdx)
360 Ops.push_back(Undef->getOperand(0).getReg());
361 break;
362 }
363 default:
364 return false;
365 }
366 }
367
368 // Check if the combine is illegal
369 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
371 {TargetOpcode::G_BUILD_VECTOR, {DstTy, MRI.getType(Ops[0])}})) {
372 return false;
373 }
374
375 if (IsUndef)
376 Ops.clear();
377
378 return true;
379}
382 // We determined that the concat_vectors can be flattened.
383 // Generate the flattened build_vector.
384 Register DstReg = MI.getOperand(0).getReg();
385 Builder.setInsertPt(*MI.getParent(), MI);
386 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
387
388 // Note: IsUndef is sort of redundant. We could have determined it by
389 // checking that all Ops are undef. Alternatively, we could have
390 // generated a build_vector of undefs and relied on another combine to
391 // clean that up. For now, given we already gather this information
392 // in matchCombineConcatVectors, just save compile time and emit the
393 // right thing.
394 if (Ops.empty())
395 Builder.buildUndef(NewDstReg);
396 else
397 Builder.buildBuildVector(NewDstReg, Ops);
398 replaceRegWith(MRI, DstReg, NewDstReg);
399 MI.eraseFromParent();
400}
401
403 auto &Shuffle = cast<GShuffleVector>(MI);
404
405 Register SrcVec1 = Shuffle.getSrc1Reg();
406 Register SrcVec2 = Shuffle.getSrc2Reg();
407 LLT EltTy = MRI.getType(SrcVec1).getElementType();
408 int Width = MRI.getType(SrcVec1).getNumElements();
409
410 auto Unmerge1 = Builder.buildUnmerge(EltTy, SrcVec1);
411 auto Unmerge2 = Builder.buildUnmerge(EltTy, SrcVec2);
412
413 SmallVector<Register> Extracts;
414 // Select only applicable elements from unmerged values.
415 for (int Val : Shuffle.getMask()) {
416 if (Val == -1)
417 Extracts.push_back(Builder.buildUndef(EltTy).getReg(0));
418 else if (Val < Width)
419 Extracts.push_back(Unmerge1.getReg(Val));
420 else
421 Extracts.push_back(Unmerge2.getReg(Val - Width));
422 }
423 assert(Extracts.size() > 0 && "Expected at least one element in the shuffle");
424 if (Extracts.size() == 1)
425 Builder.buildCopy(MI.getOperand(0).getReg(), Extracts[0]);
426 else
427 Builder.buildBuildVector(MI.getOperand(0).getReg(), Extracts);
428 MI.eraseFromParent();
429}
430
433 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
434 auto ConcatMI1 =
435 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(1).getReg()));
436 auto ConcatMI2 =
437 dyn_cast<GConcatVectors>(MRI.getVRegDef(MI.getOperand(2).getReg()));
438 if (!ConcatMI1 || !ConcatMI2)
439 return false;
440
441 // Check that the sources of the Concat instructions have the same type
442 if (MRI.getType(ConcatMI1->getSourceReg(0)) !=
443 MRI.getType(ConcatMI2->getSourceReg(0)))
444 return false;
445
446 LLT ConcatSrcTy = MRI.getType(ConcatMI1->getReg(1));
447 LLT ShuffleSrcTy1 = MRI.getType(MI.getOperand(1).getReg());
448 unsigned ConcatSrcNumElt = ConcatSrcTy.getNumElements();
449 for (unsigned i = 0; i < Mask.size(); i += ConcatSrcNumElt) {
450 // Check if the index takes a whole source register from G_CONCAT_VECTORS
451 // Assumes that all Sources of G_CONCAT_VECTORS are the same type
452 if (Mask[i] == -1) {
453 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
454 if (i + j >= Mask.size())
455 return false;
456 if (Mask[i + j] != -1)
457 return false;
458 }
460 {TargetOpcode::G_IMPLICIT_DEF, {ConcatSrcTy}}))
461 return false;
462 Ops.push_back(0);
463 } else if (Mask[i] % ConcatSrcNumElt == 0) {
464 for (unsigned j = 1; j < ConcatSrcNumElt; j++) {
465 if (i + j >= Mask.size())
466 return false;
467 if (Mask[i + j] != Mask[i] + static_cast<int>(j))
468 return false;
469 }
470 // Retrieve the source register from its respective G_CONCAT_VECTORS
471 // instruction
472 if (Mask[i] < ShuffleSrcTy1.getNumElements()) {
473 Ops.push_back(ConcatMI1->getSourceReg(Mask[i] / ConcatSrcNumElt));
474 } else {
475 Ops.push_back(ConcatMI2->getSourceReg(Mask[i] / ConcatSrcNumElt -
476 ConcatMI1->getNumSources()));
477 }
478 } else {
479 return false;
480 }
481 }
482
484 {TargetOpcode::G_CONCAT_VECTORS,
485 {MRI.getType(MI.getOperand(0).getReg()), ConcatSrcTy}}))
486 return false;
487
488 return !Ops.empty();
489}
490
493 LLT SrcTy;
494 for (Register &Reg : Ops) {
495 if (Reg != 0)
496 SrcTy = MRI.getType(Reg);
497 }
498 assert(SrcTy.isValid() && "Unexpected full undef vector in concat combine");
499
500 Register UndefReg = 0;
501
502 for (Register &Reg : Ops) {
503 if (Reg == 0) {
504 if (UndefReg == 0)
505 UndefReg = Builder.buildUndef(SrcTy).getReg(0);
506 Reg = UndefReg;
507 }
508 }
509
510 if (Ops.size() > 1)
511 Builder.buildConcatVectors(MI.getOperand(0).getReg(), Ops);
512 else
513 Builder.buildCopy(MI.getOperand(0).getReg(), Ops[0]);
514 MI.eraseFromParent();
515}
516
521 return true;
522 }
523 return false;
524}
525
528 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
529 "Invalid instruction kind");
530 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
531 Register Src1 = MI.getOperand(1).getReg();
532 LLT SrcType = MRI.getType(Src1);
533
534 unsigned DstNumElts = DstType.getNumElements();
535 unsigned SrcNumElts = SrcType.getNumElements();
536
537 // If the resulting vector is smaller than the size of the source
538 // vectors being concatenated, we won't be able to replace the
539 // shuffle vector into a concat_vectors.
540 //
541 // Note: We may still be able to produce a concat_vectors fed by
542 // extract_vector_elt and so on. It is less clear that would
543 // be better though, so don't bother for now.
544 //
545 // If the destination is a scalar, the size of the sources doesn't
546 // matter; we will lower the shuffle to a plain copy. This will
547 // work only if the source and destination have the same size, but
548 // that's covered by the next condition.
549 //
550 // TODO: If the sizes of the source and destination don't match,
551 // we could still emit an extract vector element in that case.
552 if (DstNumElts < 2 * SrcNumElts)
553 return false;
554
555 // Check that the shuffle mask can be broken evenly between the
556 // different sources.
557 if (DstNumElts % SrcNumElts != 0)
558 return false;
559
560 // Mask length is a multiple of the source vector length.
561 // Check if the shuffle is some kind of concatenation of the input
562 // vectors.
563 unsigned NumConcat = DstNumElts / SrcNumElts;
564 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
565 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
566 for (unsigned i = 0; i != DstNumElts; ++i) {
567 int Idx = Mask[i];
568 // Undef value.
569 if (Idx < 0)
570 continue;
571 // Ensure the indices in each SrcType sized piece are sequential and that
572 // the same source is used for the whole piece.
573 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
574 (ConcatSrcs[i / SrcNumElts] >= 0 &&
575 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
576 return false;
577 // Remember which source this index came from.
578 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
579 }
580
581 // The shuffle is concatenating multiple vectors together.
582 // Collect the different operands for that.
583 Register UndefReg;
584 Register Src2 = MI.getOperand(2).getReg();
585 for (auto Src : ConcatSrcs) {
586 if (Src < 0) {
587 if (!UndefReg) {
588 Builder.setInsertPt(*MI.getParent(), MI);
589 UndefReg = Builder.buildUndef(SrcType).getReg(0);
590 }
591 Ops.push_back(UndefReg);
592 } else if (Src == 0)
593 Ops.push_back(Src1);
594 else
595 Ops.push_back(Src2);
596 }
597 return true;
598}
599
601 ArrayRef<Register> Ops) const {
602 Register DstReg = MI.getOperand(0).getReg();
603 Builder.setInsertPt(*MI.getParent(), MI);
604 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
605
606 if (Ops.size() == 1)
607 Builder.buildCopy(NewDstReg, Ops[0]);
608 else
609 Builder.buildMergeLikeInstr(NewDstReg, Ops);
610
611 replaceRegWith(MRI, DstReg, NewDstReg);
612 MI.eraseFromParent();
613}
614
615namespace {
616
617/// Select a preference between two uses. CurrentUse is the current preference
618/// while *ForCandidate is attributes of the candidate under consideration.
619PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
620 PreferredTuple &CurrentUse,
621 const LLT TyForCandidate,
622 unsigned OpcodeForCandidate,
623 MachineInstr *MIForCandidate) {
624 if (!CurrentUse.Ty.isValid()) {
625 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
626 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
627 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
628 return CurrentUse;
629 }
630
631 // We permit the extend to hoist through basic blocks but this is only
632 // sensible if the target has extending loads. If you end up lowering back
633 // into a load and extend during the legalizer then the end result is
634 // hoisting the extend up to the load.
635
636 // Prefer defined extensions to undefined extensions as these are more
637 // likely to reduce the number of instructions.
638 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
639 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
640 return CurrentUse;
641 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
642 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
643 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
644
645 // Prefer sign extensions to zero extensions as sign-extensions tend to be
646 // more expensive. Don't do this if the load is already a zero-extend load
647 // though, otherwise we'll rewrite a zero-extend load into a sign-extend
648 // later.
649 if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
650 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
651 OpcodeForCandidate == TargetOpcode::G_ZEXT)
652 return CurrentUse;
653 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
654 OpcodeForCandidate == TargetOpcode::G_SEXT)
655 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
656 }
657
658 // This is potentially target specific. We've chosen the largest type
659 // because G_TRUNC is usually free. One potential catch with this is that
660 // some targets have a reduced number of larger registers than smaller
661 // registers and this choice potentially increases the live-range for the
662 // larger value.
663 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
664 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
665 }
666 return CurrentUse;
667}
668
669/// Find a suitable place to insert some instructions and insert them. This
670/// function accounts for special cases like inserting before a PHI node.
671/// The current strategy for inserting before PHI's is to duplicate the
672/// instructions for each predecessor. However, while that's ok for G_TRUNC
673/// on most targets since it generally requires no code, other targets/cases may
674/// want to try harder to find a dominating block.
675static void InsertInsnsWithoutSideEffectsBeforeUse(
678 MachineOperand &UseMO)>
679 Inserter) {
680 MachineInstr &UseMI = *UseMO.getParent();
681
682 MachineBasicBlock *InsertBB = UseMI.getParent();
683
684 // If the use is a PHI then we want the predecessor block instead.
685 if (UseMI.isPHI()) {
686 MachineOperand *PredBB = std::next(&UseMO);
687 InsertBB = PredBB->getMBB();
688 }
689
690 // If the block is the same block as the def then we want to insert just after
691 // the def instead of at the start of the block.
692 if (InsertBB == DefMI.getParent()) {
694 Inserter(InsertBB, std::next(InsertPt), UseMO);
695 return;
696 }
697
698 // Otherwise we want the start of the BB
699 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
700}
701} // end anonymous namespace
702
704 PreferredTuple Preferred;
705 if (matchCombineExtendingLoads(MI, Preferred)) {
706 applyCombineExtendingLoads(MI, Preferred);
707 return true;
708 }
709 return false;
710}
711
712static unsigned getExtLoadOpcForExtend(unsigned ExtOpc) {
713 unsigned CandidateLoadOpc;
714 switch (ExtOpc) {
715 case TargetOpcode::G_ANYEXT:
716 CandidateLoadOpc = TargetOpcode::G_LOAD;
717 break;
718 case TargetOpcode::G_SEXT:
719 CandidateLoadOpc = TargetOpcode::G_SEXTLOAD;
720 break;
721 case TargetOpcode::G_ZEXT:
722 CandidateLoadOpc = TargetOpcode::G_ZEXTLOAD;
723 break;
724 default:
725 llvm_unreachable("Unexpected extend opc");
726 }
727 return CandidateLoadOpc;
728}
729
731 MachineInstr &MI, PreferredTuple &Preferred) const {
732 // We match the loads and follow the uses to the extend instead of matching
733 // the extends and following the def to the load. This is because the load
734 // must remain in the same position for correctness (unless we also add code
735 // to find a safe place to sink it) whereas the extend is freely movable.
736 // It also prevents us from duplicating the load for the volatile case or just
737 // for performance.
738 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
739 if (!LoadMI)
740 return false;
741
742 Register LoadReg = LoadMI->getDstReg();
743
744 LLT LoadValueTy = MRI.getType(LoadReg);
745 if (!LoadValueTy.isScalar())
746 return false;
747
748 // Most architectures are going to legalize <s8 loads into at least a 1 byte
749 // load, and the MMOs can only describe memory accesses in multiples of bytes.
750 // If we try to perform extload combining on those, we can end up with
751 // %a(s8) = extload %ptr (load 1 byte from %ptr)
752 // ... which is an illegal extload instruction.
753 if (LoadValueTy.getSizeInBits() < 8)
754 return false;
755
756 // For non power-of-2 types, they will very likely be legalized into multiple
757 // loads. Don't bother trying to match them into extending loads.
759 return false;
760
761 // Find the preferred type aside from the any-extends (unless it's the only
762 // one) and non-extending ops. We'll emit an extending load to that type and
763 // emit a variant of (extend (trunc X)) for the others according to the
764 // relative type sizes. At the same time, pick an extend to use based on the
765 // extend involved in the chosen type.
766 unsigned PreferredOpcode =
767 isa<GLoad>(&MI)
768 ? TargetOpcode::G_ANYEXT
769 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
770 Preferred = {LLT(), PreferredOpcode, nullptr};
771 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
772 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
773 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
774 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
775 const auto &MMO = LoadMI->getMMO();
776 // Don't do anything for atomics.
777 if (MMO.isAtomic())
778 continue;
779 // Check for legality.
780 if (!isPreLegalize()) {
781 LegalityQuery::MemDesc MMDesc(MMO);
782 unsigned CandidateLoadOpc = getExtLoadOpcForExtend(UseMI.getOpcode());
783 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
784 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
785 if (LI->getAction({CandidateLoadOpc, {UseTy, SrcTy}, {MMDesc}})
786 .Action != LegalizeActions::Legal)
787 continue;
788 }
789 Preferred = ChoosePreferredUse(MI, Preferred,
790 MRI.getType(UseMI.getOperand(0).getReg()),
791 UseMI.getOpcode(), &UseMI);
792 }
793 }
794
795 // There were no extends
796 if (!Preferred.MI)
797 return false;
798 // It should be impossible to choose an extend without selecting a different
799 // type since by definition the result of an extend is larger.
800 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
801
802 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
803 return true;
804}
805
807 MachineInstr &MI, PreferredTuple &Preferred) const {
808 // Rewrite the load to the chosen extending load.
809 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
810
811 // Inserter to insert a truncate back to the original type at a given point
812 // with some basic CSE to limit truncate duplication to one per BB.
814 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
815 MachineBasicBlock::iterator InsertBefore,
816 MachineOperand &UseMO) {
817 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
818 if (PreviouslyEmitted) {
819 Observer.changingInstr(*UseMO.getParent());
820 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
821 Observer.changedInstr(*UseMO.getParent());
822 return;
823 }
824
825 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
826 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
827 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
828 EmittedInsns[InsertIntoBB] = NewMI;
829 replaceRegOpWith(MRI, UseMO, NewDstReg);
830 };
831
832 Observer.changingInstr(MI);
833 unsigned LoadOpc = getExtLoadOpcForExtend(Preferred.ExtendOpcode);
834 MI.setDesc(Builder.getTII().get(LoadOpc));
835
836 // Rewrite all the uses to fix up the types.
837 auto &LoadValue = MI.getOperand(0);
839 llvm::make_pointer_range(MRI.use_operands(LoadValue.getReg())));
840
841 for (auto *UseMO : Uses) {
842 MachineInstr *UseMI = UseMO->getParent();
843
844 // If the extend is compatible with the preferred extend then we should fix
845 // up the type and extend so that it uses the preferred use.
846 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
847 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
848 Register UseDstReg = UseMI->getOperand(0).getReg();
849 MachineOperand &UseSrcMO = UseMI->getOperand(1);
850 const LLT UseDstTy = MRI.getType(UseDstReg);
851 if (UseDstReg != ChosenDstReg) {
852 if (Preferred.Ty == UseDstTy) {
853 // If the use has the same type as the preferred use, then merge
854 // the vregs and erase the extend. For example:
855 // %1:_(s8) = G_LOAD ...
856 // %2:_(s32) = G_SEXT %1(s8)
857 // %3:_(s32) = G_ANYEXT %1(s8)
858 // ... = ... %3(s32)
859 // rewrites to:
860 // %2:_(s32) = G_SEXTLOAD ...
861 // ... = ... %2(s32)
862 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
863 Observer.erasingInstr(*UseMO->getParent());
864 UseMO->getParent()->eraseFromParent();
865 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
866 // If the preferred size is smaller, then keep the extend but extend
867 // from the result of the extending load. For example:
868 // %1:_(s8) = G_LOAD ...
869 // %2:_(s32) = G_SEXT %1(s8)
870 // %3:_(s64) = G_ANYEXT %1(s8)
871 // ... = ... %3(s64)
872 // rewrites to:
873 // %2:_(s32) = G_SEXTLOAD ...
874 // %3:_(s64) = G_ANYEXT %2:_(s32)
875 // ... = ... %3(s64)
876 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
877 } else {
878 // If the preferred size is larger, then insert a truncate. For
879 // example:
880 // %1:_(s8) = G_LOAD ...
881 // %2:_(s64) = G_SEXT %1(s8)
882 // %3:_(s32) = G_ZEXT %1(s8)
883 // ... = ... %3(s32)
884 // rewrites to:
885 // %2:_(s64) = G_SEXTLOAD ...
886 // %4:_(s8) = G_TRUNC %2:_(s64)
887 // %3:_(s32) = G_ZEXT %4:_(s8)
888 // ... = ... %3(s32)
889 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
890 InsertTruncAt);
891 }
892 continue;
893 }
894 // The use is (one of) the uses of the preferred use we chose earlier.
895 // We're going to update the load to def this value later so just erase
896 // the old extend.
897 Observer.erasingInstr(*UseMO->getParent());
898 UseMO->getParent()->eraseFromParent();
899 continue;
900 }
901
902 // The use isn't an extend. Truncate back to the type we originally loaded.
903 // This is free on many targets.
904 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
905 }
906
907 MI.getOperand(0).setReg(ChosenDstReg);
908 Observer.changedInstr(MI);
909}
910
912 BuildFnTy &MatchInfo) const {
913 assert(MI.getOpcode() == TargetOpcode::G_AND);
914
915 // If we have the following code:
916 // %mask = G_CONSTANT 255
917 // %ld = G_LOAD %ptr, (load s16)
918 // %and = G_AND %ld, %mask
919 //
920 // Try to fold it into
921 // %ld = G_ZEXTLOAD %ptr, (load s8)
922
923 Register Dst = MI.getOperand(0).getReg();
924 if (MRI.getType(Dst).isVector())
925 return false;
926
927 auto MaybeMask =
928 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
929 if (!MaybeMask)
930 return false;
931
932 APInt MaskVal = MaybeMask->Value;
933
934 if (!MaskVal.isMask())
935 return false;
936
937 Register SrcReg = MI.getOperand(1).getReg();
938 // Don't use getOpcodeDef() here since intermediate instructions may have
939 // multiple users.
940 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(MRI.getVRegDef(SrcReg));
941 if (!LoadMI || !MRI.hasOneNonDBGUse(LoadMI->getDstReg()))
942 return false;
943
944 Register LoadReg = LoadMI->getDstReg();
945 LLT RegTy = MRI.getType(LoadReg);
946 Register PtrReg = LoadMI->getPointerReg();
947 unsigned RegSize = RegTy.getSizeInBits();
948 LocationSize LoadSizeBits = LoadMI->getMemSizeInBits();
949 unsigned MaskSizeBits = MaskVal.countr_one();
950
951 // The mask may not be larger than the in-memory type, as it might cover sign
952 // extended bits
953 if (MaskSizeBits > LoadSizeBits.getValue())
954 return false;
955
956 // If the mask covers the whole destination register, there's nothing to
957 // extend
958 if (MaskSizeBits >= RegSize)
959 return false;
960
961 // Most targets cannot deal with loads of size < 8 and need to re-legalize to
962 // at least byte loads. Avoid creating such loads here
963 if (MaskSizeBits < 8 || !isPowerOf2_32(MaskSizeBits))
964 return false;
965
966 const MachineMemOperand &MMO = LoadMI->getMMO();
967 LegalityQuery::MemDesc MemDesc(MMO);
968
969 // Don't modify the memory access size if this is atomic/volatile, but we can
970 // still adjust the opcode to indicate the high bit behavior.
971 if (LoadMI->isSimple())
972 MemDesc.MemoryTy = LLT::scalar(MaskSizeBits);
973 else if (LoadSizeBits.getValue() > MaskSizeBits ||
974 LoadSizeBits.getValue() == RegSize)
975 return false;
976
977 // TODO: Could check if it's legal with the reduced or original memory size.
979 {TargetOpcode::G_ZEXTLOAD, {RegTy, MRI.getType(PtrReg)}, {MemDesc}}))
980 return false;
981
982 MatchInfo = [=](MachineIRBuilder &B) {
983 B.setInstrAndDebugLoc(*LoadMI);
984 auto &MF = B.getMF();
985 auto PtrInfo = MMO.getPointerInfo();
986 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, MemDesc.MemoryTy);
987 B.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, Dst, PtrReg, *NewMMO);
988 LoadMI->eraseFromParent();
989 };
990 return true;
991}
992
994 const MachineInstr &UseMI) const {
// Reports whether DefMI is reached no later than UseMI when walking their
// shared basic block from its first instruction. Both instructions must be
// non-debug and live in the same block (asserted below); an instruction is
// treated as preceding itself.
995 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
996 "shouldn't consider debug uses");
997 assert(DefMI.getParent() == UseMI.getParent());
998 if (&DefMI == &UseMI)
999 return true;
1000 const MachineBasicBlock &MBB = *DefMI.getParent();
// Linear scan of the block: stop at whichever of the two comes first.
1001 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
1002 return &MI == &DefMI || &MI == &UseMI;
1003 });
1004 if (DefOrUse == MBB.end())
1005 llvm_unreachable("Block must contain both DefMI and UseMI!");
// DefMI precedes UseMI exactly when the scan stopped on DefMI.
1006 return &*DefOrUse == &DefMI;
1007}
1008
1010 const MachineInstr &UseMI) const {
// Dominance query between two non-debug instructions. Defers to MDT when it
// is set; otherwise only same-block pairs can be answered (through
// isPredecessor), and a pair spanning two blocks is reported as false.
1011 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
1012 "shouldn't consider debug uses");
1013 if (MDT)
1014 return MDT->dominates(&DefMI, &UseMI);
1015 else if (DefMI.getParent() != UseMI.getParent())
1016 return false;
1017
// Same block and no dominator tree: fall back to instruction order.
1018 return isPredecessor(DefMI, UseMI);
1019}
1020
1022 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
// Succeeds when the value being sign-extended in-register comes, possibly
// through one G_TRUNC, from a G_SEXTLOAD whose memory width equals the
// G_SEXT_INREG width. Vector sources are rejected.
1023 Register SrcReg = MI.getOperand(1).getReg();
1024 Register LoadUser = SrcReg;
1025
1026 if (MRI.getType(SrcReg).isVector())
1027 return false;
1028
// Look through a truncate so the load lookup starts from its input.
1029 Register TruncSrc;
1030 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
1031 LoadUser = TruncSrc;
1032
1033 uint64_t SizeInBits = MI.getOperand(2).getImm();
1034 // If the source is a G_SEXTLOAD from the same bit width, then we don't
1035 // need any extend at all, just a truncate.
1036 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
1037 // If truncating more than the original extended value, abort.
1038 auto LoadSizeBits = LoadMI->getMemSizeInBits();
1039 if (TruncSrc &&
1040 MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits.getValue())
1041 return false;
1042 if (LoadSizeBits == SizeInBits)
1043 return true;
1044 }
1045 return false;
1046}
1047
1049 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
// Replace the G_SEXT_INREG with a plain copy of its source operand into its
// destination register, then delete the G_SEXT_INREG.
1050 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1051 MI.eraseFromParent();
1052}
1053
1055 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
// Decides whether a G_SEXT_INREG fed by a single-use G_LOAD can be turned
// into a G_SEXTLOAD. On success MatchInfo receives the load's destination
// register and the memory width, in bits, chosen for the new load.
1056 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1057
1058 Register DstReg = MI.getOperand(0).getReg();
1059 LLT RegTy = MRI.getType(DstReg);
1060
1061 // Only supports scalars for now.
1062 if (RegTy.isVector())
1063 return false;
1064
1065 Register SrcReg = MI.getOperand(1).getReg();
1066 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
1067 if (!LoadDef || !MRI.hasOneNonDBGUse(SrcReg))
1068 return false;
1069
1070 uint64_t MemBits = LoadDef->getMemSizeInBits().getValue();
1071
1072 // If the sign extend extends from a narrower width than the load's width,
1073 // then we can narrow the load width when we combine to a G_SEXTLOAD.
1074 // Avoid widening the load at all.
1075 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(), MemBits);
1076
1077 // Don't generate G_SEXTLOADs with a < 1 byte width.
1078 if (NewSizeBits < 8)
1079 return false;
1080 // Don't bother creating a non-power-2 sextload, it will likely be broken up
1081 // anyway for most targets.
1082 if (!isPowerOf2_32(NewSizeBits))
1083 return false;
1084
1085 const MachineMemOperand &MMO = LoadDef->getMMO();
1086 LegalityQuery::MemDesc MMDesc(MMO);
1087
1088 // Don't modify the memory access size if this is atomic/volatile, but we can
1089 // still adjust the opcode to indicate the high bit behavior.
// A non-simple load is rejected if its access is wider than NewSizeBits or
// already as wide as the result register.
1090 if (LoadDef->isSimple())
1091 MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
1092 else if (MemBits > NewSizeBits || MemBits == RegTy.getSizeInBits())
1093 return false;
1094
1095 // TODO: Could check if it's legal with the reduced or original memory size.
1096 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
1097 {MRI.getType(LoadDef->getDstReg()),
1098 MRI.getType(LoadDef->getPointerReg())},
1099 {MMDesc}}))
1100 return false;
1101
1102 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
1103 return true;
1104}
1105
1107 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) const {
// Rewrites the matched load + G_SEXT_INREG pair into a single G_SEXTLOAD that
// defines the G_SEXT_INREG's result. MatchInfo carries the old load's
// destination register and the width, in bits, to extend from.
1108 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
1109 Register LoadReg;
1110 unsigned ScalarSizeBits;
1111 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
1112 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
1113
1114 // If we have the following:
1115 // %ld = G_LOAD %ptr, (load 2)
1116 // %ext = G_SEXT_INREG %ld, 8
1117 // ==>
1118 // %ld = G_SEXTLOAD %ptr (load 1)
1119
1120 auto &MMO = LoadDef->getMMO();
// The new load is inserted at the position of the old one.
1121 Builder.setInstrAndDebugLoc(*LoadDef);
1122 auto &MF = Builder.getMF();
1123 auto PtrInfo = MMO.getPointerInfo();
// The new memory operand is derived from the old one with its size set to
// ScalarSizeBits / 8.
1124 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
1125 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
1126 LoadDef->getPointerReg(), *NewMMO);
1127 MI.eraseFromParent();
1128
1129 // Not all loads can be deleted, so make sure the old one is removed.
1130 LoadDef->eraseFromParent();
1131}
1132
1133/// Return true if 'MI' is a load or a store that may fold its address
1134/// operand into the load / store addressing mode.
1136 MachineRegisterInfo &MRI) {
1138 auto *MF = MI->getMF();
// Only an address produced by a G_PTR_ADD is a folding candidate.
1139 auto *Addr = getOpcodeDef<GPtrAdd>(MI->getPointerReg(), MRI);
1140 if (!Addr)
1141 return false;
1142
// Describe the address as base register plus either a constant offset or a
// second register with scale 1, depending on whether the G_PTR_ADD offset is
// a known constant.
1143 AM.HasBaseReg = true;
1144 if (auto CstOff = getIConstantVRegVal(Addr->getOffsetReg(), MRI))
1145 AM.BaseOffs = CstOff->getSExtValue(); // [reg +/- imm]
1146 else
1147 AM.Scale = 1; // [reg +/- reg]
1148
// The target decides, using the memory type and address space of the access.
1149 return TLI.isLegalAddressingMode(
1150 MF->getDataLayout(), AM,
1151 getTypeForLLT(MI->getMMO().getMemoryType(),
1152 MF->getFunction().getContext()),
1153 MI->getMMO().getAddrSpace());
1154}
1155
1156static unsigned getIndexedOpc(unsigned LdStOpc) {
1157 switch (LdStOpc) {
1158 case TargetOpcode::G_LOAD:
1159 return TargetOpcode::G_INDEXED_LOAD;
1160 case TargetOpcode::G_STORE:
1161 return TargetOpcode::G_INDEXED_STORE;
1162 case TargetOpcode::G_ZEXTLOAD:
1163 return TargetOpcode::G_INDEXED_ZEXTLOAD;
1164 case TargetOpcode::G_SEXTLOAD:
1165 return TargetOpcode::G_INDEXED_SEXTLOAD;
1166 default:
1167 llvm_unreachable("Unexpected opcode");
1168 }
1169}
1170
1171bool CombinerHelper::isIndexedLoadStoreLegal(GLoadStore &LdSt) const {
// Asks the legalizer whether the indexed form of LdSt (see getIndexedOpc) is
// legal for its pointer type, value type and memory type.
1172 // Check for legality.
1173 LLT PtrTy = MRI.getType(LdSt.getPointerReg());
1174 LLT Ty = MRI.getType(LdSt.getReg(0));
1175 LLT MemTy = LdSt.getMMO().getMemoryType();
1177 {{MemTy, MemTy.getSizeInBits().getKnownMinValue(),
1179 unsigned IndexedOpc = getIndexedOpc(LdSt.getOpcode());
// The type list differs between the indexed store and the indexed loads.
1180 SmallVector<LLT> OpTys;
1181 if (IndexedOpc == TargetOpcode::G_INDEXED_STORE)
1182 OpTys = {PtrTy, Ty, Ty};
1183 else
1184 OpTys = {Ty, PtrTy}; // For G_INDEXED_LOAD, G_INDEXED_[SZ]EXTLOAD
1185
1186 LegalityQuery Q(IndexedOpc, OpTys, MemDescrs);
1187 return isLegal(Q);
1188}
1189
1191 "post-index-use-threshold", cl::Hidden, cl::init(32),
1192 cl::desc("Number of uses of a base pointer to check before it is no longer "
1193 "considered for post-indexing."));
// Hidden command-line option, initialised to 32. The post-index candidate
// search compares its running count of inspected pointer users against it.
1194
1195bool CombinerHelper::findPostIndexCandidate(GLoadStore &LdSt, Register &Addr,
1197 bool &RematOffset) const {
// Searches the users of LdSt's pointer for a G_PTR_ADD that could be folded
// into LdSt as a post-indexed access. On success Addr, Base and Offset are set
// from that G_PTR_ADD (its result, base and offset registers) and RematOffset
// says whether the offset is a G_CONSTANT whose definition does not dominate
// LdSt.
1198 // We're looking for the following pattern, for either load or store:
1199 // %baseptr:_(p0) = ...
1200 // G_STORE %val(s64), %baseptr(p0)
1201 // %offset:_(s64) = G_CONSTANT i64 -256
1202 // %new_addr:_(p0) = G_PTR_ADD %baseptr, %offset(s64)
1203 const auto &TLI = getTargetLowering();
1204
1205 Register Ptr = LdSt.getPointerReg();
1206 // If the store is the only use, don't bother.
1207 if (MRI.hasOneNonDBGUse(Ptr))
1208 return false;
1209
1210 if (!isIndexedLoadStoreLegal(LdSt))
1211 return false;
1212
// Pointers defined by G_FRAME_INDEX are never candidates.
1213 if (getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Ptr, MRI))
1214 return false;
1215
1216 MachineInstr *StoredValDef = getDefIgnoringCopies(LdSt.getReg(0), MRI);
1217 auto *PtrDef = MRI.getVRegDef(Ptr);
1218
1219 unsigned NumUsesChecked = 0;
1220 for (auto &Use : MRI.use_nodbg_instructions(Ptr)) {
1221 if (++NumUsesChecked > PostIndexUseThreshold)
1222 return false; // Try to avoid exploding compile time.
1223
1224 auto *PtrAdd = dyn_cast<GPtrAdd>(&Use);
1225 // The use itself might be dead. This can happen during combines if DCE
1226 // hasn't had a chance to run yet. Don't allow it to form an indexed op.
1227 if (!PtrAdd || MRI.use_nodbg_empty(PtrAdd->getReg(0)))
1228 continue;
1229
1230 // Check the user of this isn't the store, otherwise we'd generate an
1231 // indexed store defining its own use.
1232 if (StoredValDef == &Use)
1233 continue;
1234
1235 Offset = PtrAdd->getOffsetReg();
1236 if (!ForceLegalIndexing &&
1237 !TLI.isIndexingLegal(LdSt, PtrAdd->getBaseReg(), Offset,
1238 /*IsPre*/ false, MRI))
1239 continue;
1240
1241 // Make sure the offset calculation is before the potentially indexed op.
1242 MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
1243 RematOffset = false;
1244 if (!dominates(*OffsetDef, LdSt)) {
1245 // If the offset however is just a G_CONSTANT, we can always just
1246 // rematerialize it where we need it.
1247 if (OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
1248 continue;
1249 RematOffset = true;
1250 }
1251
// Inspect every other user of the base pointer; several conditions below
// abandon the whole search rather than just this G_PTR_ADD.
1252 for (auto &BasePtrUse : MRI.use_nodbg_instructions(PtrAdd->getBaseReg())) {
1253 if (&BasePtrUse == PtrDef)
1254 continue;
1255
1256 // If the user is a later load/store that can be post-indexed, then don't
1257 // combine this one.
1258 auto *BasePtrLdSt = dyn_cast<GLoadStore>(&BasePtrUse);
1259 if (BasePtrLdSt && BasePtrLdSt != &LdSt &&
1260 dominates(LdSt, *BasePtrLdSt) &&
1261 isIndexedLoadStoreLegal(*BasePtrLdSt))
1262 return false;
1263
1264 // Now we're looking for the key G_PTR_ADD instruction, which contains
1265 // the offset add that we want to fold.
1266 if (auto *BasePtrUseDef = dyn_cast<GPtrAdd>(&BasePtrUse)) {
1267 Register PtrAddDefReg = BasePtrUseDef->getReg(0);
1268 for (auto &BaseUseUse : MRI.use_nodbg_instructions(PtrAddDefReg)) {
1269 // If the use is in a different block, then we may produce worse code
1270 // due to the extra register pressure.
1271 if (BaseUseUse.getParent() != LdSt.getParent())
1272 return false;
1273
// A load/store that can already fold this address into its addressing mode
// makes the transformation unprofitable.
1274 if (auto *UseUseLdSt = dyn_cast<GLoadStore>(&BaseUseUse))
1275 if (canFoldInAddressingMode(UseUseLdSt, TLI, MRI))
1276 return false;
1277 }
1278 if (!dominates(LdSt, BasePtrUse))
1279 return false; // All use must be dominated by the load/store.
1280 }
1281 }
1282
// First G_PTR_ADD that passed every check wins.
1283 Addr = PtrAdd->getReg(0);
1284 Base = PtrAdd->getBaseReg();
1285 return true;
1286 }
1287
1288 return false;
1289}
1290
1291bool CombinerHelper::findPreIndexCandidate(GLoadStore &LdSt, Register &Addr,
1292 Register &Base,
1293 Register &Offset) const {
1294 auto &MF = *LdSt.getParent()->getParent();
1295 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1296
1297 Addr = LdSt.getPointerReg();
1298 if (!mi_match(Addr, MRI, m_GPtrAdd(m_Reg(Base), m_Reg(Offset))) ||
1299 MRI.hasOneNonDBGUse(Addr))
1300 return false;
1301
1302 if (!ForceLegalIndexing &&
1303 !TLI.isIndexingLegal(LdSt, Base, Offset, /*IsPre*/ true, MRI))
1304 return false;
1305
1306 if (!isIndexedLoadStoreLegal(LdSt))
1307 return false;
1308
1309 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
1310 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
1311 return false;
1312
1313 if (auto *St = dyn_cast<GStore>(&LdSt)) {
1314 // Would require a copy.
1315 if (Base == St->getValueReg())
1316 return false;
1317
1318 // We're expecting one use of Addr in MI, but it could also be the
1319 // value stored, which isn't actually dominated by the instruction.
1320 if (St->getValueReg() == Addr)
1321 return false;
1322 }
1323
1324 // Avoid increasing cross-block register pressure.
1325 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr))
1326 if (AddrUse.getParent() != LdSt.getParent())
1327 return false;
1328
1329 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
1330 // That might allow us to end base's liveness here by adjusting the constant.
1331 bool RealUse = false;
1332 for (auto &AddrUse : MRI.use_nodbg_instructions(Addr)) {
1333 if (!dominates(LdSt, AddrUse))
1334 return false; // All use must be dominated by the load/store.
1335
1336 // If Ptr may be folded in addressing mode of other use, then it's
1337 // not profitable to do this transformation.
1338 if (auto *UseLdSt = dyn_cast<GLoadStore>(&AddrUse)) {
1339 if (!canFoldInAddressingMode(UseLdSt, TLI, MRI))
1340 RealUse = true;
1341 } else {
1342 RealUse = true;
1343 }
1344 }
1345 return RealUse;
1346}
1347
1349 MachineInstr &MI, BuildFnTy &MatchInfo) const {
// Matches a G_EXTRACT_VECTOR_ELT whose vector operand is a simple, single-use
// G_LOAD in the same block, and prepares MatchInfo to replace the pair with a
// load of just the extracted element.
1350 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
1351
1352 // Check if there is a load that defines the vector being extracted from.
1353 auto *LoadMI = getOpcodeDef<GLoad>(MI.getOperand(1).getReg(), MRI);
1354 if (!LoadMI)
1355 return false;
1356
1357 Register Vector = MI.getOperand(1).getReg();
1358 LLT VecEltTy = MRI.getType(Vector).getElementType();
1359
1360 assert(MRI.getType(MI.getOperand(0).getReg()) == VecEltTy);
1361
1362 // Checking whether we should reduce the load width.
1363 if (!MRI.hasOneNonDBGUse(Vector))
1364 return false;
1365
1366 // Check if the defining load is simple.
1367 if (!LoadMI->isSimple())
1368 return false;
1369
1370 // If the vector element type is not a multiple of a byte then we are unable
1371 // to correctly compute an address to load only the extracted element as a
1372 // scalar.
1373 if (!VecEltTy.isByteSized())
1374 return false;
1375
1376 // Check for load fold barriers between the extraction and the load.
1377 if (MI.getParent() != LoadMI->getParent())
1378 return false;
// The scan between the load and the extract is bounded by MaxIter and gives
// up once that bound is reached.
1379 const unsigned MaxIter = 20;
1380 unsigned Iter = 0;
1381 for (auto II = LoadMI->getIterator(), IE = MI.getIterator(); II != IE; ++II) {
1382 if (II->isLoadFoldBarrier())
1383 return false;
1384 if (Iter++ == MaxIter)
1385 return false;
1386 }
1387
1388 // Check if the new load that we are going to create is legal
1389 // if we are in the post-legalization phase.
1390 MachineMemOperand MMO = LoadMI->getMMO();
1391 Align Alignment = MMO.getAlign();
1392 MachinePointerInfo PtrInfo;
1394
1395 // Finding the appropriate PtrInfo if offset is a known constant.
1396 // This is required to create the memory operand for the narrowed load.
1397 // This machine memory operand object helps us infer about legality
1398 // before we proceed to combine the instruction.
// NOTE(review): the constant lookup below is applied to Vector (operand 1, the
// loaded vector) rather than to the index operand (operand 2); that looks
// unintended -- confirm before relying on the constant-offset path.
1399 if (auto CVal = getIConstantVRegVal(Vector, MRI)) {
1400 int Elt = CVal->getZExtValue();
1401 // FIXME: should be (ABI size)*Elt.
1402 Offset = VecEltTy.getSizeInBits() * Elt / 8;
1403 PtrInfo = MMO.getPointerInfo().getWithOffset(Offset);
1404 } else {
1405 // Discard the pointer info except the address space because the memory
1406 // operand can't represent this new access since the offset is variable.
1407 Offset = VecEltTy.getSizeInBits() / 8;
1409 }
1410
// Only the alignment implied by both the original alignment and Offset is
// kept for the narrowed access.
1411 Alignment = commonAlignment(Alignment, Offset);
1412
1413 Register VecPtr = LoadMI->getPointerReg();
1414 LLT PtrTy = MRI.getType(VecPtr);
1415
1416 MachineFunction &MF = *MI.getMF();
1417 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, VecEltTy);
1418
1419 LegalityQuery::MemDesc MMDesc(*NewMMO);
1420
1422 {TargetOpcode::G_LOAD, {VecEltTy, PtrTy}, {MMDesc}}))
1423 return false;
1424
1425 // Load must be allowed and fast on the target.
1427 auto &DL = MF.getDataLayout();
1428 unsigned Fast = 0;
1429 if (!getTargetLowering().allowsMemoryAccess(C, DL, VecEltTy, *NewMMO,
1430 &Fast) ||
1431 !Fast)
1432 return false;
1433
1434 Register Result = MI.getOperand(0).getReg();
1435 Register Index = MI.getOperand(2).getReg();
1436
// The deferred rewrite captures its inputs by value ([=]).
1437 MatchInfo = [=](MachineIRBuilder &B) {
1438 GISelObserverWrapper DummyObserver;
1439 LegalizerHelper Helper(B.getMF(), DummyObserver, B);
1440 // Get pointer to the vector element.
1441 Register finalPtr = Helper.getVectorElementPointer(
1442 LoadMI->getPointerReg(), MRI.getType(LoadMI->getOperand(0).getReg()),
1443 Index);
1444 // New G_LOAD instruction.
1445 B.buildLoad(Result, finalPtr, PtrInfo, Alignment);
1446 // Remove original GLOAD instruction.
1447 LoadMI->eraseFromParent();
1448 };
1449
1450 return true;
1451}
1452
1454 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
// Tries to find an indexing opportunity for the load/store MI: pre-indexing
// first, post-indexing only if that fails. Atomic accesses are never matched.
1455 auto &LdSt = cast<GLoadStore>(MI);
1456
1457 if (LdSt.isAtomic())
1458 return false;
1459
// MatchInfo.IsPre records which of the two searches succeeded.
1460 MatchInfo.IsPre = findPreIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1461 MatchInfo.Offset);
1462 if (!MatchInfo.IsPre &&
1463 !findPostIndexCandidate(LdSt, MatchInfo.Addr, MatchInfo.Base,
1464 MatchInfo.Offset, MatchInfo.RematOffset))
1465 return false;
1466
1467 return true;
1468}
1469
1471 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const {
// Builds the indexed form of the load/store MI from MatchInfo, then erases
// both MI and the instruction that used to define MatchInfo.Addr; the new
// indexed instruction defines MatchInfo.Addr instead.
1472 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
1473 unsigned Opcode = MI.getOpcode();
1474 bool IsStore = Opcode == TargetOpcode::G_STORE;
1475 unsigned NewOpcode = getIndexedOpc(Opcode);
1476
1477 // If the offset constant didn't happen to dominate the load/store, we can
1478 // just clone it as needed.
1479 if (MatchInfo.RematOffset) {
1480 auto *OldCst = MRI.getVRegDef(MatchInfo.Offset);
1481 auto NewCst = Builder.buildConstant(MRI.getType(MatchInfo.Offset),
1482 *OldCst->getOperand(1).getCImm());
1483 MatchInfo.Offset = NewCst.getReg(0);
1484 }
1485
// Operand layout: a store defines only the written-back address and uses the
// stored value; a load defines the loaded value followed by the address.
1486 auto MIB = Builder.buildInstr(NewOpcode);
1487 if (IsStore) {
1488 MIB.addDef(MatchInfo.Addr);
1489 MIB.addUse(MI.getOperand(0).getReg());
1490 } else {
1491 MIB.addDef(MI.getOperand(0).getReg());
1492 MIB.addDef(MatchInfo.Addr);
1493 }
1494
// Common tail: base, offset, and an immediate flag that is non-zero for
// pre-indexing.
1495 MIB.addUse(MatchInfo.Base);
1496 MIB.addUse(MatchInfo.Offset);
1497 MIB.addImm(MatchInfo.IsPre);
1498 MIB->cloneMemRefs(*MI.getMF(), MI);
1499 MI.eraseFromParent();
1500 AddrDef.eraseFromParent();
1501
// NOTE(review): the debug message below misspells "Combined".
1502 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
1503}
1504
1506 MachineInstr *&OtherMI) const {
// Given a division or remainder MI, looks for the complementary remainder or
// division of the same signedness on the same operands in the same block.
// On success OtherMI points at that instruction.
1507 unsigned Opcode = MI.getOpcode();
1508 bool IsDiv, IsSigned;
1509
1510 switch (Opcode) {
1511 default:
1512 llvm_unreachable("Unexpected opcode!");
1513 case TargetOpcode::G_SDIV:
1514 case TargetOpcode::G_UDIV: {
1515 IsDiv = true;
1516 IsSigned = Opcode == TargetOpcode::G_SDIV;
1517 break;
1518 }
1519 case TargetOpcode::G_SREM:
1520 case TargetOpcode::G_UREM: {
1521 IsDiv = false;
1522 IsSigned = Opcode == TargetOpcode::G_SREM;
1523 break;
1524 }
1525 }
1526
1527 Register Src1 = MI.getOperand(1).getReg();
1528 unsigned DivOpcode, RemOpcode, DivremOpcode;
1529 if (IsSigned) {
1530 DivOpcode = TargetOpcode::G_SDIV;
1531 RemOpcode = TargetOpcode::G_SREM;
1532 DivremOpcode = TargetOpcode::G_SDIVREM;
1533 } else {
1534 DivOpcode = TargetOpcode::G_UDIV;
1535 RemOpcode = TargetOpcode::G_UREM;
1536 DivremOpcode = TargetOpcode::G_UDIVREM;
1537 }
1538
// Nothing to do if the combined opcode cannot be used for this type.
1539 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1540 return false;
1541
1542 // Combine:
1543 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1544 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1545 // into:
1546 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1547
1548 // Combine:
1549 // %rem:_ = G_[SU]REM %src1:_, %src2:_
1550 // %div:_ = G_[SU]DIV %src1:_, %src2:_
1551 // into:
1552 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1553
// Candidates are found by walking the users of the first source operand; the
// first one that qualifies is taken.
1554 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1555 if (MI.getParent() == UseMI.getParent() &&
1556 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1557 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1558 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2)) &&
1559 matchEqualDefs(MI.getOperand(1), UseMI.getOperand(1))) {
1560 OtherMI = &UseMI;
1561 return true;
1562 }
1563 }
1564
1565 return false;
1566}
1567
1569 MachineInstr *&OtherMI) const {
// Replaces MI and OtherMI (a division and a remainder) with one
// G_SDIVREM/G_UDIVREM that defines both original result registers.
1570 unsigned Opcode = MI.getOpcode();
1571 assert(OtherMI && "OtherMI shouldn't be empty.");
1572
// Work out which of the two instructions produced the quotient and which the
// remainder.
1573 Register DestDivReg, DestRemReg;
1574 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1575 DestDivReg = MI.getOperand(0).getReg();
1576 DestRemReg = OtherMI->getOperand(0).getReg();
1577 } else {
1578 DestDivReg = OtherMI->getOperand(0).getReg();
1579 DestRemReg = MI.getOperand(0).getReg();
1580 }
1581
1582 bool IsSigned =
1583 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1584
1585 // Check which instruction is first in the block so we don't break def-use
1586 // deps by "moving" the instruction incorrectly. Also keep track of which
1587 // instruction is first so we pick its operands, avoiding use-before-def
1588 // bugs.
1589 MachineInstr *FirstInst = dominates(MI, *OtherMI) ? &MI : OtherMI;
1590 Builder.setInstrAndDebugLoc(*FirstInst);
1591
1592 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1593 : TargetOpcode::G_UDIVREM,
1594 {DestDivReg, DestRemReg},
1595 { FirstInst->getOperand(1), FirstInst->getOperand(2) });
1596 MI.eraseFromParent();
1597 OtherMI->eraseFromParent();
1598}
1599
1601 MachineInstr &MI, MachineInstr *&BrCond) const {
// Matches a G_BR that is immediately preceded by a G_BRCOND whose target is
// the layout successor of the block. BrCond is set to the instruction before
// the G_BR as soon as one exists, even if the match then fails.
1602 assert(MI.getOpcode() == TargetOpcode::G_BR);
1603
1604 // Try to match the following:
1605 // bb1:
1606 // G_BRCOND %c1, %bb2
1607 // G_BR %bb3
1608 // bb2:
1609 // ...
1610 // bb3:
1611
1612 // The above pattern does not have a fall through to the successor bb2, always
1613 // resulting in a branch no matter which path is taken. Here we try to find
1614 // and replace that pattern with conditional branch to bb3 and otherwise
1615 // fallthrough to bb2. This is generally better for branch predictors.
1616
1617 MachineBasicBlock *MBB = MI.getParent();
// A G_BR at the very start of the block has no predecessor instruction to
// inspect.
1619 if (BrIt == MBB->begin())
1620 return false;
1621 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1622
1623 BrCond = &*std::prev(BrIt);
1624 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1625 return false;
1626
1627 // Check that the next block is the conditional branch target. Also make sure
1628 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1629 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1630 return BrCondTarget != MI.getOperand(0).getMBB() &&
1631 MBB->isLayoutSuccessor(BrCondTarget);
1632}
1633
1635 MachineInstr &MI, MachineInstr *&BrCond) const {
// Swaps the targets of the G_BR (MI) and the preceding G_BRCOND, and inverts
// the G_BRCOND condition by XOR-ing it with the target's "true" constant.
1636 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
// The inverted condition is materialised right before the G_BRCOND.
1637 Builder.setInstrAndDebugLoc(*BrCond);
1638 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1639 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1640 // this to i1 only since we might not know for sure what kind of
1641 // compare generated the condition value.
1642 auto True = Builder.buildConstant(
1643 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1644 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1645
// The unconditional branch now goes where the conditional one used to.
1646 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1647 Observer.changingInstr(MI);
1648 MI.getOperand(0).setMBB(FallthroughBB);
1649 Observer.changedInstr(MI);
1650
1651 // Change the conditional branch to use the inverted condition and
1652 // new target block.
1653 Observer.changingInstr(*BrCond);
1654 BrCond->getOperand(0).setReg(Xor.getReg(0));
1655 BrCond->getOperand(1).setMBB(BrTarget);
1656 Observer.changedInstr(*BrCond);
1657}
1658
1660 MachineIRBuilder HelperBuilder(MI);
// Delegates to LegalizerHelper::lowerMemcpyInline through a throw-away
// builder and observer, and reports the outcome by comparing its result.
1661 GISelObserverWrapper DummyObserver;
1662 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1663 return Helper.lowerMemcpyInline(MI) ==
1665}
1666
1668 unsigned MaxLen) const {
// Delegates to LegalizerHelper::lowerMemCpyFamily, forwarding MaxLen, through
// a throw-away builder and observer, and reports the outcome by comparing its
// result.
1669 MachineIRBuilder HelperBuilder(MI);
1670 GISelObserverWrapper DummyObserver;
1671 LegalizerHelper Helper(HelperBuilder.getMF(), DummyObserver, HelperBuilder);
1672 return Helper.lowerMemCpyFamily(MI, MaxLen) ==
1674}
1675
1677 const MachineRegisterInfo &MRI,
1678 const APFloat &Val) {
// Evaluates the unary floating-point operation MI on the constant Val and
// returns the folded value. Opcodes not listed in the switch are treated as
// unreachable.
1679 APFloat Result(Val);
1680 switch (MI.getOpcode()) {
1681 default:
1682 llvm_unreachable("Unexpected opcode!");
1683 case TargetOpcode::G_FNEG: {
1684 Result.changeSign();
1685 return Result;
1686 }
1687 case TargetOpcode::G_FABS: {
1688 Result.clearSign();
1689 return Result;
1690 }
// Rounding operations: each maps to roundToIntegral with a fixed mode.
1691 case TargetOpcode::G_FCEIL:
1692 Result.roundToIntegral(APFloat::rmTowardPositive);
1693 return Result;
1694 case TargetOpcode::G_FFLOOR:
1695 Result.roundToIntegral(APFloat::rmTowardNegative);
1696 return Result;
1697 case TargetOpcode::G_INTRINSIC_TRUNC:
1698 Result.roundToIntegral(APFloat::rmTowardZero);
1699 return Result;
1700 case TargetOpcode::G_INTRINSIC_ROUND:
1701 Result.roundToIntegral(APFloat::rmNearestTiesToAway);
1702 return Result;
1703 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
1704 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1705 return Result;
1706 case TargetOpcode::G_FRINT:
1707 case TargetOpcode::G_FNEARBYINT:
1708 // Use default rounding mode (round to nearest, ties to even)
1709 Result.roundToIntegral(APFloat::rmNearestTiesToEven);
1710 return Result;
1711 case TargetOpcode::G_FPEXT:
1712 case TargetOpcode::G_FPTRUNC: {
1713 bool Unused;
1714 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1716 &Unused);
1717 return Result;
1718 }
// Square root and log2 are computed on a host double and fall through to the
// common conversion after the switch.
1719 case TargetOpcode::G_FSQRT: {
1720 bool Unused;
1722 &Unused);
1723 Result = APFloat(sqrt(Result.convertToDouble()));
1724 break;
1725 }
1726 case TargetOpcode::G_FLOG2: {
1727 bool Unused;
1729 &Unused);
1730 Result = APFloat(log2(Result.convertToDouble()));
1731 break;
1732 }
1733 }
1734 // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1735 // `buildFConstant` will assert on size mismatch. Only `G_FSQRT`, and
1736 // `G_FLOG2` reach here.
// The conversion target is the semantics of the input value Val.
1737 bool Unused;
1738 Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
1739 return Result;
1740}
1741
1743 MachineInstr &MI, const ConstantFP *Cst) const {
// Folds the unary FP operation MI applied to the constant Cst, emits the
// result as a floating-point constant into MI's destination operand and
// erases MI.
1744 APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
1745 const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
1746 Builder.buildFConstant(MI.getOperand(0), *NewCst);
1747 MI.eraseFromParent();
1748}
1749
1751 PtrAddChain &MatchInfo) const {
// On success MatchInfo holds the combined immediate, the innermost base
// register, the register bank looked up for the inner offset register, and
// the flags to put on the rewritten instruction.
1752 // We're trying to match the following pattern:
1753 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1754 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1755 // -->
1756 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1757
1758 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1759 return false;
1760
// Note the local naming: Add2/Imm1 are the root's pointer and offset operands,
// Base/Imm2 are those of the inner G_PTR_ADD.
1761 Register Add2 = MI.getOperand(1).getReg();
1762 Register Imm1 = MI.getOperand(2).getReg();
1763 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1764 if (!MaybeImmVal)
1765 return false;
1766
1767 MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1768 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1769 return false;
1770
1771 Register Base = Add2Def->getOperand(1).getReg();
1772 Register Imm2 = Add2Def->getOperand(2).getReg();
1773 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1774 if (!MaybeImm2Val)
1775 return false;
1776
1777 // Check if the new combined immediate forms an illegal addressing mode.
1778 // Do not combine if it was legal before but would get illegal.
1779 // To do so, we need to find a load/store user of the pointer to get
1780 // the access type.
// Only the first load/store user found is consulted, and the access type is
// taken from the type of its register operand 0.
1781 Type *AccessTy = nullptr;
1782 auto &MF = *MI.getMF();
1783 for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1784 if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1785 AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1786 MF.getFunction().getContext());
1787 break;
1788 }
1789 }
1791 APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1792 AMNew.BaseOffs = CombinedImm.getSExtValue();
// Without a load/store user there is no access type, so the addressing-mode
// check is skipped.
1793 if (AccessTy) {
1794 AMNew.HasBaseReg = true;
1796 AMOld.BaseOffs = MaybeImmVal->Value.getSExtValue();
1797 AMOld.HasBaseReg = true;
1798 unsigned AS = MRI.getType(Add2).getAddressSpace();
1799 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1800 if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1801 !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1802 return false;
1803 }
1804
1805 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
1806 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
1807 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
1808 // largest signed integer that fits into the index type, which is the maximum
1809 // size of allocated objects according to the IR Language Reference.
// A flag survives only if both original G_PTR_ADDs carry it.
1810 unsigned PtrAddFlags = MI.getFlags();
1811 unsigned LHSPtrAddFlags = Add2Def->getFlags();
1812 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
1813 bool IsInBounds =
1814 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
1815 unsigned Flags = 0;
1816 if (IsNoUWrap)
1818 if (IsInBounds) {
1821 }
1822
1823 // Pass the combined immediate to the apply function.
1824 MatchInfo.Imm = AMNew.BaseOffs;
1825 MatchInfo.Base = Base;
1826 MatchInfo.Bank = getRegBank(Imm2);
1827 MatchInfo.Flags = Flags;
1828 return true;
1829}
1830
1832 PtrAddChain &MatchInfo) const {
// Rewrites the G_PTR_ADD MI in place: its pointer operand becomes
// MatchInfo.Base, its offset a freshly built constant MatchInfo.Imm, and its
// flags are replaced by MatchInfo.Flags.
1833 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1834 MachineIRBuilder MIB(MI);
1835 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1836 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
// The new constant is assigned the register bank recorded in MatchInfo.
1837 setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1838 Observer.changingInstr(MI);
1839 MI.getOperand(1).setReg(MatchInfo.Base);
1840 MI.getOperand(2).setReg(NewOffset.getReg(0));
1841 MI.setFlags(MatchInfo.Flags);
1842 Observer.changedInstr(MI);
1843}
1844
1846 RegisterImmPair &MatchInfo) const {
// On success MatchInfo.Reg is the innermost shifted value and MatchInfo.Imm
// the sum of the two constant shift amounts. MatchInfo is written before the
// final G_USHLSAT check, so it may be modified even when false is returned.
1847 // We're trying to match the following pattern with any of
1848 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1849 // %t1 = SHIFT %base, G_CONSTANT imm1
1850 // %root = SHIFT %t1, G_CONSTANT imm2
1851 // -->
1852 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1853
1854 unsigned Opcode = MI.getOpcode();
1855 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1856 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1857 Opcode == TargetOpcode::G_USHLSAT) &&
1858 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1859
1860 Register Shl2 = MI.getOperand(1).getReg();
1861 Register Imm1 = MI.getOperand(2).getReg();
1862 auto MaybeImmVal = getIConstantVRegValWithLookThrough(Imm1, MRI);
1863 if (!MaybeImmVal)
1864 return false;
1865
// The inner instruction must be the same kind of shift as the root.
// NOTE(review): the getUniqueVRegDef result is dereferenced without a null
// check -- presumably a unique definition always exists here; confirm.
1866 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1867 if (Shl2Def->getOpcode() != Opcode)
1868 return false;
1869
1870 Register Base = Shl2Def->getOperand(1).getReg();
1871 Register Imm2 = Shl2Def->getOperand(2).getReg();
1872 auto MaybeImm2Val = getIConstantVRegValWithLookThrough(Imm2, MRI);
1873 if (!MaybeImm2Val)
1874 return false;
1875
1876 // Pass the combined immediate to the apply function.
1877 MatchInfo.Imm =
1878 (MaybeImmVal->Value.getZExtValue() + MaybeImm2Val->Value).getZExtValue();
1879 MatchInfo.Reg = Base;
1880
1881 // There is no simple replacement for a saturating unsigned left shift that
1882 // exceeds the scalar size.
1883 if (Opcode == TargetOpcode::G_USHLSAT &&
1884 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1885 return false;
1886
1887 return true;
1888}
1889
1891 RegisterImmPair &MatchInfo) const {
// Applies a matched shift chain: MI is either replaced by a zero constant
// (logical shift by at least the scalar width) or rewritten in place to shift
// MatchInfo.Reg by the combined amount.
1892 unsigned Opcode = MI.getOpcode();
1893 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1894 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1895 Opcode == TargetOpcode::G_USHLSAT) &&
1896 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1897
1898 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1899 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1900 auto Imm = MatchInfo.Imm;
1901
1902 if (Imm >= ScalarSizeInBits) {
1903 // Any logical shift that exceeds scalar size will produce zero.
1904 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1905 Builder.buildConstant(MI.getOperand(0), 0);
1906 MI.eraseFromParent();
1907 return;
1908 }
1909 // Arithmetic shift and saturating signed left shift have no effect beyond
1910 // scalar size.
// So the amount is clamped to the largest in-range value.
1911 Imm = ScalarSizeInBits - 1;
1912 }
1913
// The new shift-amount constant uses the type of the old amount operand.
1914 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1915 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1916 Observer.changingInstr(MI);
1917 MI.getOperand(1).setReg(MatchInfo.Reg);
1918 MI.getOperand(2).setReg(NewImm);
1919 Observer.changedInstr(MI);
1920}
1921
1923 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
// Returns true if \p MI is the root of the shift/logic/shift pattern described
// below. On success \p MatchInfo holds the logic instruction (Logic), the
// inner shift (Shift2), the logic operand that is not the inner shift
// (LogicNonShiftReg) and C0 + C1 (ValSum). Some MatchInfo fields are written
// before the final width check, so they may be set even when false is
// returned.
1924 // We're trying to match the following pattern with any of
1925 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1926 // with any of G_AND/G_OR/G_XOR logic instructions.
1927 // %t1 = SHIFT %X, G_CONSTANT C0
1928 // %t2 = LOGIC %t1, %Y
1929 // %root = SHIFT %t2, G_CONSTANT C1
1930 // -->
1931 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1932 // %t4 = SHIFT %Y, G_CONSTANT C1
1933 // %root = LOGIC %t3, %t4
1934 unsigned ShiftOpcode = MI.getOpcode();
1935 assert((ShiftOpcode == TargetOpcode::G_SHL ||
1936 ShiftOpcode == TargetOpcode::G_ASHR ||
1937 ShiftOpcode == TargetOpcode::G_LSHR ||
1938 ShiftOpcode == TargetOpcode::G_USHLSAT ||
1939 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
1940 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
1941
1942 // Match a one-use bitwise logic op.
1943 Register LogicDest = MI.getOperand(1).getReg();
1944 if (!MRI.hasOneNonDBGUse(LogicDest))
1945 return false;
1946
1947 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1948 unsigned LogicOpcode = LogicMI->getOpcode();
1949 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1950 LogicOpcode != TargetOpcode::G_XOR)
1951 return false;
1952
1953 // Find a matching one-use shift by constant.
1954 const Register C1 = MI.getOperand(2).getReg();
1955 auto MaybeImmVal = getIConstantVRegValWithLookThrough(C1, MRI);
// A zero outer shift amount is rejected, as is a non-constant one.
1956 if (!MaybeImmVal || MaybeImmVal->Value == 0)
1957 return false;
1958
1959 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1960
// Checks whether the given logic operand's definition is a one-use shift with
// the same opcode as the root and a constant amount; the amount is returned
// through ShiftVal.
1961 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1962 // Shift should match previous one and should be a one-use.
1963 if (MI->getOpcode() != ShiftOpcode ||
1964 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1965 return false;
1966
1967 // Must be a constant.
1968 auto MaybeImmVal =
1969 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1970 if (!MaybeImmVal)
1971 return false;
1972
1973 ShiftVal = MaybeImmVal->Value.getSExtValue();
1974 return true;
1975 };
1976
1977 // Logic ops are commutative, so check each operand for a match.
1978 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1979 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1980 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1981 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
// Only assigned by matchFirstShift when it returns true.
1982 uint64_t C0Val;
1983
1984 if (matchFirstShift(LogicMIOp1, C0Val)) {
1985 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1986 MatchInfo.Shift2 = LogicMIOp1;
1987 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1988 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1989 MatchInfo.Shift2 = LogicMIOp2;
1990 } else
1991 return false;
1992
1993 MatchInfo.ValSum = C0Val + C1Val;
1994
1995 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1996 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1997 return false;
1998
1999 MatchInfo.Logic = LogicMI;
2000 return true;
2001}
2002
2004 MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const {
// Emits the rewritten sequence for the match recorded in \p MatchInfo:
//   Shift1 = Opcode(source of MatchInfo.Shift2, constant ValSum)
//   Shift2 = Opcode(MatchInfo.LogicNonShiftReg, MI's original shift amount)
//   Dest   = MatchInfo.Logic's opcode applied to (Shift1, Shift2)
// and erases the old inner shift, the old logic instruction and \p MI.
2005 unsigned Opcode = MI.getOpcode();
2006 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
2007 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
2008 Opcode == TargetOpcode::G_SSHLSAT) &&
2009 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT");
2010
// ShlType is the type of MI's shift-amount operand; DestType is the type of
// MI's result and is used for both new shifts.
2011 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
2012 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
2013
2014 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
2015
2016 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
2017 Register Shift1 =
2018 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
2019
2020 // If LogicNonShiftReg is the same as Shift1Base, and the shift1 constant is
2021 // the same as the MatchInfo.Shift2 constant, CSEMIRBuilder will reuse the old
2022 // shift1 when building shift2. If we erased MatchInfo.Shift2 at the end, we
2023 // would actually remove the old shift1 and crash later. So erase it earlier
2024 // to avoid the crash.
2025 MatchInfo.Shift2->eraseFromParent();
2026
2027 Register Shift2Const = MI.getOperand(2).getReg();
2028 Register Shift2 = Builder
2029 .buildInstr(Opcode, {DestType},
2030 {MatchInfo.LogicNonShiftReg, Shift2Const})
2031 .getReg(0);
2032
2033 Register Dest = MI.getOperand(0).getReg();
2034 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
2035
2036 // This was one use so it's safe to remove it.
2037 MatchInfo.Logic->eraseFromParent();
2038
2039 MI.eraseFromParent();
2040}
2041
2043 BuildFnTy &MatchInfo) const {
// Matches a G_SHL by a constant (or splat) whose shifted value is an add/or
// with a constant (or splat) operand C1. On success \p MatchInfo is set to a
// builder callback that shifts both X and C1 by the original amount and
// recombines them with the source instruction's opcode into the G_SHL's
// destination register.
2044 assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
2045 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2046 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2047 auto &Shl = cast<GenericMachineInstr>(MI);
2048 Register DstReg = Shl.getReg(0);
2049 Register SrcReg = Shl.getReg(1);
2050 Register ShiftReg = Shl.getReg(2);
2051 Register X, C1;
2052
// Let the target lowering veto the transform; the second argument tells it
// whether we are past the pre-legalize stage.
2053 if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
2054 return false;
2055
2056 if (!mi_match(SrcReg, MRI,
2058 m_GOr(m_Reg(X), m_Reg(C1))))))
2059 return false;
2060
// Both the inner operand C1 and the shift amount must be integer constants or
// constant splats. C1Val/C2Val are only used for matching; the callback below
// re-uses the registers.
2061 APInt C1Val, C2Val;
2062 if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
2063 !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
2064 return false;
2065
2066 auto *SrcDef = MRI.getVRegDef(SrcReg);
2067 assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
2068 SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
2069 LLT SrcTy = MRI.getType(SrcReg);
// Everything is captured by copy; SrcDef is a pointer that is dereferenced
// when the callback runs.
2070 MatchInfo = [=](MachineIRBuilder &B) {
2071 auto S1 = B.buildShl(SrcTy, X, ShiftReg);
2072 auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
2073 B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
2074 };
2075 return true;
2076}
2077
2079 LshrOfTruncOfLshr &MatchInfo,
2080 MachineInstr &ShiftMI) const {
// Matches an outer G_LSHR \p MI together with an inner shift \p ShiftMI when
// both shift amounts are constants (or splats) and their sum is smaller than
// the inner shift's scalar width. On success \p MatchInfo receives the inner
// shift's source, the summed amount, the inner amount's type and the inner
// shift's type, and optionally a mask request (Mask / MaskVal).
2081 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2082
2083 Register N0 = MI.getOperand(1).getReg();
2084 Register N1 = MI.getOperand(2).getReg();
2085 unsigned OpSizeInBits = MRI.getType(N0).getScalarSizeInBits();
2086
2087 APInt N1C, N001C;
2088 if (!mi_match(N1, MRI, m_ICstOrSplat(N1C)))
2089 return false;
2090 auto N001 = ShiftMI.getOperand(2).getReg();
2091 if (!mi_match(N001, MRI, m_ICstOrSplat(N001C)))
2092 return false;
2093
// Zero-extend the narrower constant so both have the same bit width before
// they are added and compared.
2094 if (N001C.getBitWidth() > N1C.getBitWidth())
2095 N1C = N1C.zext(N001C.getBitWidth());
2096 else
2097 N001C = N001C.zext(N1C.getBitWidth());
2098
2099 Register InnerShift = ShiftMI.getOperand(0).getReg();
2100 LLT InnerShiftTy = MRI.getType(InnerShift);
2101 uint64_t InnerShiftSize = InnerShiftTy.getScalarSizeInBits();
2102 if ((N1C + N001C).ult(InnerShiftSize)) {
2103 MatchInfo.Src = ShiftMI.getOperand(1).getReg();
2104 MatchInfo.ShiftAmt = N1C + N001C;
2105 MatchInfo.ShiftAmtTy = MRI.getType(N001);
2106 MatchInfo.InnerShiftTy = InnerShiftTy;
2107
// When the inner amount plus the outer operand width equals the inner width,
// succeed without requesting a mask.
2108 if ((N001C + OpSizeInBits) == InnerShiftSize)
2109 return true;
// Otherwise a mask is requested, but only if both N0 and the inner shift
// result have a single use.
2110 if (MRI.hasOneUse(N0) && MRI.hasOneUse(InnerShift)) {
2111 MatchInfo.Mask = true;
2112 MatchInfo.MaskVal = APInt(N1C.getBitWidth(), OpSizeInBits) - N1C;
2113 return true;
2114 }
2115 }
2116 return false;
2117}
2118
2120 MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const {
// Builds a single G_LSHR of MatchInfo.Src by the constant MatchInfo.ShiftAmt
// in MatchInfo.InnerShiftTy, ANDs it with a mask constant when MatchInfo.Mask
// is set, truncates the result into \p MI's destination and erases \p MI.
2121 assert(MI.getOpcode() == TargetOpcode::G_LSHR && "Expected a G_LSHR");
2122
2123 Register Dst = MI.getOperand(0).getReg();
2124 auto ShiftAmt =
2125 Builder.buildConstant(MatchInfo.ShiftAmtTy, MatchInfo.ShiftAmt);
2126 auto Shift =
2127 Builder.buildLShr(MatchInfo.InnerShiftTy, MatchInfo.Src, ShiftAmt);
2128 if (MatchInfo.Mask == true) {
2129 APInt MaskVal =
2131 MatchInfo.MaskVal.getZExtValue());
2132 auto Mask = Builder.buildConstant(MatchInfo.InnerShiftTy, MaskVal);
2133 auto And = Builder.buildAnd(MatchInfo.InnerShiftTy, Shift, Mask);
2134 Builder.buildTrunc(Dst, And);
2135 } else
2136 Builder.buildTrunc(Dst, Shift);
2137 MI.eraseFromParent();
2138}
2139
2141 unsigned &ShiftVal) const {
// Matches a G_MUL whose operand 2 is an integer constant (looking through
// copies/extensions as the helper allows) for which exactLogBase2() yields a
// value other than -1. \p ShiftVal receives that value; it is written even
// when the function returns false.
2142 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2143 auto MaybeImmVal =
2144 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2145 if (!MaybeImmVal)
2146 return false;
2147
2148 ShiftVal = MaybeImmVal->Value.exactLogBase2();
// ShiftVal is unsigned, so the -1 "no exact log2" result is detected by
// casting back to a signed 32-bit value.
2149 return (static_cast<int32_t>(ShiftVal) != -1);
2150}
2151
2153 unsigned &ShiftVal) const {
// Converts the G_MUL \p MI into a G_SHL in place: a constant \p ShiftVal of
// the result type is built in front of MI, MI's descriptor is switched to
// G_SHL and operand 2 is pointed at the new constant. The mutation is
// bracketed by observer notifications.
2154 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2155 MachineIRBuilder MIB(MI);
2156 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2157 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2158 Observer.changingInstr(MI);
2159 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2160 MI.getOperand(2).setReg(ShiftCst.getReg(0));
2161 if (ShiftVal == ShiftTy.getScalarSizeInBits() - 1)
2163 Observer.changedInstr(MI);
2164}
2165
2167 BuildFnTy &MatchInfo) const {
// Matches a G_SUB and, on success, sets \p MatchInfo to a callback that
// turns it into a G_ADD of the negated right-hand constant, mutating the
// instruction in place.
2168 GSub &Sub = cast<GSub>(MI);
2169
2170 LLT Ty = MRI.getType(Sub.getReg(0));
2171
// The rewrite produces a G_ADD of type Ty, so that must be legal unless we
// are still before the legalizer.
2172 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {Ty}}))
2173 return false;
2174
2176 return false;
2177
2178 APInt Imm = getIConstantFromReg(Sub.getRHSReg(), MRI);
2179
// MI is captured by reference (it is mutated when the callback runs); all
// other values are captured by copy.
2180 MatchInfo = [=, &MI](MachineIRBuilder &B) {
2181 auto NegCst = B.buildConstant(Ty, -Imm);
2182 Observer.changingInstr(MI);
2183 MI.setDesc(B.getTII().get(TargetOpcode::G_ADD));
2184 MI.getOperand(2).setReg(NegCst.getReg(0));
2186 if (Imm.isMinSignedValue())
2188 Observer.changedInstr(MI);
2189 };
2190 return true;
2191}
2192
2193 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2195 RegisterImmPair &MatchData) const {
// On success \p MatchData.Reg is the un-extended source and \p MatchData.Imm
// the constant shift amount. Both fields are written before the final
// known-bits check, so they may be set even when false is returned.
// VT must be non-null: it is queried for known-zero bits below.
2196 assert(MI.getOpcode() == TargetOpcode::G_SHL && VT);
2197 if (!getTargetLowering().isDesirableToPullExtFromShl(MI))
2198 return false;
2199
2200 Register LHS = MI.getOperand(1).getReg();
2201
// Accept any of the three extension kinds as the shifted value.
2202 Register ExtSrc;
2203 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2204 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2205 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2206 return false;
2207
2208 Register RHS = MI.getOperand(2).getReg();
2209 MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
2210 auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
2211 if (!MaybeShiftAmtVal)
2212 return false;
2213
// The legality query is only made when legalizer info is available.
2214 if (LI) {
2215 LLT SrcTy = MRI.getType(ExtSrc);
2216
2217 // We only really care about the legality with the shifted value. We can
2218 // pick any type for the constant shift amount, so ask the target what to
2219 // use. Otherwise we would have to guess and hope it is reported as legal.
2220 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2221 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2222 return false;
2223 }
2224
2225 int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
2226 MatchData.Reg = ExtSrc;
2227 MatchData.Imm = ShiftAmt;
2228
// The narrow shift is only accepted when the source has at least ShiftAmt
// leading bits known to be zero and the amount is below the source's scalar
// width.
2229 unsigned MinLeadingZeros = VT->getKnownZeroes(ExtSrc).countl_one();
2230 unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
2231 return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
2232}
2233
2235 MachineInstr &MI, const RegisterImmPair &MatchData) const {
// Builds a G_SHL of MatchData.Reg by the constant MatchData.Imm in the source
// register's own type (carrying over MI's flags), zero-extends the result
// into \p MI's destination operand and erases \p MI.
2236 Register ExtSrcReg = MatchData.Reg;
2237 int64_t ShiftAmtVal = MatchData.Imm;
2238
2239 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
// The shift-amount constant is created with the same type as the value being
// shifted.
2240 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2241 auto NarrowShift =
2242 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2243 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2244 MI.eraseFromParent();
2245}
2246
2248 Register &MatchInfo) const {
// Matches a merge whose sources are, in order, exactly the results of one
// unmerge that has as many defs as the merge has sources. On success
// \p MatchInfo is the unmerge's source register.
2250 SmallVector<Register, 16> MergedValues;
2251 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2252 MergedValues.emplace_back(Merge.getSourceReg(I));
2253
// The candidate unmerge is found through the definition of the first merged
// value.
2254 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2255 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2256 return false;
2257
// Every merged value must be the unmerge def at the same index.
2258 for (unsigned I = 0; I < MergedValues.size(); ++I)
2259 if (MergedValues[I] != Unmerge->getReg(I))
2260 return false;
2261
2262 MatchInfo = Unmerge->getSourceReg();
2263 return true;
2264}
2265
2267 const MachineRegisterInfo &MRI) {
// Follows a chain of G_BITCASTs starting at Reg and returns the first
// register that is not itself the result of a bitcast. Each successful match
// rebinds Reg to the bitcast's source, so the loop body is intentionally
// empty.
2268 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2269 ;
2270
2271 return Reg;
2272}
2273
2275 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
// Matches an unmerge whose source, looking through bitcasts, is defined by a
// merge-like instruction whose first source has the same type or the same
// size in bits as the unmerge's first result. On success all of the merge's
// source registers are appended to \p Operands.
2276 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2277 "Expected an unmerge");
2278 auto &Unmerge = cast<GUnmerge>(MI);
2279 Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2280
2281 auto *SrcInstr = getOpcodeDef<GMergeLikeInstr>(SrcReg, MRI);
2282 if (!SrcInstr)
2283 return false;
2284
2285 // Check the source type of the merge.
2286 LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2287 LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2288 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2289 if (SrcMergeTy != Dst0Ty && !SameSize)
2290 return false;
2291 // They are the same now (modulo a bitcast).
2292 // We can collect all the src registers.
2293 for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2294 Operands.push_back(SrcInstr->getSourceReg(Idx));
2295 return true;
2296}
2297
2299 MachineInstr &MI, SmallVectorImpl<Register> &Operands) const {
// Replaces each result of the unmerge \p MI with the entry of \p Operands at
// the same index, then erases \p MI. When the first operand's type equals the
// first result's type the registers are substituted directly; otherwise each
// result is produced by a cast from the corresponding operand.
2300 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2301 "Expected an unmerge");
2302 assert((MI.getNumOperands() - 1 == Operands.size()) &&
2303 "Not enough operands to replace all defs");
// All operands of the unmerge except the last one (the source) are defs.
2304 unsigned NumElems = MI.getNumOperands() - 1;
2305
2306 LLT SrcTy = MRI.getType(Operands[0]);
2307 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2308 bool CanReuseInputDirectly = DstTy == SrcTy;
2309 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2310 Register DstReg = MI.getOperand(Idx).getReg();
2311 Register SrcReg = Operands[Idx];
2312
2313 // This combine may run after RegBankSelect, so we need to be aware of
2314 // register banks.
// If the destination has a class/bank assigned that differs from the
// source's, go through a COPY that is given the destination's class/bank.
2315 const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
2316 if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
2317 SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
2318 MRI.setRegClassOrRegBank(SrcReg, DstCB);
2319 }
2320
2321 if (CanReuseInputDirectly)
2322 replaceRegWith(MRI, DstReg, SrcReg);
2323 else
2324 Builder.buildCast(DstReg, SrcReg);
2325 }
2326 MI.eraseFromParent();
2327}
2328
2330 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// Matches an instruction whose last operand is defined by a G_CONSTANT or
// G_FCONSTANT and splits that constant's bits into one APInt per preceding
// operand, each as wide as the first result's type. The pieces are appended
// to \p Csts starting with the lowest bits.
2331 unsigned SrcIdx = MI.getNumOperands() - 1;
2332 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2333 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2334 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2335 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2336 return false;
2337 // Break down the big constant in smaller ones.
2338 const MachineOperand &CstVal = SrcInstr->getOperand(1);
// A floating-point constant is handled through its raw bit pattern.
2339 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2340 ? CstVal.getCImm()->getValue()
2341 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2342
2343 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2344 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2345 // Unmerge a constant.
// Take the low ShiftAmt bits as the next piece, then shift them out.
2346 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2347 Csts.emplace_back(Val.trunc(ShiftAmt));
2348 Val = Val.lshr(ShiftAmt);
2349 }
2350
2351 return true;
2352}
2353
2355 MachineInstr &MI, SmallVectorImpl<APInt> &Csts) const {
// Defines each result register of the unmerge \p MI with a constant built
// from the entry of \p Csts at the same index, then erases \p MI.
2356 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2357 "Expected an unmerge");
2358 assert((MI.getNumOperands() - 1 == Csts.size()) &&
2359 "Not enough operands to replace all defs");
// Every operand except the last one (the source) is a def.
2360 unsigned NumElems = MI.getNumOperands() - 1;
2361 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2362 Register DstReg = MI.getOperand(Idx).getReg();
2363 Builder.buildConstant(DstReg, Csts[Idx]);
2364 }
2365
2366 MI.eraseFromParent();
2367}
2368
2371 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Returns true when the last operand of \p MI is defined by an implicit-def.
// \p MatchInfo is always assigned, regardless of the result: it is a callback
// that builds an undef into every operand of MI except the last. The callback
// captures MI by reference, so MI must still be alive when it is invoked.
2372 unsigned SrcIdx = MI.getNumOperands() - 1;
2373 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2374 MatchInfo = [&MI](MachineIRBuilder &B) {
2375 unsigned NumElems = MI.getNumOperands() - 1;
2376 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2377 Register DstReg = MI.getOperand(Idx).getReg();
2378 B.buildUndef(DstReg);
2379 }
2380 };
2381 return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
2382}
2383
2385 MachineInstr &MI) const {
// Returns true for an unmerge whose first result and source are both
// non-vector types and whose results other than the first have no non-debug
// uses.
2386 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2387 "Expected an unmerge");
// Operand index getNumDefs() is the first operand after the defs, i.e. the
// source being unmerged.
2388 if (MRI.getType(MI.getOperand(0).getReg()).isVector() ||
2389 MRI.getType(MI.getOperand(MI.getNumDefs()).getReg()).isVector())
2390 return false;
2391 // Check that all the lanes are dead except the first one.
2392 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2393 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2394 return false;
2395 }
2396 return true;
2397}
2398
2400 MachineInstr &MI) const {
// Replaces \p MI with a truncate of its source (the operand following the
// defs) into its first result register, then erases \p MI.
2401 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2402 Register Dst0Reg = MI.getOperand(0).getReg();
2403 Builder.buildTrunc(Dst0Reg, SrcReg);
2404 MI.eraseFromParent();
2405}
2406
// Returns true for a scalar unmerge of a scalar G_ZEXT result when the
// zext's source type is no wider than the unmerge's first result type.
2408 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2409 "Expected an unmerge");
2410 Register Dst0Reg = MI.getOperand(0).getReg();
2411 LLT Dst0Ty = MRI.getType(Dst0Reg);
2412 // G_ZEXT on vector applies to each lane, so it will
2413 // affect all destinations. Therefore we won't be able
2414 // to simplify the unmerge to just the first definition.
2415 if (Dst0Ty.isVector())
2416 return false;
// The operand following the defs is the value being unmerged.
2417 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2418 LLT SrcTy = MRI.getType(SrcReg);
2419 if (SrcTy.isVector())
2420 return false;
2421
2422 Register ZExtSrcReg;
2423 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2424 return false;
2425
2426 // Finally we can replace the first definition with
2427 // a zext of the source if the definition is big enough to hold
2428 // all of ZExtSrc bits.
2429 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2430 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2431}
2432
// Rewrites an unmerge of a G_ZEXT result: the first result becomes a zext of
// the zext's source (or that source itself when the sizes are equal) and
// every other result is replaced by a constant zero. \p MI is erased.
2434 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2435 "Expected an unmerge");
2436
2437 Register Dst0Reg = MI.getOperand(0).getReg();
2438
2439 MachineInstr *ZExtInstr =
2440 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2441 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2442 "Expecting a G_ZEXT");
2443
2444 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2445 LLT Dst0Ty = MRI.getType(Dst0Reg);
2446 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2447
2448 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2449 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2450 } else {
2451 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2452 "ZExt src doesn't fit in destination");
2453 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2454 }
2455
// The zero constant is created lazily on the first remaining def and then
// shared by all of them; none is built if there is only one def.
2456 Register ZeroReg;
2457 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2458 if (!ZeroReg)
2459 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2460 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2461 }
2462 MI.eraseFromParent();
2463}
2464
2466 unsigned TargetShiftSize,
2467 unsigned &ShiftVal) const {
// Matches a scalar G_SHL/G_LSHR/G_ASHR that is wider than \p TargetShiftSize
// and whose shift amount is a constant in [Size / 2, Size). \p ShiftVal
// receives the constant amount; it is written even when the range check
// fails.
2468 assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2469 MI.getOpcode() == TargetOpcode::G_LSHR ||
2470 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2471
2472 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2473 if (Ty.isVector()) // TODO:
2474 return false;
2475
2476 // Don't narrow further than the requested size.
2477 unsigned Size = Ty.getSizeInBits();
2478 if (Size <= TargetShiftSize)
2479 return false;
2480
2481 auto MaybeImmVal =
2482 getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2483 if (!MaybeImmVal)
2484 return false;
2485
2486 ShiftVal = MaybeImmVal->Value.getSExtValue();
2487 return ShiftVal >= Size / 2 && ShiftVal < Size;
2488}
2489
2491 MachineInstr &MI, const unsigned &ShiftVal) const {
// Rewrites a shift by a constant \p ShiftVal of at least half the type width
// as: unmerge the source into two half-width values, shift one half by
// ShiftVal - HalfSize where needed, and merge the pieces back into \p MI's
// destination. \p MI is erased afterwards.
2492 Register DstReg = MI.getOperand(0).getReg();
2493 Register SrcReg = MI.getOperand(1).getReg();
2494 LLT Ty = MRI.getType(SrcReg);
2495 unsigned Size = Ty.getSizeInBits();
2496 unsigned HalfSize = Size / 2;
2497 assert(ShiftVal >= HalfSize);
2498
2499 LLT HalfTy = LLT::scalar(HalfSize);
2500
// Unmerge result 0 is the low half and result 1 the high half, as in the
// "lo, hi = G_UNMERGE_VALUES x" sketches below.
2501 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2502 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2503
2504 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2505 Register Narrowed = Unmerge.getReg(1);
2506
2507 // dst = G_LSHR s64:x, C for C >= 32
2508 // =>
2509 // lo, hi = G_UNMERGE_VALUES x
2510 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2511
2512 if (NarrowShiftAmt != 0) {
2513 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2514 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2515 }
2516
2517 auto Zero = Builder.buildConstant(HalfTy, 0);
2518 Builder.buildMergeLikeInstr(DstReg, {Narrowed, Zero});
2519 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2520 Register Narrowed = Unmerge.getReg(0);
2521 // dst = G_SHL s64:x, C for C >= 32
2522 // =>
2523 // lo, hi = G_UNMERGE_VALUES x
2524 // dst = G_MERGE_VALUES 0, (G_SHL lo, C - 32)
2525 if (NarrowShiftAmt != 0) {
2526 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2527 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2528 }
2529
2530 auto Zero = Builder.buildConstant(HalfTy, 0);
2531 Builder.buildMergeLikeInstr(DstReg, {Zero, Narrowed});
2532 } else {
2533 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
// Hi is the high half shifted arithmetically by HalfSize - 1; it is used as
// the upper half of every result below.
2534 auto Hi = Builder.buildAShr(
2535 HalfTy, Unmerge.getReg(1),
2536 Builder.buildConstant(HalfTy, HalfSize - 1));
2537
2538 if (ShiftVal == HalfSize) {
2539 // (G_ASHR i64:x, 32) ->
2540 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2541 Builder.buildMergeLikeInstr(DstReg, {Unmerge.getReg(1), Hi});
2542 } else if (ShiftVal == Size - 1) {
2543 // Don't need a second shift.
2544 // (G_ASHR i64:x, 63) ->
2545 // %narrowed = (G_ASHR hi_32(x), 31)
2546 // G_MERGE_VALUES %narrowed, %narrowed
2547 Builder.buildMergeLikeInstr(DstReg, {Hi, Hi});
2548 } else {
2549 auto Lo = Builder.buildAShr(
2550 HalfTy, Unmerge.getReg(1),
2551 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2552
2553 // (G_ASHR i64:x, C) ->, for C >= 32
2554 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2555 Builder.buildMergeLikeInstr(DstReg, {Lo, Hi});
2556 }
2557 }
2558
2559 MI.eraseFromParent();
2560}
2561
2563 MachineInstr &MI, unsigned TargetShiftAmount) const {
// Convenience wrapper: runs matchCombineShiftToUnmerge on \p MI with
// \p TargetShiftAmount and, if it matches, immediately applies the rewrite.
// Returns true when the rewrite was applied.
2564 unsigned ShiftAmt;
2565 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2566 applyCombineShiftToUnmerge(MI, ShiftAmt);
2567 return true;
2568 }
2569
2570 return false;
2571}
2572
2574 Register &Reg) const {
// Matches a G_INTTOPTR whose source is a G_PTRTOINT of a register that
// already has the G_INTTOPTR's destination type. On success \p Reg is bound
// to that original pointer register.
2575 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2576 Register DstReg = MI.getOperand(0).getReg();
2577 LLT DstTy = MRI.getType(DstReg);
2578 Register SrcReg = MI.getOperand(1).getReg();
2579 return mi_match(SrcReg, MRI,
2580 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2581}
2582
2584 Register &Reg) const {
// Replaces the G_INTTOPTR \p MI with a COPY of \p Reg into its destination
// register and erases \p MI.
2585 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2586 Register DstReg = MI.getOperand(0).getReg();
2587 Builder.buildCopy(DstReg, Reg);
2588 MI.eraseFromParent();
2589}
2590
2592 Register &Reg) const {
// Replaces the G_PTRTOINT \p MI with a zero-extend-or-truncate of \p Reg into
// its destination register and erases \p MI.
2593 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2594 Register DstReg = MI.getOperand(0).getReg();
2595 Builder.buildZExtOrTrunc(DstReg, Reg);
2596 MI.eraseFromParent();
2597}
2598
2600 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
// Matches a G_ADD where one operand is a G_PTRTOINT of a pointer whose scalar
// size equals the integer type's scalar size. On success PtrReg.first is the
// pointer register and PtrReg.second is true when the G_PTRTOINT was the
// right-hand operand (so the operands must be commuted). PtrReg is modified
// even when no match is found.
2601 assert(MI.getOpcode() == TargetOpcode::G_ADD);
2602 Register LHS = MI.getOperand(1).getReg();
2603 Register RHS = MI.getOperand(2).getReg();
2604 LLT IntTy = MRI.getType(LHS);
2605
2606 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2607 // instruction.
2608 PtrReg.second = false;
2609 for (Register SrcReg : {LHS, RHS}) {
2610 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2611 // Don't handle cases where the integer is implicitly converted to the
2612 // pointer width.
2613 LLT PtrTy = MRI.getType(PtrReg.first);
2614 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2615 return true;
2616 }
2617
// The first iteration (LHS) did not match; a match on the second iteration
// means the operands have to be swapped.
2618 PtrReg.second = true;
2619 }
2620
2621 return false;
2622}
2623
2625 MachineInstr &MI, std::pair<Register, bool> &PtrReg) const {
// Replaces the G_ADD \p MI with a G_PTR_ADD on the pointer PtrReg.first
// followed by a G_PTRTOINT into MI's destination, then erases \p MI.
2626 Register Dst = MI.getOperand(0).getReg();
2627 Register LHS = MI.getOperand(1).getReg();
2628 Register RHS = MI.getOperand(2).getReg();
2629
// After the optional swap, RHS is the integer operand that did not come from
// the G_PTRTOINT; LHS is then replaced by the pointer itself.
2630 const bool DoCommute = PtrReg.second;
2631 if (DoCommute)
2632 std::swap(LHS, RHS);
2633 LHS = PtrReg.first;
2634
2635 LLT PtrTy = MRI.getType(LHS);
2636
2637 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2638 Builder.buildPtrToInt(Dst, PtrAdd);
2639 MI.eraseFromParent();
2640}
2641
2643 APInt &NewCst) const {
// Matches a G_PTR_ADD whose offset is an integer constant and whose base is a
// G_INTTOPTR of an integer constant. On success \p NewCst is the sum of the
// base constant (zero-extended or truncated) and the offset (sign-extended or
// truncated), both at the bit size of the result type.
2644 auto &PtrAdd = cast<GPtrAdd>(MI);
2645 Register LHS = PtrAdd.getBaseReg();
2646 Register RHS = PtrAdd.getOffsetReg();
// Local reference obtained from the builder's function; it shadows the MRI
// member within this function.
2647 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2648
2649 if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
2650 APInt Cst;
2651 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2652 auto DstTy = MRI.getType(PtrAdd.getReg(0));
2653 // G_INTTOPTR uses zero-extension
2654 NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
2655 NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
2656 return true;
2657 }
2658 }
2659
2660 return false;
2661}
2662
2664 APInt &NewCst) const {
// Replaces the G_PTR_ADD \p MI with a constant \p NewCst built directly into
// its result register, then erases the instruction.
2665 auto &PtrAdd = cast<GPtrAdd>(MI);
2666 Register Dst = PtrAdd.getReg(0);
2667
2668 Builder.buildConstant(Dst, NewCst);
2669 PtrAdd.eraseFromParent();
2670}
2671
2673 Register &Reg) const {
// Matches a G_ANYEXT whose source (ignoring copies) is a G_TRUNC of a
// register that already has the G_ANYEXT's destination type and that may
// replace the destination register. On success \p Reg is that register.
2674 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2675 Register DstReg = MI.getOperand(0).getReg();
2676 Register SrcReg = MI.getOperand(1).getReg();
// Look through copies when a valid original source is found; otherwise keep
// the direct operand.
2677 Register OriginalSrcReg = getSrcRegIgnoringCopies(SrcReg, MRI);
2678 if (OriginalSrcReg.isValid())
2679 SrcReg = OriginalSrcReg;
2680 LLT DstTy = MRI.getType(DstReg);
2681 return mi_match(SrcReg, MRI,
2682 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2683 canReplaceReg(DstReg, Reg, MRI);
2684}
2685
2687 Register &Reg) const {
// Matches a G_ZEXT of a G_TRUNC of a register \p Reg that has the G_ZEXT's
// destination type and may replace the destination, provided known-bits
// analysis shows that \p Reg has at least DstSize - SrcSize leading zero
// bits (i.e. the bits removed by the truncate are known to be zero).
2688 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2689 Register DstReg = MI.getOperand(0).getReg();
2690 Register SrcReg = MI.getOperand(1).getReg();
2691 LLT DstTy = MRI.getType(DstReg);
2692 if (mi_match(SrcReg, MRI,
2693 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2694 canReplaceReg(DstReg, Reg, MRI)) {
2695 unsigned DstSize = DstTy.getScalarSizeInBits();
2696 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2697 return VT->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2698 }
2699 return false;
2700}
2701
// Picks the intermediate type in which a truncated right shift is performed:
// ShiftTy with 32-bit elements when the shift is wider than 32 bits and the
// truncated type is narrower than 32 bits, otherwise ShiftTy unchanged.
2703 const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
2704 const unsigned TruncSize = TruncTy.getScalarSizeInBits();
2705
2706 // ShiftTy > 32 > TruncTy -> 32
2707 if (ShiftSize > 32 && TruncSize < 32)
2708 return ShiftTy.changeElementSize(32);
2709
2710 // TODO: We could also reduce to 16 bits, but that's more target-dependent.
2711 // Some targets like it, some don't, some only like it under certain
2712 // conditions/processor versions, etc.
2713 // A TL hook might be needed for this.
2714
2715 // Don't combine
2716 return ShiftTy;
2717}
2718
2720 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// Matches a G_TRUNC of a single-use shift (found ignoring copies) that can be
// performed in a narrower type. On success \p MatchInfo holds the shift
// instruction and the type in which the new shift is to be built.
2721 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2722 Register DstReg = MI.getOperand(0).getReg();
2723 Register SrcReg = MI.getOperand(1).getReg();
2724
2725 if (!MRI.hasOneNonDBGUse(SrcReg))
2726 return false;
2727
2728 LLT SrcTy = MRI.getType(SrcReg);
2729 LLT DstTy = MRI.getType(DstReg);
2730
2731 MachineInstr *SrcMI = getDefIgnoringCopies(SrcReg, MRI);
2732 const auto &TL = getTargetLowering();
2733
2734 LLT NewShiftTy;
2735 switch (SrcMI->getOpcode()) {
2736 default:
2737 return false;
// Left shifts are narrowed all the way down to the truncated type.
2738 case TargetOpcode::G_SHL: {
2739 NewShiftTy = DstTy;
2740
2741 // Make sure new shift amount is legal.
2742 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2743 if (Known.getMaxValue().uge(NewShiftTy.getScalarSizeInBits()))
2744 return false;
2745 break;
2746 }
2747 case TargetOpcode::G_LSHR:
2748 case TargetOpcode::G_ASHR: {
2749 // For right shifts, we conservatively do not do the transform if the TRUNC
2750 // has any STORE users. The reason is that if we change the type of the
2751 // shift, we may break the truncstore combine.
2752 //
2753 // TODO: Fix truncstore combine to handle (trunc(lshr (trunc x), k)).
2754 for (auto &User : MRI.use_instructions(DstReg))
2755 if (User.getOpcode() == TargetOpcode::G_STORE)
2756 return false;
2757
// An unchanged type from the helper means there is no suitable intermediate
// type, so the combine is skipped.
2758 NewShiftTy = getMidVTForTruncRightShiftCombine(SrcTy, DstTy);
2759 if (NewShiftTy == SrcTy)
2760 return false;
2761
2762 // Make sure we won't lose information by truncating the high bits.
2763 KnownBits Known = VT->getKnownBits(SrcMI->getOperand(2).getReg());
2764 if (Known.getMaxValue().ugt(NewShiftTy.getScalarSizeInBits() -
2765 DstTy.getScalarSizeInBits()))
2766 return false;
2767 break;
2768 }
2769 }
2770
2772 {SrcMI->getOpcode(),
2773 {NewShiftTy, TL.getPreferredShiftAmountTy(NewShiftTy)}}))
2774 return false;
2775
2776 MatchInfo = std::make_pair(SrcMI, NewShiftTy);
2777 return true;
2778}
2779
2781 MachineInstr &MI, std::pair<MachineInstr *, LLT> &MatchInfo) const {
// Rebuilds the matched shift in the narrower type MatchInfo.second: the
// shift's source is truncated to that type, a new shift with the same opcode
// and the original amount register is built, and its result replaces (or is
// truncated into) \p MI's destination. \p MI is erased.
2782 MachineInstr *ShiftMI = MatchInfo.first;
2783 LLT NewShiftTy = MatchInfo.second;
2784
2785 Register Dst = MI.getOperand(0).getReg();
2786 LLT DstTy = MRI.getType(Dst);
2787
2788 Register ShiftAmt = ShiftMI->getOperand(2).getReg();
2789 Register ShiftSrc = ShiftMI->getOperand(1).getReg();
2790 ShiftSrc = Builder.buildTrunc(NewShiftTy, ShiftSrc).getReg(0);
2791
2792 Register NewShift =
2793 Builder
2794 .buildInstr(ShiftMI->getOpcode(), {NewShiftTy}, {ShiftSrc, ShiftAmt})
2795 .getReg(0);
2796
// No further truncate is needed when the new shift already has the
// destination type.
2797 if (NewShiftTy == DstTy)
2798 replaceRegWith(MRI, Dst, NewShift);
2799 else
2800 Builder.buildTrunc(Dst, NewShift);
2801
2802 eraseInst(MI);
2803}
2804
// True if at least one explicit use operand of MI is a register defined by
// G_IMPLICIT_DEF.
2806 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2807 return MO.isReg() &&
2808 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2809 });
2810}
2811
// True if every explicit use operand of MI is either not a register or a
// register defined by G_IMPLICIT_DEF.
2813 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2814 return !MO.isReg() ||
2815 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2816 });
2817}
2818
// True if every element of the G_SHUFFLE_VECTOR's mask (operand 3) is
// negative.
2820 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2821 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2822 return all_of(Mask, [](int Elt) { return Elt < 0; });
2823}
2824
// True if operand 0 of the G_STORE is defined by G_IMPLICIT_DEF.
2826 assert(MI.getOpcode() == TargetOpcode::G_STORE);
2827 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2828 MRI);
2829}
2830
// True if operand 1 of the G_SELECT is defined by G_IMPLICIT_DEF.
2832 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2833 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2834 MRI);
2835}
2836
2838 MachineInstr &MI) const {
// True if the insert/extract element instruction uses a constant index that
// is not smaller than the number of elements of its vector operand. Scalable
// vectors and non-constant indices never match.
2839 assert((MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT ||
2840 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT) &&
2841 "Expected an insert/extract element op");
2842 LLT VecTy = MRI.getType(MI.getOperand(1).getReg());
2843 if (VecTy.isScalableVector())
2844 return false;
2845
// The index is operand 2 for an extract and operand 3 for an insert.
2846 unsigned IdxIdx =
2847 MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
2848 auto Idx = getIConstantVRegVal(MI.getOperand(IdxIdx).getReg(), MRI);
2849 if (!Idx)
2850 return false;
2851 return Idx->getZExtValue() >= VecTy.getNumElements();
2852}
2853
2855 unsigned &OpIdx) const {
// Matches a select whose condition is a constant or constant splat vector.
// On success \p OpIdx is the index of the operand the select resolves to:
// 3 when the constant is zero, 2 otherwise.
2856 GSelect &SelMI = cast<GSelect>(MI);
2857 auto Cst =
2858 isConstantOrConstantSplatVector(*MRI.getVRegDef(SelMI.getCondReg()), MRI);
2859 if (!Cst)
2860 return false;
2861 OpIdx = Cst->isZero() ? 3 : 2;
2862 return true;
2863}
2864
2865void CombinerHelper::eraseInst(MachineInstr &MI) const { MI.eraseFromParent(); }
2866
// Returns true when the two register operands can be shown to hold the same
// value: either they resolve (ignoring copies) to the same defining
// instruction and register, or to two instructions that
// TargetInstrInfo::produceSameValue accepts, with matching def operand index.
// Non-register operands and memory accesses that are not dereferenceable
// invariant loads never match.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,; confirm.
2868 const MachineOperand &MOP2) const {
2869 if (!MOP1.isReg() || !MOP2.isReg())
2870 return false;
2871 auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2872 if (!InstAndDef1)
2873 return false;
2874 auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2875 if (!InstAndDef2)
2876 return false;
2877 MachineInstr *I1 = InstAndDef1->MI;
2878 MachineInstr *I2 = InstAndDef2->MI;
2879
2880 // Handle a case like this:
2881 //
2882 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2883 //
2884 // Even though %0 and %1 are produced by the same instruction they are not
2885 // the same values.
2886 if (I1 == I2)
2887 return MOP1.getReg() == MOP2.getReg();
2888
2889 // If we have an instruction which loads or stores, we can't guarantee that
2890 // it is identical.
2891 //
2892 // For example, we may have
2893 //
2894 // %x1 = G_LOAD %addr (load N from @somewhere)
2895 // ...
2896 // call @foo
2897 // ...
2898 // %x2 = G_LOAD %addr (load N from @somewhere)
2899 // ...
2900 // %or = G_OR %x1, %x2
2901 //
2902 // It's possible that @foo will modify whatever lives at the address we're
2903 // loading from. To be safe, let's just assume that all loads and stores
2904 // are different (unless we have something which is guaranteed to not
2905 // change.)
2906 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad())
2907 return false;
2908
2909 // If both instructions are loads or stores, they are equal only if both
2910 // are dereferenceable invariant loads with the same number of bits.
2911 if (I1->mayLoadOrStore() && I2->mayLoadOrStore()) {
// NOTE(review): listing lines 2912-2913, which declare LS1 and LS2, are
// missing from this listing -- restore them from the real source.
2914 if (!LS1 || !LS2)
2915 return false;
2916
2917 if (!I2->isDereferenceableInvariantLoad() ||
2918 (LS1->getMemSizeInBits() != LS2->getMemSizeInBits()))
2919 return false;
2920 }
2921
2922 // Check for physical registers on the instructions first to avoid cases
2923 // like this:
2924 //
2925 // %a = COPY $physreg
2926 // ...
2927 // SOMETHING implicit-def $physreg
2928 // ...
2929 // %b = COPY $physreg
2930 //
2931 // These copies are not equivalent.
2932 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2933 return MO.isReg() && MO.getReg().isPhysical();
2934 })) {
2935 // Check if we have a case like this:
2936 //
2937 // %a = COPY $physreg
2938 // %b = COPY %a
2939 //
2940 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2941 // From that, we know that they must have the same value, since they must
2942 // have come from the same COPY.
2943 return I1->isIdenticalTo(*I2);
2944 }
2945
2946 // We don't have any physical registers, so we don't necessarily need the
2947 // same vreg defs.
2948 //
2949 // On the off-chance that there's some target instruction feeding into the
2950 // instruction, let's use produceSameValue instead of isIdenticalTo.
2951 if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2952 // Handle instructions with multiple defs that produce same values. Values
2953 // are same for operands with same index.
2954 // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2955 // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2956 // I1 and I2 are different instructions but produce same values,
2957 // %1 and %6 are same, %1 and %7 are not the same value.
2958 return I1->findRegisterDefOperandIdx(InstAndDef1->Reg, /*TRI=*/nullptr) ==
2959 I2->findRegisterDefOperandIdx(InstAndDef2->Reg, /*TRI=*/nullptr);
2960 }
2961 return false;
2962}
2963
// Returns true when MOP is a register defined by a constant (or constant splat
// vector) that is at most 64 bits wide and whose sign-extended value equals C.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchConstantOp(const MachineOperand &MOP,; confirm.
2965 int64_t C) const {
2966 if (!MOP.isReg())
2967 return false;
2968 auto *MI = MRI.getVRegDef(MOP.getReg());
2969 auto MaybeCst = isConstantOrConstantSplatVector(*MI, MRI);
// The width check comes first so getSExtValue is only called on values that
// fit in 64 bits.
2970 return MaybeCst && MaybeCst->getBitWidth() <= 64 &&
2971 MaybeCst->getSExtValue() == C;
2972}
2973
// Returns true when MOP is a register matching m_GFCstOrSplat (a floating-point
// constant or splat) whose value is exactly C per APFloat::isExactlyValue.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchConstantFPOp(const MachineOperand &MOP,; confirm.
2975 double C) const {
2976 if (!MOP.isReg())
2977 return false;
2978 std::optional<FPValueAndVReg> MaybeCst;
2979 if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
2980 return false;
2981
2982 return MaybeCst->Value.isExactlyValue(C);
2983}
2984
// Replaces all uses of MI's single def (operand 0) with the register in
// operand OpIdx, then erases MI. Asserts that MI has exactly one explicit def
// and that canReplaceReg allows the replacement.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,;
// confirm.
2986 unsigned OpIdx) const {
2987 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2988 Register OldReg = MI.getOperand(0).getReg();
2989 Register Replacement = MI.getOperand(OpIdx).getReg();
2990 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2991 replaceRegWith(MRI, OldReg, Replacement);
2992 MI.eraseFromParent();
2993}
2994
// Replaces all uses of MI's single def (operand 0) with Replacement, then
// erases MI. Asserts that MI has exactly one explicit def and that
// canReplaceReg allows the replacement.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,;
// confirm.
2996 Register Replacement) const {
2997 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2998 Register OldReg = MI.getOperand(0).getReg();
2999 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
3000 replaceRegWith(MRI, OldReg, Replacement);
3001 MI.eraseFromParent();
3002}
3003
// Returns true when operand ConstIdx of MI is a constant (looking through
// copies/extensions as getIConstantVRegValWithLookThrough does) whose unsigned
// value is >= the size in bits of the destination type (operand 0).
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,;
// confirm.
3005 unsigned ConstIdx) const {
3006 Register ConstReg = MI.getOperand(ConstIdx).getReg();
3007 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3008
3009 // Get the shift amount
3010 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3011 if (!VRegAndVal)
3012 return false;
3013
3014 // Return true if the shift amount >= Bitwidth
3015 return (VRegAndVal->Value.uge(DstTy.getSizeInBits()));
3016}
3017
// Rewrites a G_FSHL/G_FSHR whose shift amount (operand 3) is a constant: the
// amount is reduced modulo the destination scalar bit width, a new funnel
// shift with the same opcode and the reduced constant is built, and MI is
// erased. Asserts that the amount is a constant.
// NOTE(review): the signature line is missing from this listing -- presumably
// CombinerHelper::applyFunnelShiftConstantModulo(MachineInstr &MI) const; confirm.
3019 assert((MI.getOpcode() == TargetOpcode::G_FSHL ||
3020 MI.getOpcode() == TargetOpcode::G_FSHR) &&
3021 "This is not a funnel shift operation");
3022
3023 Register ConstReg = MI.getOperand(3).getReg();
3024 LLT ConstTy = MRI.getType(ConstReg);
3025 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3026
3027 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstReg, MRI);
3028 assert((VRegAndVal) && "Value is not a constant");
3029
3030 // Calculate the new Shift Amount = Old Shift Amount % BitWidth
3031 APInt NewConst = VRegAndVal->Value.urem(
3032 APInt(ConstTy.getSizeInBits(), DstTy.getScalarSizeInBits()));
3033
3034 auto NewConstInstr = Builder.buildConstant(ConstTy, NewConst.getZExtValue());
3035 Builder.buildInstr(
3036 MI.getOpcode(), {MI.getOperand(0)},
3037 {MI.getOperand(1), MI.getOperand(2), NewConstInstr.getReg(0)});
3038
3039 MI.eraseFromParent();
3040}
3041
// Returns true when both value operands of the G_SELECT (operands 2 and 3)
// satisfy matchEqualDefs and the destination can be replaced by operand 2.
// NOTE(review): the signature line is missing from this listing -- presumably
// CombinerHelper::matchSelectSameVal(MachineInstr &MI) const; confirm.
3043 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
3044 // Match (cond ? x : x)
3045 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
3046 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
3047 MRI);
3048}
3049
// Returns true when operands 1 and 2 of MI satisfy matchEqualDefs and the
// destination (operand 0) can be replaced by operand 1.
// NOTE(review): the signature line is missing from this listing -- presumably
// CombinerHelper::matchBinOpSameVal(MachineInstr &MI) const; confirm.
3051 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
3052 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
3053 MRI);
3054}
3055
// Returns true when operand OpIdx of MI is a register for which getOpcodeDef
// finds a G_IMPLICIT_DEF definition.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchOperandIsUndef(MachineInstr &MI,; confirm.
3057 unsigned OpIdx) const {
3058 MachineOperand &MO = MI.getOperand(OpIdx);
3059 return MO.isReg() &&
3060 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
3061}
3062
// Returns the result of isKnownToBeAPowerOfTwo for the register in operand
// OpIdx of MI. The operand is not checked to be a register before getReg().
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,;
// confirm.
3064 unsigned OpIdx) const {
3065 MachineOperand &MO = MI.getOperand(OpIdx);
3066 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, VT);
3067}
3068
// Builds a floating-point constant C into MI's single def (operand 0) and
// erases MI. Asserts that MI has exactly one def.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,; confirm.
3070 double C) const {
3071 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3072 Builder.buildFConstant(MI.getOperand(0), C);
3073 MI.eraseFromParent();
3074}
3075
// Builds an integer constant C into MI's single def (operand 0) and erases MI.
// Asserts that MI has exactly one def.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::replaceInstWithConstant(MachineInstr &MI,; confirm.
3077 int64_t C) const {
3078 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3079 Builder.buildConstant(MI.getOperand(0), C);
3080 MI.eraseFromParent();
3081}
3082
// Builds constant C into MI's single def (operand 0) and erases MI. Asserts
// that MI has exactly one def.
// NOTE(review): the whole signature line is missing from this listing, so the
// type of C is not visible here -- presumably an APInt overload of
// CombinerHelper::replaceInstWithConstant; confirm.
3084 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3085 Builder.buildConstant(MI.getOperand(0), C);
3086 MI.eraseFromParent();
3087}
3088
// Builds a floating-point constant holding CFP's APFloat value into MI's
// single def (operand 0) and erases MI. Asserts that MI has exactly one def.
// CFP is dereferenced unconditionally.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::replaceInstWithFConstant(MachineInstr &MI,; confirm.
3090 ConstantFP *CFP) const {
3091 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3092 Builder.buildFConstant(MI.getOperand(0), CFP->getValueAPF());
3093 MI.eraseFromParent();
3094}
3095
// Builds an undef (Builder.buildUndef) into MI's single def (operand 0) and
// erases MI. Asserts that MI has exactly one def.
// NOTE(review): the signature line is missing from this listing -- presumably
// CombinerHelper::replaceInstWithUndef(MachineInstr &MI) const; confirm.
3097 assert(MI.getNumDefs() == 1 && "Expected only one def?");
3098 Builder.buildUndef(MI.getOperand(0));
3099 MI.eraseFromParent();
3100}
3101
// Matches an instruction where one of operands 1/2 is a negation (m_Neg) of
// some register. On success MatchInfo holds (other operand, negated register),
// i.e. the LHS and RHS of the subtraction that can replace MI.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchSimplifyAddToSub(; confirm.
3103 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3104 Register LHS = MI.getOperand(1).getReg();
3105 Register RHS = MI.getOperand(2).getReg();
// NewLHS/NewRHS alias the tuple elements, so writes below fill MatchInfo.
3106 Register &NewLHS = std::get<0>(MatchInfo);
3107 Register &NewRHS = std::get<1>(MatchInfo);
3108
3109 // Helper lambda to check for opportunities for
3110 // ((0-A) + B) -> B - A
3111 // (A + (0-B)) -> A - B
3112 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
3113 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
3114 return false;
3115 NewLHS = MaybeNewLHS;
3116 return true;
3117 };
3118
3119 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
3120}
3121
// Walks a chain of G_INSERT_VECTOR_ELT instructions with constant indices that
// ends at MI and records, per element index, the register inserted latest in
// the chain (closest to MI). MatchInfo is resized to the element count;
// entries left invalid mean "no insert seen for that element". Succeeds when
// the chain bottoms out in a G_BUILD_VECTOR (whose sources fill the gaps), in a
// G_IMPLICIT_DEF, or when every element was overwritten.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchCombineInsertVecElts(; confirm.
3123 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3124 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
3125 "Invalid opcode");
3126 Register DstReg = MI.getOperand(0).getReg();
3127 LLT DstTy = MRI.getType(DstReg);
3128 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
3129
3130 if (DstTy.isScalableVector())
3131 return false;
3132
3133 unsigned NumElts = DstTy.getNumElements();
3134 // If this MI is part of a sequence of insert_vec_elts, then
3135 // don't do the combine in the middle of the sequence.
3136 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
3137 TargetOpcode::G_INSERT_VECTOR_ELT)
3138 return false;
3139 MachineInstr *CurrInst = &MI;
3140 MachineInstr *TmpInst;
3141 int64_t IntImm;
3142 Register TmpReg;
3143 MatchInfo.resize(NumElts);
// Each iteration matches CurrInst as insert(TmpInst, TmpReg, IntImm) and then
// steps to TmpInst, the instruction defining the vector being inserted into.
3144 while (mi_match(
3145 CurrInst->getOperand(0).getReg(), MRI,
3146 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
3147 if (IntImm >= NumElts || IntImm < 0)
3148 return false;
// Keep the first value seen for an index: it is the latest insert in the chain.
3149 if (!MatchInfo[IntImm])
3150 MatchInfo[IntImm] = TmpReg;
3151 CurrInst = TmpInst;
3152 }
3153 // Variable index.
3154 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
3155 return false;
3156 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
// Operand 0 of the build_vector is its def, hence the I - 1 element index.
3157 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
3158 if (!MatchInfo[I - 1].isValid())
3159 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
3160 }
3161 return true;
3162 }
3163 // If we didn't end in a G_IMPLICIT_DEF and the source is not fully
3164 // overwritten, bail out.
3165 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
3166 all_of(MatchInfo, [](Register Reg) { return !!Reg; });
3167}
3168
// Replaces MI with a G_BUILD_VECTOR of the registers in MatchInfo. Invalid
// entries in MatchInfo are filled in place with a single shared undef of the
// destination's scalar type, created on first need. MI is erased afterwards.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyCombineInsertVecElts(; confirm.
3170 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) const {
3171 Register UndefReg;
// Builds the undef at most once and caches it in UndefReg.
3172 auto GetUndef = [&]() {
3173 if (UndefReg)
3174 return UndefReg;
3175 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3176 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
3177 return UndefReg;
3178 };
3179 for (Register &Reg : MatchInfo) {
3180 if (!Reg)
3181 Reg = GetUndef();
3182 }
3183 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
3184 MI.eraseFromParent();
3185}
3186
// Builds a subtraction of the two registers in MatchInfo (first minus second)
// into MI's destination (operand 0) and erases MI.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applySimplifyAddToSub(; confirm.
3188 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) const {
3189 Register SubLHS, SubRHS;
3190 std::tie(SubLHS, SubRHS) = MatchInfo;
3191 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
3192 MI.eraseFromParent();
3193}
3194
// Matches a G_AND/G_OR/G_XOR whose two sources are single-use results of
// instructions with the same opcode ("hands"), and records in MatchInfo the
// build steps for logic(x, y) followed by hand(logic(x, y), ...). Supported
// hands are the extensions, G_TRUNC, and G_AND/G_ASHR/G_LSHR/G_SHL with an
// equal second operand.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(; confirm.
3196 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3197 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
3198 //
3199 // Creates the new hand + logic instruction (but does not insert them.)
3200 //
3201 // On success, MatchInfo is populated with the new instructions. These are
3202 // inserted in applyHoistLogicOpWithSameOpcodeHands.
3203 unsigned LogicOpcode = MI.getOpcode();
3204 assert(LogicOpcode == TargetOpcode::G_AND ||
3205 LogicOpcode == TargetOpcode::G_OR ||
3206 LogicOpcode == TargetOpcode::G_XOR);
3207 MachineIRBuilder MIB(MI);
3208 Register Dst = MI.getOperand(0).getReg();
3209 Register LHSReg = MI.getOperand(1).getReg();
3210 Register RHSReg = MI.getOperand(2).getReg();
3211
3212 // Don't recompute anything.
3213 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
3214 return false;
3215
3216 // Make sure we have (hand x, ...), (hand y, ...)
3217 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
3218 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
3219 if (!LeftHandInst || !RightHandInst)
3220 return false;
3221 unsigned HandOpcode = LeftHandInst->getOpcode();
3222 if (HandOpcode != RightHandInst->getOpcode())
3223 return false;
// Operand 1 of each hand is read as a register below, so require it to exist
// and to be a register.
3224 if (LeftHandInst->getNumOperands() < 2 ||
3225 !LeftHandInst->getOperand(1).isReg() ||
3226 RightHandInst->getNumOperands() < 2 ||
3227 !RightHandInst->getOperand(1).isReg())
3228 return false;
3229
3230 // Make sure the types match up, and if we're doing this post-legalization,
3231 // we end up with legal types.
3232 Register X = LeftHandInst->getOperand(1).getReg();
3233 Register Y = RightHandInst->getOperand(1).getReg();
3234 LLT XTy = MRI.getType(X);
3235 LLT YTy = MRI.getType(Y);
3236 if (!XTy.isValid() || XTy != YTy)
3237 return false;
3238
3239 // Optional extra source register.
3240 Register ExtraHandOpSrcReg;
3241 switch (HandOpcode) {
3242 default:
3243 return false;
3244 case TargetOpcode::G_ANYEXT:
3245 case TargetOpcode::G_SEXT:
3246 case TargetOpcode::G_ZEXT: {
3247 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3248 break;
3249 }
3250 case TargetOpcode::G_TRUNC: {
3251 // Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
3252 const MachineFunction *MF = MI.getMF();
3253 LLVMContext &Ctx = MF->getFunction().getContext();
3254
3255 LLT DstTy = MRI.getType(Dst);
3256 const TargetLowering &TLI = getTargetLowering();
3257
3258 // Be extra careful sinking truncate. If it's free, there's no benefit in
3259 // widening a binop.
3260 if (TLI.isZExtFree(DstTy, XTy, Ctx) && TLI.isTruncateFree(XTy, DstTy, Ctx))
3261 return false;
3262 break;
3263 }
3264 case TargetOpcode::G_AND:
3265 case TargetOpcode::G_ASHR:
3266 case TargetOpcode::G_LSHR:
3267 case TargetOpcode::G_SHL: {
3268 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3269 MachineOperand &ZOp = LeftHandInst->getOperand(2);
3270 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3271 return false;
3272 ExtraHandOpSrcReg = ZOp.getReg();
3273 break;
3274 }
3275 }
3276
3277 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3278 return false;
3279
3280 // Record the steps to build the new instructions.
3281 //
3282 // Steps to build (logic x, y)
3283 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3284 OperandBuildSteps LogicBuildSteps = {
3285 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3286 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3287 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3288 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3289
3290 // Steps to build hand (logic x, y), ...z
3291 OperandBuildSteps HandBuildSteps = {
3292 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3293 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3294 if (ExtraHandOpSrcReg.isValid())
3295 HandBuildSteps.push_back(
3296 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3297 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3298
3299 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3300 return true;
3301}
3302
// Builds every instruction recorded in MatchInfo.InstrsToBuild, in order: an
// instruction with the recorded opcode is created and each recorded operand
// callback is applied to it. MI is erased afterwards. Asserts that there is at
// least one instruction and that each has a non-zero opcode and an operand.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyBuildInstructionSteps(; confirm.
3304 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const {
3305 assert(MatchInfo.InstrsToBuild.size() &&
3306 "Expected at least one instr to build?");
3307 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3308 assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3309 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3310 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3311 for (auto &OperandFn : InstrToBuild.OperandFns)
3312 OperandFn(Instr);
3313 }
3314 MI.eraseFromParent();
3315}
3316
// Matches ashr(shl(Src, C), C) where both shift amounts are the same constant
// (or constant splat). On success MatchInfo holds (Src, C).
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchAshrShlToSextInreg(; confirm.
3318 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3319 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3320 int64_t ShlCst, AshrCst;
3321 Register Src;
3322 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3323 m_GAShr(m_GShl(m_Reg(Src), m_ICstOrSplat(ShlCst)),
3324 m_ICstOrSplat(AshrCst))))
3325 return false;
3326 if (ShlCst != AshrCst)
3327 return false;
// NOTE(review): listing line 3328, which opens the condition completed by the
// next two lines, is missing from this listing -- presumably a legality query
// for G_SEXT_INREG on Src's type; restore it from the real source.
3329 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3330 return false;
3331 MatchInfo = std::make_tuple(Src, ShlCst);
3332 return true;
3333}
3334
// Replaces the matched G_ASHR with a sign-extend-in-register of Src from
// (scalar bit width - shift amount) bits, written to MI's destination, then
// erases MI. MatchInfo is (Src, shift amount).
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyAshShlToSextInreg(; confirm.
3336 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) const {
3337 assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3338 Register Src;
3339 int64_t ShiftAmt;
3340 std::tie(Src, ShiftAmt) = MatchInfo;
3341 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3342 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3343 MI.eraseFromParent();
3344}
3345
// Matches two nested G_ANDs with constant masks and stores in MatchInfo a
// callback that either builds a single and with the combined mask or, when the
// masks share no bits, replaces the destination with a zero constant.
// NOTE(review): the first signature lines are missing from this listing --
// presumably CombinerHelper::matchOverlappingAnd(MachineInstr &MI,; confirm.
3346/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3349 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
3350 assert(MI.getOpcode() == TargetOpcode::G_AND);
3351
3352 Register Dst = MI.getOperand(0).getReg();
3353 LLT Ty = MRI.getType(Dst);
3354
3355 Register R;
3356 int64_t C1;
3357 int64_t C2;
3358 if (!mi_match(
3359 Dst, MRI,
3360 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3361 return false;
3362
// The lambda captures Dst, R, Ty, C1 and C2 by value; the rewrite itself only
// happens when the callback is invoked with a builder.
3363 MatchInfo = [=](MachineIRBuilder &B) {
3364 if (C1 & C2) {
3365 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3366 return;
3367 }
3368 auto Zero = B.buildConstant(Ty, 0);
3369 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3370 };
3371 return true;
3372}
3373
// Uses known bits to detect a G_AND whose result always equals one of its
// operands; on success Replacement is set to that operand. Returns false when
// no value tracking (VT) is available or the RHS has no known bits.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchRedundantAnd(MachineInstr &MI,; confirm.
3375 Register &Replacement) const {
3376 // Given
3377 //
3378 // %y:_(sN) = G_SOMETHING
3379 // %x:_(sN) = G_SOMETHING
3380 // %res:_(sN) = G_AND %x, %y
3381 //
3382 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3383 //
3384 // Patterns like this can appear as a result of legalization. E.g.
3385 //
3386 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3387 // %one:_(s32) = G_CONSTANT i32 1
3388 // %and:_(s32) = G_AND %cmp, %one
3389 //
3390 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3391 assert(MI.getOpcode() == TargetOpcode::G_AND);
3392 if (!VT)
3393 return false;
3394
3395 Register AndDst = MI.getOperand(0).getReg();
3396 Register LHS = MI.getOperand(1).getReg();
3397 Register RHS = MI.getOperand(2).getReg();
3398
3399 // Check the RHS (maybe a constant) first, and if we have no KnownBits there,
3400 // we can't do anything. If we do, then it depends on whether we have
3401 // KnownBits on the LHS.
3402 KnownBits RHSBits = VT->getKnownBits(RHS);
3403 if (RHSBits.isUnknown())
3404 return false;
3405
3406 KnownBits LHSBits = VT->getKnownBits(LHS);
3407
3408 // Check that x & Mask == x.
3409 // x & 1 == x, always
3410 // x & 0 == x, only if x is also 0
3411 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3412 //
3413 // Check if we can replace AndDst with the LHS of the G_AND
3414 if (canReplaceReg(AndDst, LHS, MRI) &&
3415 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3416 Replacement = LHS;
3417 return true;
3418 }
3419
3420 // Check if we can replace AndDst with the RHS of the G_AND
3421 if (canReplaceReg(AndDst, RHS, MRI) &&
3422 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3423 Replacement = RHS;
3424 return true;
3425 }
3426
3427 return false;
3428}
3429
// Uses known bits to detect a G_OR whose result always equals one of its
// operands; on success Replacement is set to that operand. Returns false when
// no value tracking (VT) is available.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchRedundantOr(MachineInstr &MI,; confirm.
3431 Register &Replacement) const {
3432 // Given
3433 //
3434 // %y:_(sN) = G_SOMETHING
3435 // %x:_(sN) = G_SOMETHING
3436 // %res:_(sN) = G_OR %x, %y
3437 //
3438 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3439 assert(MI.getOpcode() == TargetOpcode::G_OR);
3440 if (!VT)
3441 return false;
3442
3443 Register OrDst = MI.getOperand(0).getReg();
3444 Register LHS = MI.getOperand(1).getReg();
3445 Register RHS = MI.getOperand(2).getReg();
3446
3447 KnownBits LHSBits = VT->getKnownBits(LHS);
3448 KnownBits RHSBits = VT->getKnownBits(RHS);
3449
3450 // Check that x | Mask == x.
3451 // x | 0 == x, always
3452 // x | 1 == x, only if x is also 1
3453 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3454 //
3455 // Check if we can replace OrDst with the LHS of the G_OR
3456 if (canReplaceReg(OrDst, LHS, MRI) &&
3457 (LHSBits.One | RHSBits.Zero).isAllOnes()) {
3458 Replacement = LHS;
3459 return true;
3460 }
3461
3462 // Check if we can replace OrDst with the RHS of the G_OR
3463 if (canReplaceReg(OrDst, RHS, MRI) &&
3464 (LHSBits.Zero | RHSBits.One).isAllOnes()) {
3465 Replacement = RHS;
3466 return true;
3467 }
3468
3469 return false;
3470}
3471
// Returns true when the source (operand 1) already has at least
// (TypeSize - ExtBits + 1) sign bits, where ExtBits is the immediate in
// operand 2 and TypeSize is the source's scalar width in bits.
// NOTE(review): the signature line is missing from this listing -- presumably
// CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) const; confirm.
// NOTE(review): VT is dereferenced here without the `if (!VT)` guard that
// matchRedundantAnd and matchRedundantOr use -- confirm VT is always set on
// this path.
3473 // If the input is already sign extended, just drop the extension.
3474 Register Src = MI.getOperand(1).getReg();
3475 unsigned ExtBits = MI.getOperand(2).getImm();
3476 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3477 return VT->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3478}
3479
3480static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3481 int64_t Cst, bool IsVector, bool IsFP) {
3482 // For i1, Cst will always be -1 regardless of boolean contents.
3483 return (ScalarSizeBits == 1 && Cst == -1) ||
3484 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3485}
3486
// Matches a G_BUILD_VECTOR whose first half of sources are, in order, all the
// defs of one G_UNMERGE_VALUES of a vector, and whose second half are all
// G_IMPLICIT_DEF. On success UnmergeSrc is the unmerge's source register.
// UnmergeSrc may already have been written when the match fails later on.
// NOTE(review): the first signature lines are missing from this listing --
// presumably CombinerHelper::matchCombineBuildUnmerge(MachineInstr &MI,
// MachineRegisterInfo &MRI,; confirm.
3487// This pattern aims to match the following shape to avoid extra mov
3488// instructions
3489// G_BUILD_VECTOR(
3490// G_UNMERGE_VALUES(src, 0)
3491// G_UNMERGE_VALUES(src, 1)
3492// G_IMPLICIT_DEF
3493// G_IMPLICIT_DEF
3494// )
3495// ->
3496// G_CONCAT_VECTORS(
3497// src,
3498// undef
3499// )
3502 Register &UnmergeSrc) const {
3503 auto &BV = cast<GBuildVector>(MI);
3504
3505 unsigned BuildUseCount = BV.getNumSources();
3506 if (BuildUseCount % 2 != 0)
3507 return false;
3508
3509 unsigned NumUnmerge = BuildUseCount / 2;
3510
3511 auto *Unmerge = getOpcodeDef<GUnmerge>(BV.getSourceReg(0), MRI);
3512
3513 // Check the first operand is an unmerge and has the correct number of
3514 // operands
3515 if (!Unmerge || Unmerge->getNumDefs() != NumUnmerge)
3516 return false;
3517
3518 UnmergeSrc = Unmerge->getSourceReg();
3519
3520 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3521 LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);
3522
3523 if (!UnmergeSrcTy.isVector())
3524 return false;
3525
3526 // Ensure we only generate legal instructions post-legalizer
3527 if (!IsPreLegalize &&
3528 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {DstTy, UnmergeSrcTy}}))
3529 return false;
3530
3531 // Check that all of the operands before the midpoint come from the same
3532 // unmerge and are in the same order as they are used in the build_vector
3533 for (unsigned I = 0; I < NumUnmerge; ++I) {
3534 auto MaybeUnmergeReg = BV.getSourceReg(I);
3535 auto *LoopUnmerge = getOpcodeDef<GUnmerge>(MaybeUnmergeReg, MRI);
3536
3537 if (!LoopUnmerge || LoopUnmerge != Unmerge)
3538 return false;
3539
// The I-th def of the unmerge must feed the I-th source of the build_vector.
3540 if (LoopUnmerge->getOperand(I).getReg() != MaybeUnmergeReg)
3541 return false;
3542 }
3543
3544 // Check that all of the unmerged values are used
// NOTE(review): this repeats the NumDefs check made right after Unmerge was
// looked up, so it cannot fail here.
3545 if (Unmerge->getNumDefs() != NumUnmerge)
3546 return false;
3547
3548 // Check that all of the operands after the mid point are undefs.
3549 for (unsigned I = NumUnmerge; I < BuildUseCount; ++I) {
3550 auto *Undef = getDefIgnoringCopies(BV.getSourceReg(I), MRI);
3551
// NOTE(review): Undef is dereferenced without a null check, whereas
// matchHoistLogicOpWithSameOpcodeHands checks getDefIgnoringCopies for null --
// confirm a null result is impossible here.
3552 if (Undef->getOpcode() != TargetOpcode::G_IMPLICIT_DEF)
3553 return false;
3554 }
3555
3556 return true;
3557}
3558
// Replaces MI with a G_CONCAT_VECTORS of UnmergeSrc and an undef of the same
// type, written to MI's destination (operand 0), then erases MI. The builder B
// is positioned at MI first. Asserts that UnmergeSrc is valid.
// NOTE(review): the first signature lines are missing from this listing --
// presumably CombinerHelper::applyCombineBuildUnmerge(MachineInstr &MI,
// MachineRegisterInfo &MRI, MachineIRBuilder &B,; confirm.
3562 Register &UnmergeSrc) const {
3563 assert(UnmergeSrc && "Expected there to be one matching G_UNMERGE_VALUES");
3564 B.setInstrAndDebugLoc(MI);
3565
3566 Register UndefVec = B.buildUndef(MRI.getType(UnmergeSrc)).getReg(0);
3567 B.buildConcatVectors(MI.getOperand(0), {UnmergeSrc, UndefVec});
3568
3569 MI.eraseFromParent();
3570}
3571
// Matches a G_BUILD_VECTOR whose leading sources (at least two) are G_TRUNCs
// of values defined by one G_UNMERGE_VALUES and whose remaining sources are
// all G_IMPLICIT_DEF. On success MatchInfo is the unmerge's source register.
// MatchInfo may already have been written when the match fails later on.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchUseVectorTruncate(MachineInstr &MI,; confirm.
3572// This combine tries to reduce the number of scalarised G_TRUNC instructions by
3573// using vector truncates instead
3574//
3575// EXAMPLE:
3576// %a(i32), %b(i32) = G_UNMERGE_VALUES %src(<2 x i32>)
3577// %T_a(i16) = G_TRUNC %a(i32)
3578// %T_b(i16) = G_TRUNC %b(i32)
3579// %Undef(i16) = G_IMPLICIT_DEF(i16)
3580// %dst(v4i16) = G_BUILD_VECTOR %T_a(i16), %T_b(i16), %Undef(i16), %Undef(i16)
3581//
3582// ===>
3583// %Undef(<2 x i32>) = G_IMPLICIT_DEF(<2 x i32>)
3584// %Mid(<4 x s32>) = G_CONCAT_VECTORS %src(<2 x i32>), %Undef(<2 x i32>)
3585// %dst(<4 x s16>) = G_TRUNC %Mid(<4 x s32>)
3586//
3587// Only matches sources made up of G_TRUNCs followed by G_IMPLICIT_DEFs
3589 Register &MatchInfo) const {
3590 auto BuildMI = cast<GBuildVector>(&MI);
3591 unsigned NumOperands = BuildMI->getNumSources();
3592 LLT DstTy = MRI.getType(BuildMI->getReg(0));
3593
3594 // Check the G_BUILD_VECTOR sources
3595 unsigned I;
3596 MachineInstr *UnmergeMI = nullptr;
3597
3598 // Check all source TRUNCs come from the same UNMERGE instruction
3599 for (I = 0; I < NumOperands; ++I) {
3600 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3601 auto SrcMIOpc = SrcMI->getOpcode();
3602
3603 // Check if the G_TRUNC instructions all come from the same MI
3604 if (SrcMIOpc == TargetOpcode::G_TRUNC) {
3605 if (!UnmergeMI) {
3606 UnmergeMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3607 if (UnmergeMI->getOpcode() != TargetOpcode::G_UNMERGE_VALUES)
3608 return false;
3609 } else {
3610 auto UnmergeSrcMI = MRI.getVRegDef(SrcMI->getOperand(1).getReg());
3611 if (UnmergeMI != UnmergeSrcMI)
3612 return false;
3613 }
3614 } else {
3615 break;
3616 }
3617 }
// I is now the number of leading G_TRUNC sources; fewer than two is rejected.
3618 if (I < 2)
3619 return false;
3620
3621 // Check the remaining source elements are only G_IMPLICIT_DEF
3622 for (; I < NumOperands; ++I) {
3623 auto SrcMI = MRI.getVRegDef(BuildMI->getSourceReg(I));
3624 auto SrcMIOpc = SrcMI->getOpcode();
3625
3626 if (SrcMIOpc != TargetOpcode::G_IMPLICIT_DEF)
3627 return false;
3628 }
3629
3630 // Check the size of unmerge source
3631 MatchInfo = cast<GUnmerge>(UnmergeMI)->getSourceReg();
3632 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
// NOTE(review): getNumElements()/getElementType() are called on UnmergeSrcTy
// without a preceding isVector() check -- confirm the unmerge source is always
// a vector here.
3633 if (!DstTy.getElementCount().isKnownMultipleOf(UnmergeSrcTy.getNumElements()))
3634 return false;
3635
3636 // Check the unmerge source and destination element types match
3637 LLT UnmergeSrcEltTy = UnmergeSrcTy.getElementType();
3638 Register UnmergeDstReg = UnmergeMI->getOperand(0).getReg();
3639 LLT UnmergeDstEltTy = MRI.getType(UnmergeDstReg);
3640 if (UnmergeSrcEltTy != UnmergeDstEltTy)
3641 return false;
3642
3643 // Only generate legal instructions post-legalizer
3644 if (!IsPreLegalize) {
3645 LLT MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3646
// The concat is only needed (and only checked) when padding is required.
3647 if (DstTy.getElementCount() != UnmergeSrcTy.getElementCount() &&
3648 !isLegal({TargetOpcode::G_CONCAT_VECTORS, {MidTy, UnmergeSrcTy}}))
3649 return false;
3650
3651 if (!isLegal({TargetOpcode::G_TRUNC, {DstTy, MidTy}}))
3652 return false;
3653 }
3654
3655 return true;
3656}
3657
// Replaces the matched G_BUILD_VECTOR with a vector G_TRUNC of MatchInfo (the
// unmerge source). When the destination has more elements than the source, the
// source is first padded with undef vectors via G_CONCAT_VECTORS. MI is erased.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyUseVectorTruncate(MachineInstr &MI,; confirm.
3659 Register &MatchInfo) const {
3660 Register MidReg;
3661 auto BuildMI = cast<GBuildVector>(&MI);
3662 Register DstReg = BuildMI->getReg(0);
3663 LLT DstTy = MRI.getType(DstReg);
3664 LLT UnmergeSrcTy = MRI.getType(MatchInfo);
3665 unsigned DstTyNumElt = DstTy.getNumElements();
3666 unsigned UnmergeSrcTyNumElt = UnmergeSrcTy.getNumElements();
3667
3668 // No need to pad vector if only G_TRUNC is needed
3669 if (DstTyNumElt / UnmergeSrcTyNumElt == 1) {
3670 MidReg = MatchInfo;
3671 } else {
3672 Register UndefReg = Builder.buildUndef(UnmergeSrcTy).getReg(0);
3673 SmallVector<Register> ConcatRegs = {MatchInfo};
// One undef vector per extra multiple of the source element count.
3674 for (unsigned I = 1; I < DstTyNumElt / UnmergeSrcTyNumElt; ++I)
3675 ConcatRegs.push_back(UndefReg);
3676
3677 auto MidTy = DstTy.changeElementType(UnmergeSrcTy.getScalarType());
3678 MidReg = Builder.buildConcatVectors(MidTy, ConcatRegs).getReg(0);
3679 }
3680
3681 Builder.buildTrunc(DstReg, MidReg);
3682 MI.eraseFromParent();
3683}
3684
// Matches xor(src, true) where src is the single-use root of a tree made only
// of G_AND, G_OR, G_ICMP and G_FCMP nodes, each with one non-debug use, and
// the comparisons are all integer or all floating point. On success
// RegsToNegate lists every register in the tree. RegsToNegate may hold partial
// entries when the match fails.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchNotCmp(; confirm.
3686 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3687 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3688 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3689 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3690 Register XorSrc;
3691 Register CstReg;
3692 // We match xor(src, true) here.
3693 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3694 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3695 return false;
3696
3697 if (!MRI.hasOneNonDBGUse(XorSrc))
3698 return false;
3699
3700 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3701 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3702 // list of tree nodes to visit.
3703 RegsToNegate.push_back(XorSrc);
3704 // Remember whether the comparisons are all integer or all floating point.
3705 bool IsInt = false;
3706 bool IsFP = false;
3707 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3708 Register Reg = RegsToNegate[I];
3709 if (!MRI.hasOneNonDBGUse(Reg))
3710 return false;
3711 MachineInstr *Def = MRI.getVRegDef(Reg);
3712 switch (Def->getOpcode()) {
3713 default:
3714 // Don't match if the tree contains anything other than ANDs, ORs and
3715 // comparisons.
3716 return false;
3717 case TargetOpcode::G_ICMP:
3718 if (IsFP)
3719 return false;
3720 IsInt = true;
3721 // When we apply the combine we will invert the predicate.
3722 break;
3723 case TargetOpcode::G_FCMP:
3724 if (IsInt)
3725 return false;
3726 IsFP = true;
3727 // When we apply the combine we will invert the predicate.
3728 break;
3729 case TargetOpcode::G_AND:
3730 case TargetOpcode::G_OR:
3731 // Implement De Morgan's laws:
3732 // ~(x & y) -> ~x | ~y
3733 // ~(x | y) -> ~x & ~y
3734 // When we apply the combine we will change the opcode and recursively
3735 // negate the operands.
3736 RegsToNegate.push_back(Def->getOperand(1).getReg());
3737 RegsToNegate.push_back(Def->getOperand(2).getReg());
3738 break;
3739 }
3740 }
3741
3742 // Now we know whether the comparisons are integer or floating point, check
3743 // the constant in the xor.
3744 int64_t Cst;
3745 if (Ty.isVector()) {
3746 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3747 auto MaybeCst = getIConstantSplatSExtVal(*CstDef, MRI);
3748 if (!MaybeCst)
3749 return false;
3750 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3751 return false;
3752 } else {
3753 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3754 return false;
3755 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3756 return false;
3757 }
3758
3759 return true;
3760}
3761
// Negates the matched tree in place: for every register in RegsToNegate the
// defining compare gets a new predicate (NewP) and each G_AND/G_OR is swapped
// for the other opcode, with the Observer notified around every change. The
// xor's destination is then replaced by its first source and MI is erased.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyNotCmp(; confirm.
3763 MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate) const {
3764 for (Register Reg : RegsToNegate) {
3765 MachineInstr *Def = MRI.getVRegDef(Reg);
3766 Observer.changingInstr(*Def);
3767 // For each comparison, invert the opcode. For each AND and OR, change the
3768 // opcode.
3769 switch (Def->getOpcode()) {
3770 default:
3771 llvm_unreachable("Unexpected opcode");
3772 case TargetOpcode::G_ICMP:
3773 case TargetOpcode::G_FCMP: {
3774 MachineOperand &PredOp = Def->getOperand(1);
// NOTE(review): listing lines 3775-3776, which define NewP, are missing from
// this listing -- presumably the inverse of PredOp's predicate; restore them
// from the real source.
3777 PredOp.setPredicate(NewP);
3778 break;
3779 }
3780 case TargetOpcode::G_AND:
3781 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3782 break;
3783 case TargetOpcode::G_OR:
3784 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3785 break;
3786 }
3787 Observer.changedInstr(*Def);
3788 }
3789
3790 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3791 MI.eraseFromParent();
3792}
3793
// Matches xor(and(x, y), y) in any operand order, requiring the G_AND result to
// have a single non-debug use. On success MatchInfo is (x, y) where y is the
// register shared between the G_AND and the G_XOR. MatchInfo may be written
// even when the match fails.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::matchXorOfAndWithSameReg(; confirm.
3795 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3796 // Match (xor (and x, y), y) (or any of its commuted cases)
3797 assert(MI.getOpcode() == TargetOpcode::G_XOR);
3798 Register &X = MatchInfo.first;
3799 Register &Y = MatchInfo.second;
3800 Register AndReg = MI.getOperand(1).getReg();
3801 Register SharedReg = MI.getOperand(2).getReg();
3802
3803 // Find a G_AND on either side of the G_XOR.
3804 // Look for one of
3805 //
3806 // (xor (and x, y), SharedReg)
3807 // (xor SharedReg, (and x, y))
3808 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3809 std::swap(AndReg, SharedReg);
3810 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3811 return false;
3812 }
3813
3814 // Only do this if we'll eliminate the G_AND.
3815 if (!MRI.hasOneNonDBGUse(AndReg))
3816 return false;
3817
3818 // We can combine if SharedReg is the same as either the LHS or RHS of the
3819 // G_AND.
3820 if (Y != SharedReg)
3821 std::swap(X, Y);
3822 return Y == SharedReg;
3823}
3824
// Rewrites MI in place from a G_XOR into a G_AND of (not X) and Y, where
// (X, Y) come from MatchInfo. The not is built first; the opcode and operand
// changes to MI are bracketed by Observer notifications. MI is not erased.
// NOTE(review): the first signature line is missing from this listing --
// presumably CombinerHelper::applyXorOfAndWithSameReg(; confirm.
3826 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) const {
3827 // Fold (xor (and x, y), y) -> (and (not x), y)
3828 Register X, Y;
3829 std::tie(X, Y) = MatchInfo;
3830 auto Not = Builder.buildNot(MRI.getType(X), X);
3831 Observer.changingInstr(MI);
3832 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3833 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3834 MI.getOperand(2).setReg(Y);
3835 Observer.changedInstr(MI);
3836}
3837
3839 auto &PtrAdd = cast<GPtrAdd>(MI);
// NOTE(review): the signature line of this function (listing line 3838) is
// missing from this listing; MI is presumably its MachineInstr parameter.
//
// Returns true when the base operand of the pointer-add is a constant zero
// (scalar pointer result) or an all-zeros build vector (vector result), and
// the result's address space is not non-integral.
3840 Register DstReg = PtrAdd.getReg(0);
3841 LLT Ty = MRI.getType(DstReg);
3842 const DataLayout &DL = Builder.getMF().getDataLayout();
3843
// Bail out for non-integral address spaces.
3844 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3845 return false;
3846
// Scalar pointer: the base must be a known integer constant equal to zero.
3847 if (Ty.isPointer()) {
3848 auto ConstVal = getIConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3849 return ConstVal && *ConstVal == 0;
3850 }
3851
// Otherwise the result must be a vector; inspect the definition of the base.
3852 assert(Ty.isVector() && "Expecting a vector type");
3853 const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3854 return isBuildVectorAllZeros(*VecMI, MRI);
3855}
3856
3858 auto &PtrAdd = cast<GPtrAdd>(MI);
// NOTE(review): the signature line of this function (listing line 3857) is
// missing from this listing.
//
// Replaces the pointer-add with an int-to-ptr of its offset operand, written
// to the same destination register, then erases the pointer-add.
3859 Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3860 PtrAdd.eraseFromParent();
3861}
3862
3863 /// The second source operand is known to be a power of 2.
// NOTE(review): the signature line of this function (listing line 3864) is
// missing from this listing.
//
// Replaces MI with (and Src0, Pow2Src1 + (-1)) written to MI's destination
// register, then erases MI. The caller is responsible for the power-of-2
// precondition stated above; it is not re-checked here.
3865 Register DstReg = MI.getOperand(0).getReg();
3866 Register Src0 = MI.getOperand(1).getReg();
3867 Register Pow2Src1 = MI.getOperand(2).getReg();
3868 LLT Ty = MRI.getType(DstReg);
3869
3870 // Fold (urem x, pow2) -> (and x, pow2-1)
// pow2-1 is materialized as an add of the constant -1.
3871 auto NegOne = Builder.buildConstant(Ty, -1);
3872 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3873 Builder.buildAnd(DstReg, Src0, Add);
3874 MI.eraseFromParent();
3875}
3876
3878 unsigned &SelectOpNo) const {
// NOTE(review): the first line of this signature (listing line 3877) is
// missing from this listing; MI is presumably the binary operator being
// examined.
//
// Looks for a single-use G_SELECT of two constants feeding operand 1 or 2 of
// MI. On success SelectOpNo is the operand index (1 or 2) of that select.
// SelectOpNo is written even when the function returns false.
3879 Register LHS = MI.getOperand(1).getReg();
3880 Register RHS = MI.getOperand(2).getReg();
3881
// First assume the select is on the LHS.
3882 Register OtherOperandReg = RHS;
3883 SelectOpNo = 1;
3884 MachineInstr *Select = MRI.getVRegDef(LHS);
3885
3886 // Don't do this unless the old select is going away. We want to eliminate the
3887 // binary operator, not replace a binop with a select.
3888 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3889 !MRI.hasOneNonDBGUse(LHS)) {
// LHS did not qualify: try the RHS instead.
3890 OtherOperandReg = LHS;
3891 SelectOpNo = 2;
3892 Select = MRI.getVRegDef(RHS);
3893 if (Select->getOpcode() != TargetOpcode::G_SELECT ||
3894 !MRI.hasOneNonDBGUse(RHS))
3895 return false;
3896 }
3897
// Operands 2 and 3 of the select are its true/false values; both must be
// (non-opaque) constants or constant vectors.
3898 MachineInstr *SelectLHS = MRI.getVRegDef(Select->getOperand(2).getReg());
3899 MachineInstr *SelectRHS = MRI.getVRegDef(Select->getOperand(3).getReg());
3900
3901 if (!isConstantOrConstantVector(*SelectLHS, MRI,
3902 /*AllowFP*/ true,
3903 /*AllowOpaqueConstants*/ false))
3904 return false;
3905 if (!isConstantOrConstantVector(*SelectRHS, MRI,
3906 /*AllowFP*/ true,
3907 /*AllowOpaqueConstants*/ false))
3908 return false;
3909
3910 unsigned BinOpcode = MI.getOpcode();
3911
3912 // We know that one of the operands is a select of constants. Now verify that
3913 // the other binary operator operand is either a constant, or we can handle a
3914 // variable.
// A variable other operand is accepted only for G_AND/G_OR when each select
// arm is all-zeros or all-ones.
3915 bool CanFoldNonConst =
3916 (BinOpcode == TargetOpcode::G_AND || BinOpcode == TargetOpcode::G_OR) &&
3917 (isNullOrNullSplat(*SelectLHS, MRI) ||
3918 isAllOnesOrAllOnesSplat(*SelectLHS, MRI)) &&
3919 (isNullOrNullSplat(*SelectRHS, MRI) ||
3920 isAllOnesOrAllOnesSplat(*SelectRHS, MRI));
3921 if (CanFoldNonConst)
3922 return true;
3923
3924 return isConstantOrConstantVector(*MRI.getVRegDef(OtherOperandReg), MRI,
3925 /*AllowFP*/ true,
3926 /*AllowOpaqueConstants*/ false);
3927}
3928
3929 /// \p SelectOperand is the operand in binary operator \p MI that is the select
3930 /// to fold.
// NOTE(review): the line carrying this function's return type and name
// (listing line 3931) is missing from this listing.
//
// Builds the binary operation once per select arm (keeping the select's side
// of the operation), emits a new select over the two results into MI's
// destination register, and erases MI.
3932 MachineInstr &MI, const unsigned &SelectOperand) const {
3933 Register Dst = MI.getOperand(0).getReg();
3934 Register LHS = MI.getOperand(1).getReg();
3935 Register RHS = MI.getOperand(2).getReg();
3936 MachineInstr *Select = MRI.getVRegDef(MI.getOperand(SelectOperand).getReg());
3937
3938 Register SelectCond = Select->getOperand(1).getReg();
3939 Register SelectTrue = Select->getOperand(2).getReg();
3940 Register SelectFalse = Select->getOperand(3).getReg();
3941
3942 LLT Ty = MRI.getType(Dst);
3943 unsigned BinOpcode = MI.getOpcode();
3944
3945 Register FoldTrue, FoldFalse;
3946
3947 // We have a select-of-constants followed by a binary operator with a
3948 // constant. Eliminate the binop by pulling the constant math into the select.
3949 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
// Operand order is preserved: the select arm goes on the side the select was on.
3950 if (SelectOperand == 1) {
3951 // TODO: SelectionDAG verifies this actually constant folds before
3952 // committing to the combine.
3953
3954 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {SelectTrue, RHS}).getReg(0);
3955 FoldFalse =
3956 Builder.buildInstr(BinOpcode, {Ty}, {SelectFalse, RHS}).getReg(0);
3957 } else {
3958 FoldTrue = Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectTrue}).getReg(0);
3959 FoldFalse =
3960 Builder.buildInstr(BinOpcode, {Ty}, {LHS, SelectFalse}).getReg(0);
3961 }
3962
// The new select is built with MI's flags.
3963 Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
3964 MI.eraseFromParent();
3965}
3966
// Walks the tree of G_OR instructions rooted at \p Root and collects every
// non-G_OR operand register found at its leaves.
//
// \returns the collected registers, or std::nullopt if any visited operand
// has more than one non-debug use, or if the number of collected registers is
// zero or odd.
3967std::optional<SmallVector<Register, 8>>
3968CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3969 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3970 // We want to detect if Root is part of a tree which represents a bunch
3971 // of loads being merged into a larger load. We'll try to recognize patterns
3972 // like, for example:
3973 //
3974 // Reg Reg
3975 // \ /
3976 // OR_1 Reg
3977 // \ /
3978 // OR_2
3979 // \ Reg
3980 // .. /
3981 // Root
3982 //
3983 // Reg Reg Reg Reg
3984 // \ / \ /
3985 // OR_1 OR_2
3986 // \ /
3987 // \ /
3988 // ...
3989 // Root
3990 //
3991 // Each "Reg" may have been produced by a load + some arithmetic. This
3992 // function will save each of them.
3993 SmallVector<Register, 8> RegsToVisit;
// NOTE(review): the declaration of `Ors` (listing line 3994) is missing from
// this listing; from its use below it is a worklist of G_OR instructions,
// presumably seeded with Root - confirm against the original file.
3995
3996 // In the "worst" case, we're dealing with a load for each byte. So, there
3997 // are at most #bytes - 1 ORs.
3998 const unsigned MaxIter =
3999 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
4000 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
4001 if (Ors.empty())
4002 break;
4003 const MachineInstr *Curr = Ors.pop_back_val();
4004 Register OrLHS = Curr->getOperand(1).getReg();
4005 Register OrRHS = Curr->getOperand(2).getReg();
4006
4007 // In the combine, we want to eliminate the entire tree.
4008 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
4009 return std::nullopt;
4010
4011 // If it's a G_OR, save it and continue to walk. If it's not, then it's
4012 // something that may be a load + arithmetic.
4013 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
4014 Ors.push_back(Or);
4015 else
4016 RegsToVisit.push_back(OrLHS);
4017 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
4018 Ors.push_back(Or);
4019 else
4020 RegsToVisit.push_back(OrRHS);
4021 }
4022
4023 // We're going to try and merge each register into a wider power-of-2 type,
4024 // so we ought to have an even number of registers.
4025 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
4026 return std::nullopt;
4027 return RegsToVisit;
4028}
4029
4030/// Helper function for findLoadOffsetsForLoadOrCombine.
4031///
4032/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
4033/// and then moving that value into a specific byte offset.
4034///
4035/// e.g. x[i] << 24
4036///
4037/// \returns The load instruction and the byte offset it is moved into.
4038static std::optional<std::pair<GZExtLoad *, int64_t>>
4039matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
4040 const MachineRegisterInfo &MRI) {
4041 assert(MRI.hasOneNonDBGUse(Reg) &&
4042 "Expected Reg to only have one non-debug use?");
4043 Register MaybeLoad;
4044 int64_t Shift;
4045 if (!mi_match(Reg, MRI,
4046 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
4047 Shift = 0;
4048 MaybeLoad = Reg;
4049 }
4050
4051 if (Shift % MemSizeInBits != 0)
4052 return std::nullopt;
4053
4054 // TODO: Handle other types of loads.
4055 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
4056 if (!Load)
4057 return std::nullopt;
4058
4059 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
4060 return std::nullopt;
4061
4062 return std::make_pair(Load, Shift / MemSizeInBits);
4063}
4064
// For every register in \p RegsToVisit, finds the zero-extending load (plus
// optional shift) that produces it via matchLoadAndBytePosition, and checks
// that all loads live in one basic block, use the same address space, share a
// base pointer, and use distinct indices and distinct destination positions.
// Each destination position is recorded in MemOffset2Idx with its load index.
//
// \returns (load with the lowest index, that lowest index, latest load in
// instruction order), or std::nullopt if any check fails or a load-fold
// barrier / too many instructions lie between the earliest and latest load.
//
// NOTE(review): listing line 4067 (a parameter line, apparently declaring
// `MemOffset2Idx`) and listing line 4095 (apparently declaring `BasePtr`) are
// missing from this listing - confirm against the original file.
4065std::optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
4066CombinerHelper::findLoadOffsetsForLoadOrCombine(
4068 const SmallVector<Register, 8> &RegsToVisit,
4069 const unsigned MemSizeInBits) const {
4070
4071 // Each load found for the pattern. There should be one for each RegsToVisit.
4072 SmallSetVector<const MachineInstr *, 8> Loads;
4073
4074 // The lowest index used in any load. (The lowest "i" for each x[i].)
4075 int64_t LowestIdx = INT64_MAX;
4076
4077 // The load which uses the lowest index.
4078 GZExtLoad *LowestIdxLoad = nullptr;
4079
4080 // Keeps track of the load indices we see. We shouldn't see any indices twice.
4081 SmallSet<int64_t, 8> SeenIdx;
4082
4083 // Ensure each load is in the same MBB.
4084 // TODO: Support multiple MachineBasicBlocks.
4085 MachineBasicBlock *MBB = nullptr;
4086 const MachineMemOperand *MMO = nullptr;
4087
4088 // Earliest instruction-order load in the pattern.
4089 GZExtLoad *EarliestLoad = nullptr;
4090
4091 // Latest instruction-order load in the pattern.
4092 GZExtLoad *LatestLoad = nullptr;
4093
4094 // Base pointer which every load should share.
4096
4097 // We want to find a load for each register. Each load should have some
4098 // appropriate bit twiddling arithmetic. During this loop, we will also keep
4099 // track of the load which uses the lowest index. Later, we will check if we
4100 // can use its pointer in the final, combined load.
4101 for (auto Reg : RegsToVisit) {
4102 // Find the load, and find the position that it will end up in (e.g. a
4103 // shifted) value.
4104 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
4105 if (!LoadAndPos)
4106 return std::nullopt;
4107 GZExtLoad *Load;
4108 int64_t DstPos;
4109 std::tie(Load, DstPos) = *LoadAndPos;
4110
4111 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
4112 // it is difficult to check for stores/calls/etc between loads.
4113 MachineBasicBlock *LoadMBB = Load->getParent();
4114 if (!MBB)
4115 MBB = LoadMBB;
4116 if (LoadMBB != MBB)
4117 return std::nullopt;
4118
4119 // Make sure that the MachineMemOperands of every seen load are compatible.
// Only the address space is compared here, against the first load seen.
4120 auto &LoadMMO = Load->getMMO();
4121 if (!MMO)
4122 MMO = &LoadMMO;
4123 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
4124 return std::nullopt;
4125
4126 // Find out what the base pointer and index for the load is.
// A pointer that is not (ptr_add base, constant) is treated as base + 0.
4127 Register LoadPtr;
4128 int64_t Idx;
4129 if (!mi_match(Load->getOperand(1).getReg(), MRI,
4130 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
4131 LoadPtr = Load->getOperand(1).getReg();
4132 Idx = 0;
4133 }
4134
4135 // Don't combine things like a[i], a[i] -> a bigger load.
4136 if (!SeenIdx.insert(Idx).second)
4137 return std::nullopt;
4138
4139 // Every load must share the same base pointer; don't combine things like:
4140 //
4141 // a[i], b[i + 1] -> a bigger load.
4142 if (!BasePtr.isValid())
4143 BasePtr = LoadPtr;
4144 if (BasePtr != LoadPtr)
4145 return std::nullopt;
4146
4147 if (Idx < LowestIdx) {
4148 LowestIdx = Idx;
4149 LowestIdxLoad = Load;
4150 }
4151
4152 // Keep track of the byte offset that this load ends up at. If we have seen
4153 // the byte offset, then stop here. We do not want to combine:
4154 //
4155 // a[i] << 16, a[i + k] << 16 -> a bigger load.
4156 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
4157 return std::nullopt;
4158 Loads.insert(Load);
4159
4160 // Keep track of the position of the earliest/latest loads in the pattern.
4161 // We will check that there are no load fold barriers between them later
4162 // on.
4163 //
4164 // FIXME: Is there a better way to check for load fold barriers?
4165 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
4166 EarliestLoad = Load;
4167 if (!LatestLoad || dominates(*LatestLoad, *Load))
4168 LatestLoad = Load;
4169 }
4170
4171 // We found a load for each register. Let's check if each load satisfies the
4172 // pattern.
4173 assert(Loads.size() == RegsToVisit.size() &&
4174 "Expected to find a load for each register?");
4175 assert(EarliestLoad != LatestLoad && EarliestLoad &&
4176 LatestLoad && "Expected at least two loads?");
4177
4178 // Check if there are any stores, calls, etc. between any of the loads. If
4179 // there are, then we can't safely perform the combine.
4180 //
4181 // MaxIter is chosen based off the (worst case) number of iterations it
4182 // typically takes to succeed in the LLVM test suite plus some padding.
4183 //
4184 // FIXME: Is there a better way to check for load fold barriers?
4185 const unsigned MaxIter = 20;
4186 unsigned Iter = 0;
// Loads that are part of the pattern are skipped and do not count towards
// MaxIter.
4187 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
4188 LatestLoad->getIterator())) {
4189 if (Loads.count(&MI))
4190 continue;
4191 if (MI.isLoadFoldBarrier())
4192 return std::nullopt;
4193 if (Iter++ == MaxIter)
4194 return std::nullopt;
4195 }
4196
4197 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
4198}
4199
4202 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature (listing lines 4200-4201,
// carrying the return type, the function name and the `MI` parameter) are
// missing from this listing.
//
// Matches a scalar G_OR tree that reassembles a wide value from narrower
// zero-extending loads of adjacent memory. On success MatchInfo is set to a
// builder callback that emits one wide load (plus a G_BSWAP when the byte
// order of the pattern differs from the target's) at the latest load.
4203 assert(MI.getOpcode() == TargetOpcode::G_OR);
4204 MachineFunction &MF = *MI.getMF();
4205 // Assuming a little-endian target, transform:
4206 // s8 *a = ...
4207 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
4208 // =>
4209 // s32 val = *((i32)a)
4210 //
4211 // s8 *a = ...
4212 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
4213 // =>
4214 // s32 val = BSWAP(*((s32)a))
4215 Register Dst = MI.getOperand(0).getReg();
4216 LLT Ty = MRI.getType(Dst);
4217 if (Ty.isVector())
4218 return false;
4219
4220 // We need to combine at least two loads into this type. Since the smallest
4221 // possible load is into a byte, we need at least a 16-bit wide type.
4222 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
4223 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
4224 return false;
4225
4226 // Match a collection of non-OR instructions in the pattern.
4227 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
4228 if (!RegsToVisit)
4229 return false;
4230
4231 // We have a collection of non-OR instructions. Figure out how wide each of
4232 // the small loads should be based off of the number of potential loads we
4233 // found.
4234 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
4235 if (NarrowMemSizeInBits % 8 != 0)
4236 return false;
4237
4238 // Check if each register feeding into each OR is a load from the same
4239 // base pointer + some arithmetic.
4240 //
4241 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
4242 //
4243 // Also verify that each of these ends up putting a[i] into the same memory
4244 // offset as a load into a wide type would.
// NOTE(review): the declaration of `MemOffset2Idx` (listing line 4245) is
// missing from this listing.
4246 GZExtLoad *LowestIdxLoad, *LatestLoad;
4247 int64_t LowestIdx;
4248 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
4249 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
4250 if (!MaybeLoadInfo)
4251 return false;
4252 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
4253
4254 // We have a bunch of loads being OR'd together. Using the addresses + offsets
4255 // we found before, check if this corresponds to a big or little endian byte
4256 // pattern. If it does, then we can represent it using a load + possibly a
4257 // BSWAP.
4258 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
4259 std::optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
4260 if (!IsBigEndian)
4261 return false;
4262 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
4263 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
4264 return false;
4265
4266 // Make sure that the load from the lowest index produces offset 0 in the
4267 // final value.
4268 //
4269 // This ensures that we won't combine something like this:
4270 //
4271 // load x[i] -> byte 2
4272 // load x[i+1] -> byte 0 ---> wide_load x[i]
4273 // load x[i+2] -> byte 1
4274 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
4275 const unsigned ZeroByteOffset =
4276 *IsBigEndian
4277 ? bigEndianByteAt(NumLoadsInTy, 0)
4278 : littleEndianByteAt(NumLoadsInTy, 0);
4279 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
4280 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
4281 ZeroOffsetIdx->second != LowestIdx)
4282 return false;
4283
4284 // We will reuse the pointer from the load which ends up at byte offset 0. It
4285 // may not use index 0.
4286 Register Ptr = LowestIdxLoad->getPointerReg();
4287 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
// Describe the wide access for the legality query: same memory operand as the
// lowest-index load, but with the full destination type as the memory type.
4288 LegalityQuery::MemDesc MMDesc(MMO);
4289 MMDesc.MemoryTy = Ty;
// NOTE(review): the line opening this condition (listing line 4290) is missing
// from this listing; the visible remainder is a legality query for a G_LOAD of
// type Ty with the memory description above.
4291 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
4292 return false;
4293 auto PtrInfo = MMO.getPointerInfo();
4294 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
4295
4296 // Load must be allowed and fast on the target.
// NOTE(review): the declaration of `C` (listing line 4297) is missing from this
// listing.
4298 auto &DL = MF.getDataLayout();
4299 unsigned Fast = 0;
4300 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
4301 !Fast)
4302 return false;
4303
// Everything the callback needs is captured by copy. The new load is inserted
// at the latest load of the pattern.
4304 MatchInfo = [=](MachineIRBuilder &MIB) {
4305 MIB.setInstrAndDebugLoc(*LatestLoad);
4306 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
4307 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
4308 if (NeedsBSwap)
4309 MIB.buildBSwap(Dst, LoadDst);
4310 };
4311 return true;
4312}
4313
4315 MachineInstr *&ExtMI) const {
// NOTE(review): the first line of this signature (listing line 4314, carrying
// the return type, the function name and the `MI` parameter) is missing from
// this listing.
//
// Matches a scalar G_PHI whose only non-debug user is an extend. On a match
// (and on some failing paths) ExtMI points at that single user.
4316 auto &PHI = cast<GPhi>(MI);
4317 Register DstReg = PHI.getReg(0);
4318
4319 // TODO: Extending a vector may be expensive, don't do this until heuristics
4320 // are better.
4321 if (MRI.getType(DstReg).isVector())
4322 return false;
4323
4324 // Try to match a phi, whose only use is an extend.
4325 if (!MRI.hasOneNonDBGUse(DstReg))
4326 return false;
4327 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
4328 switch (ExtMI->getOpcode()) {
4329 case TargetOpcode::G_ANYEXT:
4330 return true; // G_ANYEXT is usually free.
4331 case TargetOpcode::G_ZEXT:
4332 case TargetOpcode::G_SEXT:
4333 break;
4334 default:
4335 return false;
4336 }
4337
4338 // If the target is likely to fold this extend away, don't propagate.
4339 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
4340 return false;
4341
4342 // We don't want to propagate the extends unless there's a good chance that
4343 // they'll be optimized in some way.
4344 // Collect the unique incoming values.
// NOTE(review): the declaration of `InSrcs` (listing line 4345) is missing from
// this listing; from its use below it is a set of defining instructions.
4346 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4347 auto *DefMI = getDefIgnoringCopies(PHI.getIncomingValue(I), MRI);
4348 switch (DefMI->getOpcode()) {
4349 case TargetOpcode::G_LOAD:
4350 case TargetOpcode::G_TRUNC:
4351 case TargetOpcode::G_SEXT:
4352 case TargetOpcode::G_ZEXT:
4353 case TargetOpcode::G_ANYEXT:
4354 case TargetOpcode::G_CONSTANT:
4355 InSrcs.insert(DefMI);
4356 // Don't try to propagate if there are too many places to create new
4357 // extends, chances are it'll increase code size.
4358 if (InSrcs.size() > 2)
4359 return false;
4360 break;
4361 default:
4362 return false;
4363 }
4364 }
4365 return true;
4366}
4367
4369 MachineInstr *&ExtMI) const {
// NOTE(review): the first line of this signature (listing line 4368) is missing
// from this listing.
//
// Builds an extend (same opcode as ExtMI) after the definition of each
// distinct incoming value of the phi MI, then builds a new G_PHI over the
// extended values that defines ExtMI's destination register, and erases ExtMI.
4370 auto &PHI = cast<GPhi>(MI);
4371 Register DstReg = ExtMI->getOperand(0).getReg();
4372 LLT ExtTy = MRI.getType(DstReg);
4373
4374 // Propagate the extension into the block of each incoming reg's block.
4375 // Use a SetVector here because PHIs can have duplicate edges, and we want
4376 // deterministic iteration order.
// NOTE(review): the declarations of `SrcMIs` and `OldToNewSrcMap` (listing
// lines 4377-4378) are missing from this listing.
4379 for (unsigned I = 0; I < PHI.getNumIncomingValues(); ++I) {
4380 auto SrcReg = PHI.getIncomingValue(I);
4381 auto *SrcMI = MRI.getVRegDef(SrcReg);
// Each defining instruction gets exactly one new extend.
4382 if (!SrcMIs.insert(SrcMI))
4383 continue;
4384
4385 // Build an extend after each src inst.
4386 auto *MBB = SrcMI->getParent();
4387 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
// If the next instruction is a phi, insert at the first non-phi position of
// the block instead.
4388 if (InsertPt != MBB->end() && InsertPt->isPHI())
4389 InsertPt = MBB->getFirstNonPHI();
4390
4391 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
4392 Builder.setDebugLoc(MI.getDebugLoc());
4393 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy, SrcReg);
4394 OldToNewSrcMap[SrcMI] = NewExt;
4395 }
4396
4397 // Create a new phi with the extended inputs.
4398 Builder.setInstrAndDebugLoc(MI);
4399 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
4400 NewPhi.addDef(DstReg);
// Copy MI's operands after the def: block operands unchanged, register
// operands replaced by the result of the matching new extend.
4401 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
4402 if (!MO.isReg()) {
4403 NewPhi.addMBB(MO.getMBB());
4404 continue;
4405 }
4406 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
4407 NewPhi.addUse(NewSrc->getOperand(0).getReg());
4408 }
4409 Builder.insertInstr(NewPhi);
4410 ExtMI->eraseFromParent();
4411}
4412
4414 Register &Reg) const {
// NOTE(review): the first line of this signature (listing line 4413) is missing
// from this listing.
//
// Matches a G_EXTRACT_VECTOR_ELT with an in-range constant index whose source
// vector is a G_BUILD_VECTOR / G_BUILD_VECTOR_TRUNC, optionally seen through
// one G_TRUNC. On success Reg is the build-vector operand selected by the
// index.
4415 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
4416 // If we have a constant index, look for a G_BUILD_VECTOR source
4417 // and find the source register that the index maps to.
4418 Register SrcVec = MI.getOperand(1).getReg();
4419 LLT SrcTy = MRI.getType(SrcVec);
4420 if (SrcTy.isScalableVector())
4421 return false;
4422
4423 auto Cst = getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
4424 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
4425 return false;
4426
4427 unsigned VecIdx = Cst->Value.getZExtValue();
4428
4429 // Check if we have a build_vector or build_vector_trunc with an optional
4430 // trunc in front.
4431 MachineInstr *SrcVecMI = MRI.getVRegDef(SrcVec);
4432 if (SrcVecMI->getOpcode() == TargetOpcode::G_TRUNC) {
4433 SrcVecMI = MRI.getVRegDef(SrcVecMI->getOperand(1).getReg());
4434 }
4435
4436 if (SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
4437 SrcVecMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR_TRUNC)
4438 return false;
4439
// With multiple users of the vector, only proceed if the target asks for it.
4440 EVT Ty(getMVTForLLT(SrcTy));
4441 if (!MRI.hasOneNonDBGUse(SrcVec) &&
4442 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
4443 return false;
4444
// Operand 0 of the build vector is its def, so element I is operand I + 1.
4445 Reg = SrcVecMI->getOperand(VecIdx + 1).getReg();
4446 return true;
4447}
4448
4450 Register &Reg) const {
// NOTE(review): the first line of this signature (listing line 4449) is missing
// from this listing.
//
// Replaces the result of MI with Reg, truncating Reg to the destination type
// when the two types differ.
4451 // Check the type of the register, since it may have come from a
4452 // G_BUILD_VECTOR_TRUNC.
4453 LLT ScalarTy = MRI.getType(Reg);
4454 Register DstReg = MI.getOperand(0).getReg();
4455 LLT DstTy = MRI.getType(DstReg);
4456
4457 if (ScalarTy != DstTy) {
4458 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
4459 Builder.buildTrunc(DstReg, Reg);
4460 MI.eraseFromParent();
4461 return;
4462 }
// NOTE(review): the statement handling the equal-type case (listing line 4463)
// is missing from this listing; presumably it replaces MI's result with Reg
// directly - confirm against the original file.
4464}
4465
4468 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// NOTE(review): the first lines of this signature (listing lines 4466-4467,
// carrying the return type, the function name and the `MI` parameter) are
// missing from this listing.
//
// Matches a G_BUILD_VECTOR whose non-debug users are all
// G_EXTRACT_VECTOR_ELT instructions with in-range constant indices that
// together cover every element. One (source register, extract) pair is
// appended to SrcDstPairs per user; pairs may have been appended even when the
// function returns false.
4469 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4470 // This combine tries to find build_vector's which have every source element
4471 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
4472 // the masked load scalarization is run late in the pipeline. There's already
4473 // a combine for a similar pattern starting from the extract, but that
4474 // doesn't attempt to do it if there are multiple uses of the build_vector,
4475 // which in this case is true. Starting the combine from the build_vector
4476 // feels more natural than trying to find sibling nodes of extracts.
4477 // E.g.
4478 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
4479 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
4480 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
4481 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
4482 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
4483 // ==>
4484 // replace ext{1,2,3,4} with %s{1,2,3,4}
4485
4486 Register DstReg = MI.getOperand(0).getReg();
4487 LLT DstTy = MRI.getType(DstReg);
4488 unsigned NumElts = DstTy.getNumElements();
4489
// One bit per vector element; set when some user extracts that element.
4490 SmallBitVector ExtractedElts(NumElts);
4491 for (MachineInstr &II : MRI.use_nodbg_instructions(DstReg)) {
4492 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
4493 return false;
4494 auto Cst = getIConstantVRegVal(II.getOperand(2).getReg(), MRI);
4495 if (!Cst)
4496 return false;
4497 unsigned Idx = Cst->getZExtValue();
4498 if (Idx >= NumElts)
4499 return false; // Out of range.
4500 ExtractedElts.set(Idx);
// Operand 0 of MI is its def, so element Idx is operand Idx + 1.
4501 SrcDstPairs.emplace_back(
4502 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
4503 }
4504 // Match if every element was extracted.
4505 return ExtractedElts.all();
4506}
4507
4510 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) const {
// NOTE(review): the first lines of this signature (listing lines 4508-4509)
// are missing from this listing.
//
// For each (source register, extract) pair, replaces the extract's result
// with the source register and erases the extract; finally erases MI.
4511 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4512 for (auto &Pair : SrcDstPairs) {
4513 auto *ExtMI = Pair.second;
4514 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
4515 ExtMI->eraseFromParent();
4516 }
4517 MI.eraseFromParent();
4518}
4519
4522 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature (listing lines 4520-4521)
// are missing from this listing.
//
// Runs the build callback via applyBuildFnNoErase, then erases MI.
4523 applyBuildFnNoErase(MI, MatchInfo);
4524 MI.eraseFromParent();
4525}
4526
4529 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature (listing lines 4527-4528)
// are missing from this listing.
//
// Invokes the build callback with this helper's Builder; nothing is erased.
4530 MatchInfo(Builder);
4531}
4532
4534 bool AllowScalarConstants,
4535 BuildFnTy &MatchInfo) const {
// NOTE(review): the first line of this signature (listing line 4533) is missing
// from this listing.
//
// Matches (or (shl x, a), (lshr y, b)) where the two shift amounts are known to
// add up to the scalar bit width, and sets MatchInfo to a callback that builds
// the equivalent G_FSHL/G_FSHR into MI's destination register.
4536 assert(MI.getOpcode() == TargetOpcode::G_OR);
4537
4538 Register Dst = MI.getOperand(0).getReg();
4539 LLT Ty = MRI.getType(Dst);
4540 unsigned BitWidth = Ty.getScalarSizeInBits();
4541
4542 Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
4543 unsigned FshOpc = 0;
4544
4545 // Match (or (shl ...), (lshr ...)).
4546 if (!mi_match(Dst, MRI,
4547 // m_GOr() handles the commuted version as well.
4548 m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
4549 m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
4550 return false;
4551
4552 // Given constants C0 and C1 such that C0 + C1 is bit-width:
4553 // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
4554 int64_t CstShlAmt = 0, CstLShrAmt;
4555 if (mi_match(ShlAmt, MRI, m_ICstOrSplat(CstShlAmt)) &&
4556 mi_match(LShrAmt, MRI, m_ICstOrSplat(CstLShrAmt)) &&
4557 CstShlAmt + CstLShrAmt == BitWidth) {
4558 FshOpc = TargetOpcode::G_FSHR;
4559 Amt = LShrAmt;
4560 } else if (mi_match(LShrAmt, MRI,
// NOTE(review): the pattern argument of this mi_match (listing line 4561) is
// missing from this listing; per the comment below it presumably matches
// (sub bw, amt) and binds Amt.
4562 ShlAmt == Amt) {
4563 // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
4564 FshOpc = TargetOpcode::G_FSHL;
4565 } else if (mi_match(ShlAmt, MRI,
// NOTE(review): the pattern argument of this mi_match (listing line 4566) is
// missing from this listing; presumably the mirror image of the case above.
4567 LShrAmt == Amt) {
4568 // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
4569 FshOpc = TargetOpcode::G_FSHR;
4570 } else {
4571 return false;
4572 }
4573
// Reject when the funnel shift is not legal (or before the legalizer), unless
// AllowScalarConstants is set, the shl amount constant is non-zero, and Ty is
// scalar.
4574 LLT AmtTy = MRI.getType(Amt);
4575 if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}) &&
4576 (!AllowScalarConstants || CstShlAmt == 0 || !Ty.isScalar()))
4577 return false;
4578
4579 MatchInfo = [=](MachineIRBuilder &B) {
4580 B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
4581 };
4582 return true;
4583}
4584
4585 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
// NOTE(review): the signature line of this function (listing line 4586) is
// missing from this listing.
//
// The funnel shift qualifies when both value operands are the same register
// and the corresponding rotate (G_FSHL -> G_ROTL, G_FSHR -> G_ROTR) passes
// isLegalOrBeforeLegalizer.
4587 unsigned Opc = MI.getOpcode();
4588 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4589 Register X = MI.getOperand(1).getReg();
4590 Register Y = MI.getOperand(2).getReg();
4591 if (X != Y)
4592 return false;
4593 unsigned RotateOpc =
4594 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4595 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4596}
4597
4599 unsigned Opc = MI.getOpcode();
// NOTE(review): the signature line of this function (listing line 4598) is
// missing from this listing.
//
// Turns the funnel shift MI into the matching rotate in place: the opcode is
// switched (G_FSHL -> G_ROTL, G_FSHR -> G_ROTR) and operand 2 is removed.
4600 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4601 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4602 Observer.changingInstr(MI);
4603 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4604 : TargetOpcode::G_ROTR));
4605 MI.removeOperand(2);
4606 Observer.changedInstr(MI);
4607}
4608
4609 // Fold (rot x, c) -> (rot x, c % BitSize)
// NOTE(review): the signature line of this function (listing line 4610) is
// missing from this listing.
//
// Returns true when the rotate amount satisfies matchUnaryPredicate and at
// least one visited ConstantInt is >= the scalar bit width of the result.
4611 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4612 MI.getOpcode() == TargetOpcode::G_ROTR);
4613 unsigned Bitsize =
4614 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4615 Register AmtReg = MI.getOperand(2).getReg();
4616 bool OutOfRange = false;
// The predicate accepts every constant; it only records whether any
// ConstantInt it sees is out of range.
4617 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4618 if (auto *CI = dyn_cast<ConstantInt>(C))
4619 OutOfRange |= CI->getValue().uge(Bitsize);
4620 return true;
4621 };
4622 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4623}
4624
4626 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4627 MI.getOpcode() == TargetOpcode::G_ROTR);
// NOTE(review): the signature line of this function (listing line 4625) is
// missing from this listing.
//
// Replaces the rotate amount of MI with (urem amount, bit width), where the
// bit width is the scalar size of MI's result type.
4628 unsigned Bitsize =
4629 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4630 Register Amt = MI.getOperand(2).getReg();
4631 LLT AmtTy = MRI.getType(Amt);
4632 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4633 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
// Only the amount operand changes; notify the observer around the edit.
4634 Observer.changingInstr(MI);
4635 MI.getOperand(2).setReg(Amt);
4636 Observer.changedInstr(MI);
4637}
4638
4640 int64_t &MatchInfo) const {
// NOTE(review): the first line of this signature (listing line 4639) is missing
// from this listing.
//
// Tries to decide the result of the G_ICMP MI from the known bits of its
// operands. On success MatchInfo is the constant to use for the result: 0 for
// a known-false compare, otherwise the value computed by the partly missing
// expression below.
4641 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4642 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4643
4644 // We want to avoid calling KnownBits on the LHS if possible, as this combine
4645 // has no filter and runs on every G_ICMP instruction. We can avoid calling
4646 // KnownBits on the LHS in two cases:
4647 //
4648 // - The RHS is unknown: Constants are always on RHS. If the RHS is unknown
4649 // we cannot do any transforms so we can safely bail out early.
4650 // - The RHS is zero: we don't need to know the LHS to do unsigned <0 and
4651 // >=0.
4652 auto KnownRHS = VT->getKnownBits(MI.getOperand(3).getReg());
4653 if (KnownRHS.isUnknown())
4654 return false;
4655
4656 std::optional<bool> KnownVal;
4657 if (KnownRHS.isZero()) {
4658 // ? uge 0 -> always true
4659 // ? ult 0 -> always false
4660 if (Pred == CmpInst::ICMP_UGE)
4661 KnownVal = true;
4662 else if (Pred == CmpInst::ICMP_ULT)
4663 KnownVal = false;
4664 }
4665
// Fall back to comparing the known bits of both sides.
4666 if (!KnownVal) {
4667 auto KnownLHS = VT->getKnownBits(MI.getOperand(2).getReg());
4668 KnownVal = ICmpInst::compare(KnownLHS, KnownRHS, Pred);
4669 }
4670
4671 if (!KnownVal)
4672 return false;
4673 MatchInfo =
4674 *KnownVal
// NOTE(review): the start of the "true" arm of this conditional (listing line
// 4675) is missing from this listing; presumably it asks the target for its
// "true" value for an integer compare of this result type - confirm.
4676 /*IsVector = */
4677 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4678 /* IsFP = */ false)
4679 : 0;
4680 return true;
4681}
4682
4685 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the first lines of this signature (listing lines 4683-4684)
// are missing from this listing.
//
// Matches an equality G_ICMP of a value known to be 0 or 1 against the
// constant that makes the compare equal to that value, and sets MatchInfo to a
// callback that defines the result from the LHS via COPY, G_TRUNC or G_ZEXT.
4686 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4687 // Given:
4688 //
4689 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4690 // %cmp = G_ICMP ne %x, 0
4691 //
4692 // Or:
4693 //
4694 // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
4695 // %cmp = G_ICMP eq %x, 1
4696 //
4697 // We can replace %cmp with %x assuming true is 1 on the target.
4698 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4699 if (!CmpInst::isEquality(Pred))
4700 return false;
4701 Register Dst = MI.getOperand(0).getReg();
4702 LLT DstTy = MRI.getType(Dst);
// NOTE(review): the start of this condition (listing line 4703) is missing from
// this listing; per the comment above it presumably checks that the target's
// "true" value for this result type is 1.
4704 /* IsFP = */ false) != 1)
4705 return false;
// eq must compare against 1, ne against 0.
4706 int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
4707 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
4708 return false;
4709 Register LHS = MI.getOperand(2).getReg();
4710 auto KnownLHS = VT->getKnownBits(LHS);
4711 if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
4712 return false;
4713 // Make sure replacing Dst with the LHS is a legal operation.
4714 LLT LHSTy = MRI.getType(LHS);
4715 unsigned LHSSize = LHSTy.getSizeInBits();
4716 unsigned DstSize = DstTy.getSizeInBits();
// Same size: plain COPY. Otherwise truncate or zero-extend to the result size.
4717 unsigned Op = TargetOpcode::COPY;
4718 if (DstSize != LHSSize)
4719 Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
4720 if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
4721 return false;
4722 MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
4723 return true;
4724}
4725
4726 // Replace (and (or x, c1), c2) with (and x, c2) iff c1 & c2 == 0
// NOTE(review): the first lines of this signature (listing lines 4727-4728)
// are missing from this listing.
//
// On success MatchInfo is a callback that rewrites MI in place so that it
// ANDs the G_OR's non-constant source directly with the mask.
4729 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
4730 assert(MI.getOpcode() == TargetOpcode::G_AND);
4731
4732 // Ignore vector types to simplify matching the two constants.
4733 // TODO: do this for vectors and scalars via a demanded bits analysis.
4734 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4735 if (Ty.isVector())
4736 return false;
4737
4738 Register Src;
4739 Register AndMaskReg;
4740 int64_t AndMaskBits;
4741 int64_t OrMaskBits;
// m_all_of binds both the mask's constant value and the register holding it.
4742 if (!mi_match(MI, MRI,
4743 m_GAnd(m_GOr(m_Reg(Src), m_ICst(OrMaskBits)),
4744 m_all_of(m_ICst(AndMaskBits), m_Reg(AndMaskReg)))))
4745 return false;
4746
4747 // Check if OrMask could turn on any bits in Src.
4748 if (AndMaskBits & OrMaskBits)
4749 return false;
4750
// MI is captured by reference: the callback mutates the matched instruction
// itself, so MI must still be alive when the callback runs.
4751 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4752 Observer.changingInstr(MI);
4753 // Canonicalize the result to have the constant on the RHS.
4754 if (MI.getOperand(1).getReg() == AndMaskReg)
4755 MI.getOperand(2).setReg(AndMaskReg);
4756 MI.getOperand(1).setReg(Src);
4757 Observer.changedInstr(MI);
4758 };
4759 return true;
4760}
4761
4762 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
     // NOTE(review): listing lines 4763-4764 (the opening of this definition's
     // signature) are absent from this extract.
4765 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // Returns true, with MatchInfo set to a builder callback, when the source
     // of the G_SEXT_INREG is a single-use G_ASHR or G_LSHR by a constant and
     // the extracted field lies inside the scalar.
4766 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4767 Register Dst = MI.getOperand(0).getReg();
4768 Register Src = MI.getOperand(1).getReg();
4769 LLT Ty = MRI.getType(Src);
     // NOTE(review): listing line 4770 is absent; ExtractTy, used below, is
     // presumably declared there -- confirm against the full file.
     // Without legalizer info (LI is null) the combine is refused outright.
4771 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4772 return false;
     // Operand 2 of G_SEXT_INREG is an immediate; it becomes the field width.
4773 int64_t Width = MI.getOperand(2).getImm();
4774 Register ShiftSrc;
4775 int64_t ShiftImm;
4776 if (!mi_match(
4777 Src, MRI,
4778 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4779 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4780 return false;
     // Reject negative shift amounts and fields whose top bit would lie beyond
     // the scalar width.
4781 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4782 return false;
4783
4784 MatchInfo = [=](MachineIRBuilder &B) {
     // Cst1 carries the shift amount (field start), Cst2 the field width.
4785 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4786 auto Cst2 = B.buildConstant(ExtractTy, Width);
4787 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4788 };
4789 return true;
4790}
4791
4792 /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
     // NOTE(review): listing line 4793 (the opening of this definition's
     // signature) is absent from this extract.
4794 BuildFnTy &MatchInfo) const {
     // Returns true, with MatchInfo set to a builder callback that emits the
     // G_UBFX into the G_AND's destination register, when the pattern matches.
4795 GAnd *And = cast<GAnd>(&MI);
4796 Register Dst = And->getReg(0);
4797 LLT Ty = MRI.getType(Dst);
     // NOTE(review): listing line 4798 is absent; ExtractTy, used below, is
     // presumably declared there -- confirm against the full file.
4799 // Note that isLegalOrBeforeLegalizer is stricter and does not take custom
4800 // into account.
     // A null LI does not block the combine; only an explicit "not legal or
     // custom" answer does.
4801 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4802 return false;
4803
4804 int64_t AndImm, LSBImm;
4805 Register ShiftSrc;
4806 const unsigned Size = Ty.getScalarSizeInBits();
     // The logical shift must have a single non-debug use.
4807 if (!mi_match(And->getReg(0), MRI,
4808 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4809 m_ICst(AndImm))))
4810 return false;
4811
4812 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4813 auto MaybeMask = static_cast<uint64_t>(AndImm);
4814 if (MaybeMask & (MaybeMask + 1))
4815 return false;
4816
4817 // LSB must fit within the register.
     // The unsigned cast makes a negative LSBImm compare as a very large value,
     // so negative shift amounts are rejected by the same test.
4818 if (static_cast<uint64_t>(LSBImm) >= Size)
4819 return false;
4820
     // Field width = number of trailing one bits of the mask, taken at Size bits.
4821 uint64_t Width = APInt(Size, AndImm).countr_one();
4822 MatchInfo = [=](MachineIRBuilder &B) {
4823 auto WidthCst = B.buildConstant(ExtractTy, Width);
4824 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4825 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4826 };
4827 return true;
4828}
4829
4832 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): listing lines 4830-4831 (the opening of this definition's
     // signature) are absent from this extract.
     //
     // Matches a G_ASHR/G_LSHR by a constant whose shifted operand is a
     // single-use G_SHL by a constant, and on success stores in MatchInfo a
     // callback that emits one G_SBFX (for G_ASHR) or G_UBFX (for G_LSHR).
4833 const unsigned Opcode = MI.getOpcode();
4834 assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);
4835
4836 const Register Dst = MI.getOperand(0).getReg();
4837
     // Arithmetic shift selects the signed extract, logical the unsigned one.
4838 const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
4839 ? TargetOpcode::G_SBFX
4840 : TargetOpcode::G_UBFX;
4841
4842 // Check if the type we would use for the extract is legal
4843 LLT Ty = MRI.getType(Dst);
     // NOTE(review): listing line 4844 is absent; ExtractTy, used below, is
     // presumably declared there -- confirm against the full file.
     // Without legalizer info (LI is null) the combine is refused outright.
4845 if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
4846 return false;
4847
4848 Register ShlSrc;
4849 int64_t ShrAmt;
4850 int64_t ShlAmt;
4851 const unsigned Size = Ty.getScalarSizeInBits();
4852
4853 // Try to match shr (shl x, c1), c2
4854 if (!mi_match(Dst, MRI,
4855 m_BinOp(Opcode,
4856 m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
4857 m_ICst(ShrAmt))))
4858 return false;
4859
4860 // Make sure that the shift sizes can fit a bitfield extract
     // Requires 0 <= ShlAmt <= ShrAmt < Size.
4861 if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
4862 return false;
4863
4864 // Skip this combine if the G_SEXT_INREG combine could handle it
4865 if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
4866 return false;
4867
4868 // Calculate start position and width of the extract
     // The field starts ShrAmt - ShlAmt bits up in the original value and keeps
     // the Size - ShrAmt bits that remain after the right shift.
4869 const int64_t Pos = ShrAmt - ShlAmt;
4870 const int64_t Width = Size - ShrAmt;
4871
4872 MatchInfo = [=](MachineIRBuilder &B) {
4873 auto WidthCst = B.buildConstant(ExtractTy, Width);
4874 auto PosCst = B.buildConstant(ExtractTy, Pos);
4875 B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
4876 };
4877 return true;
4878}
4879
4882 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): listing lines 4880-4881 (the opening of this definition's
     // signature) are absent from this extract.
     //
     // Matches a G_LSHR/G_ASHR by a constant whose shifted operand is a
     // single-use G_AND with a constant mask. On success MatchInfo either
     // materialises constant 0 (the shift discards every masked-in bit) or
     // emits a G_UBFX of the G_AND's source.
4883 const unsigned Opcode = MI.getOpcode();
4884 assert(Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_ASHR);
4885
4886 const Register Dst = MI.getOperand(0).getReg();
4887 LLT Ty = MRI.getType(Dst);
     // NOTE(review): listing line 4888 is absent; ExtractTy, used below, is
     // presumably declared there -- confirm against the full file.
     // A null LI does not block the combine; only an explicit "not legal or
     // custom" answer does.
4889 if (LI && !LI->isLegalOrCustom({TargetOpcode::G_UBFX, {Ty, ExtractTy}}))
4890 return false;
4891
4892 // Try to match shr (and x, c1), c2
4893 Register AndSrc;
4894 int64_t ShrAmt;
4895 int64_t SMask;
4896 if (!mi_match(Dst, MRI,
4897 m_BinOp(Opcode,
4898 m_OneNonDBGUse(m_GAnd(m_Reg(AndSrc), m_ICst(SMask))),
4899 m_ICst(ShrAmt))))
4900 return false;
4901
4902 const unsigned Size = Ty.getScalarSizeInBits();
4903 if (ShrAmt < 0 || ShrAmt >= Size)
4904 return false;
4905
4906 // If the shift subsumes the mask, emit the 0 directly.
4907 if (0 == (SMask >> ShrAmt)) {
4908 MatchInfo = [=](MachineIRBuilder &B) {
4909 B.buildConstant(Dst, 0);
4910 };
4911 return true;
4912 }
4913
4914 // Check that ubfx can do the extraction, with no holes in the mask.
     // Bits below ShrAmt are shifted out anyway, so they are filled in before
     // testing that the mask is one contiguous run of low bits.
4915 uint64_t UMask = SMask;
4916 UMask |= maskTrailingOnes<uint64_t>(ShrAmt);
     // NOTE(review): listing line 4917 is absent from this extract; whatever it
     // does to UMask is not visible here.
4918 if (!isMask_64(UMask))
4919 return false;
4920
4921 // Calculate start position and width of the extract.
4922 const int64_t Pos = ShrAmt;
4923 const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
4924
4925 // It's preferable to keep the shift, rather than form G_SBFX.
4926 // TODO: remove the G_AND via demanded bits analysis.
4927 if (Opcode == TargetOpcode::G_ASHR && Width + ShrAmt == Size)
4928 return false;
4929
     // G_UBFX is emitted for both shift opcodes on this path.
4930 MatchInfo = [=](MachineIRBuilder &B) {
4931 auto WidthCst = B.buildConstant(ExtractTy, Width);
4932 auto PosCst = B.buildConstant(ExtractTy, Pos);
4933 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {AndSrc, PosCst, WidthCst});
4934 };
4935 return true;
4936}
4937
4938bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4939 MachineInstr &MI) const {
4940 auto &PtrAdd = cast<GPtrAdd>(MI);
4941
4942 Register Src1Reg = PtrAdd.getBaseReg();
4943 auto *Src1Def = getOpcodeDef<GPtrAdd>(Src1Reg, MRI);
4944 if (!Src1Def)
4945 return false;
4946
4947 Register Src2Reg = PtrAdd.getOffsetReg();
4948
4949 if (MRI.hasOneNonDBGUse(Src1Reg))
4950 return false;
4951
4952 auto C1 = getIConstantVRegVal(Src1Def->getOffsetReg(), MRI);
4953 if (!C1)
4954 return false;
4955 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
4956 if (!C2)
4957 return false;
4958
4959 const APInt &C1APIntVal = *C1;
4960 const APInt &C2APIntVal = *C2;
4961 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4962
4963 for (auto &UseMI : MRI.use_nodbg_instructions(PtrAdd.getReg(0))) {
4964 // This combine may end up running before ptrtoint/inttoptr combines
4965 // manage to eliminate redundant conversions, so try to look through them.
4966 MachineInstr *ConvUseMI = &UseMI;
4967 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4968 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4969 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4970 Register DefReg = ConvUseMI->getOperand(0).getReg();
4971 if (!MRI.hasOneNonDBGUse(DefReg))
4972 break;
4973 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4974 ConvUseOpc = ConvUseMI->getOpcode();
4975 }
4976 auto *LdStMI = dyn_cast<GLoadStore>(ConvUseMI);
4977 if (!LdStMI)
4978 continue;
4979 // Is x[offset2] already not a legal addressing mode? If so then
4980 // reassociating the constants breaks nothing (we test offset2 because
4981 // that's the one we hope to fold into the load or store).
4982 TargetLoweringBase::AddrMode AM;
4983 AM.HasBaseReg = true;
4984 AM.BaseOffs = C2APIntVal.getSExtValue();
4985 unsigned AS = MRI.getType(LdStMI->getPointerReg()).getAddressSpace();
4986 Type *AccessTy = getTypeForLLT(LdStMI->getMMO().getMemoryType(),
4987 PtrAdd.getMF()->getFunction().getContext());
4988 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4989 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4990 AccessTy, AS))
4991 continue;
4992
4993 // Would x[offset1+offset2] still be a legal addressing mode?
4994 AM.BaseOffs = CombinedValue;
4995 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4996 AccessTy, AS))
4997 return true;
4998 }
4999
5000 return false;
5001}
5002
5004 MachineInstr *RHS,
     // NOTE(review): listing line 5003 (the opening of this definition's
     // signature) is absent from this extract.
5005 BuildFnTy &MatchInfo) const {
5006 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5007 Register Src1Reg = MI.getOperand(1).getReg();
5008 if (RHS->getOpcode() != TargetOpcode::G_ADD)
5009 return false;
     // Only the second operand of the G_ADD is checked for being a constant.
5010 auto C2 = getIConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
5011 if (!C2)
5012 return false;
5013
5014 // If both additions are nuw, the reassociated additions are also nuw.
5015 // If the original G_PTR_ADD is additionally nusw, X and C are both not
5016 // negative, so BASE+X is between BASE and BASE+(X+C). The new G_PTR_ADDs are
5017 // therefore also nusw.
5018 // If the original G_PTR_ADD is additionally inbounds (which implies nusw),
5019 // the new G_PTR_ADDs are then also inbounds.
5020 unsigned PtrAddFlags = MI.getFlags();
5021 unsigned AddFlags = RHS->getFlags();
5022 bool IsNoUWrap = PtrAddFlags & AddFlags & MachineInstr::MIFlag::NoUWrap;
5023 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::NoUSWrap);
5024 bool IsInBounds = IsNoUWrap && (PtrAddFlags & MachineInstr::MIFlag::InBounds);
5025 unsigned Flags = 0;
     // NOTE(review): listing lines 5027, 5029 and 5031 -- the statements guarded
     // by the three ifs below, presumably the updates to Flags -- are absent
     // from this extract.
5026 if (IsNoUWrap)
5028 if (IsNoUSWrap)
5030 if (IsInBounds)
5032
     // MI is captured by reference; RHS, Src1Reg and Flags are copied.
5033 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5034 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
5035
     // The new inner G_PTR_ADD is created through the member Builder rather
     // than the callback's parameter B.
5036 auto NewBase =
5037 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg(), Flags);
5038 Observer.changingInstr(MI);
5039 MI.getOperand(1).setReg(NewBase.getReg(0));
5040 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
5041 MI.setFlags(Flags);
5042 Observer.changedInstr(MI);
5043 };
     // MatchInfo has already been assigned at this point; the match is only
     // reported when the rewrite cannot break an addressing-mode pattern.
5044 return !reassociationCanBreakAddressingModePattern(MI);
5045}
5046
5048 MachineInstr *LHS,
     // NOTE(review): listing line 5047 (the opening of this definition's
     // signature) is absent from this extract.
5049 MachineInstr *RHS,
5050 BuildFnTy &MatchInfo) const {
5051 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> (G_PTR_ADD (G_PTR_ADD(X, Y), C)
5052 // if and only if (G_PTR_ADD X, C) has one use.
5053 Register LHSBase;
5054 std::optional<ValueAndVReg> LHSCstOff;
5055 if (!mi_match(MI.getBaseReg(), MRI,
5056 m_OneNonDBGUse(m_GPtrAdd(m_Reg(LHSBase), m_GCst(LHSCstOff)))))
5057 return false;
5058
     // The match above is performed on MI's base register; LHS is then cast
     // unconditionally to GPtrAdd.
5059 auto *LHSPtrAdd = cast<GPtrAdd>(LHS);
5060
5061 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5062 // nuw and inbounds (which implies nusw), the offsets are both non-negative,
5063 // so the new G_PTR_ADDs are also inbounds.
5064 unsigned PtrAddFlags = MI.getFlags();
5065 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5066 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
     // NOTE(review): listing lines 5068 and 5070 (the ends of the next two
     // initialisers) and 5073, 5075, 5077 (the statements guarded by the three
     // ifs, presumably the updates to Flags) are absent from this extract.
5067 bool IsNoUSWrap = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5069 bool IsInBounds = IsNoUWrap && (PtrAddFlags & LHSPtrAddFlags &
5071 unsigned Flags = 0;
5072 if (IsNoUWrap)
5074 if (IsNoUSWrap)
5076 if (IsInBounds)
5078
5079 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5080 // When we change LHSPtrAdd's offset register we might cause it to use a reg
5081 // before its def. Sink the instruction so the outer PTR_ADD to ensure this
5082 // doesn't happen.
5083 LHSPtrAdd->moveBefore(&MI);
5084 Register RHSReg = MI.getOffsetReg();
5085 // set VReg will cause type mismatch if it comes from extend/trunc
     // A fresh constant of RHSReg's type is built instead of reusing the
     // matched constant's register.
5086 auto NewCst = B.buildConstant(MRI.getType(RHSReg), LHSCstOff->Value);
     // Outer G_PTR_ADD now adds the constant ...
5087 Observer.changingInstr(MI);
5088 MI.getOperand(2).setReg(NewCst.getReg(0));
5089 MI.setFlags(Flags);
5090 Observer.changedInstr(MI);
     // ... and the inner G_PTR_ADD now adds the former outer offset.
5091 Observer.changingInstr(*LHSPtrAdd);
5092 LHSPtrAdd->getOperand(2).setReg(RHSReg);
5093 LHSPtrAdd->setFlags(Flags);
5094 Observer.changedInstr(*LHSPtrAdd);
5095 };
     // MatchInfo has already been assigned at this point; the match is only
     // reported when the rewrite cannot break an addressing-mode pattern.
5096 return !reassociationCanBreakAddressingModePattern(MI);
5097}
5098
5100 GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS,
     // NOTE(review): listing line 5099 (the opening of this definition's
     // signature) is absent from this extract.
5101 BuildFnTy &MatchInfo) const {
5102 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5103 auto *LHSPtrAdd = dyn_cast<GPtrAdd>(LHS);
5104 if (!LHSPtrAdd)
5105 return false;
5106
5107 Register Src2Reg = MI.getOperand(2).getReg();
5108 Register LHSSrc1 = LHSPtrAdd->getBaseReg();
5109 Register LHSSrc2 = LHSPtrAdd->getOffsetReg();
     // Both the inner and the outer offset must be integer constants.
5110 auto C1 = getIConstantVRegVal(LHSSrc2, MRI);
5111 if (!C1)
5112 return false;
5113 auto C2 = getIConstantVRegVal(Src2Reg, MRI);
5114 if (!C2)
5115 return false;
5116
5117 // Reassociating nuw additions preserves nuw. If both original G_PTR_ADDs are
5118 // inbounds, reaching the same result in one G_PTR_ADD is also inbounds.
5119 // The nusw constraints are satisfied because imm1+imm2 cannot exceed the
5120 // largest signed integer that fits into the index type, which is the maximum
5121 // size of allocated objects according to the IR Language Reference.
5122 unsigned PtrAddFlags = MI.getFlags();
5123 unsigned LHSPtrAddFlags = LHSPtrAdd->getFlags();
5124 bool IsNoUWrap = PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::NoUWrap;
5125 bool IsInBounds =
5126 PtrAddFlags & LHSPtrAddFlags & MachineInstr::MIFlag::InBounds;
5127 unsigned Flags = 0;
     // NOTE(review): listing lines 5129, 5131 and 5132 -- the statements guarded
     // by the two ifs below, presumably the updates to Flags -- are absent from
     // this extract.
5128 if (IsNoUWrap)
5130 if (IsInBounds) {
5133 }
5134
5135 MatchInfo = [=, &MI](MachineIRBuilder &B) {
     // The folded offset is materialised with the type of the outer offset
     // register, and MI is rewired to add it to the inner base.
5136 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
5137 Observer.changingInstr(MI);
5138 MI.getOperand(1).setReg(LHSSrc1);
5139 MI.getOperand(2).setReg(NewCst.getReg(0));
5140 MI.setFlags(Flags);
5141 Observer.changedInstr(MI);
5142 };
     // MatchInfo has already been assigned at this point; the match is only
     // reported when the rewrite cannot break an addressing-mode pattern.
5143 return !reassociationCanBreakAddressingModePattern(MI);
5144}
5145
5147 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5146 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Dispatcher for the G_PTR_ADD reassociation patterns: tries the three
     // helpers below in a fixed order and returns true as soon as one of them
     // matches (that helper has then filled in MatchInfo).
5148 auto &PtrAdd = cast<GPtrAdd>(MI);
5149 // We're trying to match a few pointer computation patterns here for
5150 // re-association opportunities.
5151 // 1) Isolating a constant operand to be on the RHS, e.g.:
5152 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
5153 //
5154 // 2) Folding two constants in each sub-tree as long as such folding
5155 // doesn't break a legal addressing mode.
5156 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
5157 //
5158 // 3) Move a constant from the LHS of an inner op to the RHS of the outer.
5159 // G_PTR_ADD (G_PTR_ADD X, C), Y) -> G_PTR_ADD (G_PTR_ADD(X, Y), C)
5160 // iif (G_PTR_ADD X, C) has one use.
     // LHS / RHS are the defining instructions of the base and offset registers.
5161 MachineInstr *LHS = MRI.getVRegDef(PtrAdd.getBaseReg());
5162 MachineInstr *RHS = MRI.getVRegDef(PtrAdd.getOffsetReg());
5163
     // The patterns are tried in the order 2, 3, 1.
5164 // Try to match example 2.
5165 if (matchReassocFoldConstantsInSubTree(PtrAdd, LHS, RHS, MatchInfo))
5166 return true;
5167
5168 // Try to match example 3.
5169 if (matchReassocConstantInnerLHS(PtrAdd, LHS, RHS, MatchInfo))
5170 return true;
5171
5172 // Try to match example 1.
5173 if (matchReassocConstantInnerRHS(PtrAdd, RHS, MatchInfo))
5174 return true;
5175
5176 return false;
5177}
5179 Register OpLHS, Register OpRHS,
     // NOTE(review): listing line 5178 (the opening of this definition's
     // signature, which declares Opc and DstReg used below) is absent from
     // this extract.
5180 BuildFnTy &MatchInfo) const {
     // Tries to reassociate (Opc (Opc X, C), OpRHS) so that the constant C can
     // meet other constants. Returns true with MatchInfo set when one of the
     // two rewrites below applies, false otherwise.
5181 LLT OpRHSTy = MRI.getType(OpRHS);
5182 MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
5183
     // The left operand must be produced by the same opcode.
5184 if (OpLHSDef->getOpcode() != Opc)
5185 return false;
5186
5187 MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
5188 Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
5189 Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
5190
5191 // If the inner op is (X op C), pull the constant out so it can be folded with
5192 // other constants in the expression tree. Folding is not guaranteed so we
5193 // might have (C1 op C2). In that case do not pull a constant out because it
5194 // won't help and can lead to infinite loops.
5195 if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
5196 !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
5197 if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
5198 // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
5199 MatchInfo = [=](MachineIRBuilder &B) {
5200 auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
5201 B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
5202 };
5203 return true;
5204 }
     // Non-constant outer operand: only reassociate when the target reports
     // the rewrite as profitable.
5205 if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
5206 // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
5207 // iff (op x, c1) has one use
5208 MatchInfo = [=](MachineIRBuilder &B) {
5209 auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
5210 B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
5211 };
5212 return true;
5213 }
5214 }
5215
5216 return false;
5217}
5218
5220 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5219 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Tries tryReassocBinOp with MI's two source operands in their original
     // order and then swapped; returns true if either attempt matches.
5221 // We don't check if the reassociation will break a legal addressing mode
5222 // here since pointer arithmetic is handled by G_PTR_ADD.
5223 unsigned Opc = MI.getOpcode();
5224 Register DstReg = MI.getOperand(0).getReg();
5225 Register LHSReg = MI.getOperand(1).getReg();
5226 Register RHSReg = MI.getOperand(2).getReg();
5227
5228 if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
5229 return true;
     // Second attempt with the operands exchanged.
5230 if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
5231 return true;
5232 return false;
5233}
5234
5236 APInt &MatchInfo) const {
     // NOTE(review): listing line 5235 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Asks ConstantFoldCastOp to fold MI (using MI's opcode, its destination
     // type and its operand 1). On success the folded value is stored in
     // MatchInfo and true is returned; otherwise MatchInfo is left untouched.
5237 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5238 Register SrcOp = MI.getOperand(1).getReg();
5239
5240 if (auto MaybeCst = ConstantFoldCastOp(MI.getOpcode(), DstTy, SrcOp, MRI)) {
5241 MatchInfo = *MaybeCst;
5242 return true;
5243 }
5244
5245 return false;
5246}
5247
5249 APInt &MatchInfo) const {
     // NOTE(review): listing line 5248 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Asks ConstantFoldBinOp to fold MI from its operands 1 and 2. On success
     // the folded value is stored in MatchInfo and true is returned; otherwise
     // MatchInfo is left untouched.
5250 Register Op1 = MI.getOperand(1).getReg();
5251 Register Op2 = MI.getOperand(2).getReg();
5252 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
5253 if (!MaybeCst)
5254 return false;
5255 MatchInfo = *MaybeCst;
5256 return true;
5257}
5258
5260 ConstantFP *&MatchInfo) const {
     // NOTE(review): listing line 5259 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Asks ConstantFoldFPBinOp to fold MI from its operands 1 and 2. On
     // success MatchInfo receives a ConstantFP for the folded value, obtained
     // from the enclosing function's LLVMContext, and true is returned.
5261 Register Op1 = MI.getOperand(1).getReg();
5262 Register Op2 = MI.getOperand(2).getReg();
5263 auto MaybeCst = ConstantFoldFPBinOp(MI.getOpcode(), Op1, Op2, MRI);
5264 if (!MaybeCst)
5265 return false;
5266 MatchInfo =
5267 ConstantFP::get(MI.getMF()->getFunction().getContext(), *MaybeCst);
5268 return true;
5269}
5270
5272 ConstantFP *&MatchInfo) const {
     // NOTE(review): listing line 5271 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Folds a G_FMA / G_FMAD whose three source operands are all FP constants.
     // On success MatchInfo receives a ConstantFP for the result and true is
     // returned; if any operand is not a constant the function returns false.
5273 assert(MI.getOpcode() == TargetOpcode::G_FMA ||
5274 MI.getOpcode() == TargetOpcode::G_FMAD);
     // The first register (the definition) is not needed here.
5275 auto [_, Op1, Op2, Op3] = MI.getFirst4Regs();
5276
5277 const ConstantFP *Op3Cst = getConstantFPVRegVal(Op3, MRI);
5278 if (!Op3Cst)
5279 return false;
5280
5281 const ConstantFP *Op2Cst = getConstantFPVRegVal(Op2, MRI);
5282 if (!Op2Cst)
5283 return false;
5284
5285 const ConstantFP *Op1Cst = getConstantFPVRegVal(Op1, MRI);
5286 if (!Op1Cst)
5287 return false;
5288
     // Op1F starts as a copy of operand 1's value and is updated in place by
     // fusedMultiplyAdd with operands 2 and 3.
5289 APFloat Op1F = Op1Cst->getValueAPF();
     // NOTE(review): listing line 5291, which carries the last argument of this
     // call (presumably the rounding mode), is absent from this extract.
5290 Op1F.fusedMultiplyAdd(Op2Cst->getValueAPF(), Op3Cst->getValueAPF(),
5292 MatchInfo = ConstantFP::get(MI.getMF()->getFunction().getContext(), Op1F);
5293 return true;
5294}
5295
5298 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
     // NOTE(review): listing lines 5296-5297 (the opening of this definition's
     // signature) are absent from this extract.
     //
     // Returns true, with MatchInfo set to the rewrite described below, when
     // every check in this function passes; otherwise returns false.
5299 // Look for a binop feeding into an AND with a mask:
5300 //
5301 // %add = G_ADD %lhs, %rhs
5302 // %and = G_AND %add, 000...11111111
5303 //
5304 // Check if it's possible to perform the binop at a narrower width and zext
5305 // back to the original width like so:
5306 //
5307 // %narrow_lhs = G_TRUNC %lhs
5308 // %narrow_rhs = G_TRUNC %rhs
5309 // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
5310 // %new_add = G_ZEXT %narrow_add
5311 // %and = G_AND %new_add, 000...11111111
5312 //
5313 // This can allow later combines to eliminate the G_AND if it turns out
5314 // that the mask is irrelevant.
5315 assert(MI.getOpcode() == TargetOpcode::G_AND);
5316 Register Dst = MI.getOperand(0).getReg();
5317 Register AndLHS = MI.getOperand(1).getReg();
5318 Register AndRHS = MI.getOperand(2).getReg();
5319 LLT WideTy = MRI.getType(Dst);
5320
5321 // If the potential binop has more than one use, then it's possible that one
5322 // of those uses will need its full width.
     // Non-scalar destination types are rejected by the same test.
5323 if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
5324 return false;
5325
5326 // Check if the LHS feeding the AND is impacted by the high bits that we're
5327 // masking out.
5328 //
5329 // e.g. for 64-bit x, y:
5330 //
5331 // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
5332 MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
5333 if (!LHSInst)
5334 return false;
5335 unsigned LHSOpc = LHSInst->getOpcode();
     // Only these six opcodes are accepted as the narrowed binop.
5336 switch (LHSOpc) {
5337 default:
5338 return false;
5339 case TargetOpcode::G_ADD:
5340 case TargetOpcode::G_SUB:
5341 case TargetOpcode::G_MUL:
5342 case TargetOpcode::G_AND:
5343 case TargetOpcode::G_OR:
5344 case TargetOpcode::G_XOR:
5345 break;
5346 }
5347
5348 // Find the mask on the RHS.
5349 auto Cst = getIConstantVRegValWithLookThrough(AndRHS, MRI);
5350 if (!Cst)
5351 return false;
5352 auto Mask = Cst->Value;
5353 if (!Mask.isMask())
5354 return false;
5355
5356 // No point in combining if there's nothing to truncate.
     // The narrow width is the number of trailing one bits in the mask.
5357 unsigned NarrowWidth = Mask.countr_one();
5358 if (NarrowWidth == WideTy.getSizeInBits())
5359 return false;
5360 LLT NarrowTy = LLT::scalar(NarrowWidth);
5361
5362 // Check if adding the zext + truncates could be harmful.
5363 auto &MF = *MI.getMF();
5364 const auto &TLI = getTargetLowering();
5365 LLVMContext &Ctx = MF.getFunction().getContext();
     // Both the truncate and the zero-extend must be reported free by the
     // target, and both must be legal (or this must run before the legalizer).
5366 if (!TLI.isTruncateFree(WideTy, NarrowTy, Ctx) ||
5367 !TLI.isZExtFree(NarrowTy, WideTy, Ctx))
5368 return false;
5369 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
5370 !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
5371 return false;
5372 Register BinOpLHS = LHSInst->getOperand(1).getReg();
5373 Register BinOpRHS = LHSInst->getOperand(2).getReg();
     // The callback builds through the member Builder rather than its own
     // parameter B, then points operand 1 of MI at the zero-extended result.
5374 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5375 auto NarrowLHS = Builder.buildTrunc(NarrowTy, BinOpLHS);
5376 auto NarrowRHS = Builder.buildTrunc(NarrowTy, BinOpRHS);
5377 auto NarrowBinOp =
5378 Builder.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
5379 auto Ext = Builder.buildZExt(WideTy, NarrowBinOp);
5380 Observer.changingInstr(MI);
5381 MI.getOperand(1).setReg(Ext.getReg(0));
5382 Observer.changedInstr(MI);
5383 };
5384 return true;
5385}
5386
5388 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5387 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Matches a G_UMULO / G_SMULO whose operand 3 is the constant 2 (scalar or
     // splat) and prepares an in-place rewrite into G_UADDO / G_SADDO that adds
     // operand 2 to itself.
5389 unsigned Opc = MI.getOpcode();
5390 assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
5391
5392 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(2)))
5393 return false;
5394
5395 MatchInfo = [=, &MI](MachineIRBuilder &B) {
5396 Observer.changingInstr(MI);
5397 unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
5398 : TargetOpcode::G_SADDO;
     // Swap the opcode descriptor, then replace the constant with a second
     // copy of the other multiplicand.
5399 MI.setDesc(Builder.getTII().get(NewOpc));
5400 MI.getOperand(3).setReg(MI.getOperand(2).getReg());
5401 Observer.changedInstr(MI);
5402 };
5403 return true;
5404}
5405
5407 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5406 (the opening of this definition's
     // signature) is absent from this extract.
5408 // (G_*MULO x, 0) -> 0 + no carry out
5409 assert(MI.getOpcode() == TargetOpcode::G_UMULO ||
5410 MI.getOpcode() == TargetOpcode::G_SMULO);
     // Only operand 3 is tested for being zero (scalar or splat).
5411 if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICstOrSplat(0)))
5412 return false;
5413 Register Dst = MI.getOperand(0).getReg();
5414 Register Carry = MI.getOperand(1).getReg();
     // Both replacement constants must be legal for their types (or this must
     // run before the legalizer).
5415 if (!isConstantLegalOrBeforeLegalizer(MRI.getType(Dst)) ||
5416 !isConstantLegalOrBeforeLegalizer(MRI.getType(Carry)))
5417 return false;
     // The callback defines both results of the instruction as constant zero.
5418 MatchInfo = [=](MachineIRBuilder &B) {
5419 B.buildConstant(Dst, 0);
5420 B.buildConstant(Carry, 0);
5421 };
5422 return true;
5423}
5424
5426 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5425 (the opening of this definition's
     // signature) is absent from this extract.
5427 // (G_*ADDE x, y, 0) -> (G_*ADDO x, y)
5428 // (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
5429 assert(MI.getOpcode() == TargetOpcode::G_UADDE ||
5430 MI.getOpcode() == TargetOpcode::G_SADDE ||
5431 MI.getOpcode() == TargetOpcode::G_USUBE ||
5432 MI.getOpcode() == TargetOpcode::G_SSUBE);
     // Operand 4 (the incoming carry) must be the constant zero.
5433 if (!mi_match(MI.getOperand(4).getReg(), MRI, m_SpecificICstOrSplat(0)))
5434 return false;
     // The callback captures by reference and reads MI's opcode when it runs,
     // not when the match is made.
5435 MatchInfo = [&](MachineIRBuilder &B) {
     // NewOpcode is assigned in every case of the switch; the switch has no
     // default, so it relies on the opcode set asserted above.
5436 unsigned NewOpcode;
5437 switch (MI.getOpcode()) {
5438 case TargetOpcode::G_UADDE:
5439 NewOpcode = TargetOpcode::G_UADDO;
5440 break;
5441 case TargetOpcode::G_SADDE:
5442 NewOpcode = TargetOpcode::G_SADDO;
5443 break;
5444 case TargetOpcode::G_USUBE:
5445 NewOpcode = TargetOpcode::G_USUBO;
5446 break;
5447 case TargetOpcode::G_SSUBE:
5448 NewOpcode = TargetOpcode::G_SSUBO;
5449 break;
5450 }
     // Switch the descriptor to the carry-less form and drop the carry-in.
5451 Observer.changingInstr(MI);
5452 MI.setDesc(B.getTII().get(NewOpcode));
5453 MI.removeOperand(4);
5454 Observer.changedInstr(MI);
5455 };
5456 return true;
5457}
5458
5460 BuildFnTy &MatchInfo) const {
     // NOTE(review): listing line 5459 (the opening of this definition's
     // signature) is absent from this extract.
     //
     // Simplifies a G_SUB where one side is a G_ADD that contains the other
     // side: the result becomes either a copy of the remaining addend or its
     // negation (0 - addend). Returns false when neither shape applies.
5461 assert(MI.getOpcode() == TargetOpcode::G_SUB);
5462 Register Dst = MI.getOperand(0).getReg();
5463 // (x + y) - z -> x (if y == z)
5464 // (x + y) - z -> y (if x == z)
5465 Register X, Y, Z;
5466 if (mi_match(Dst, MRI, m_GSub(m_GAdd(m_Reg(X), m_Reg(Y)), m_Reg(Z)))) {
5467 Register ReplaceReg;
5468 int64_t CstX, CstY;
     // NOTE(review): listing lines 5470 and 5473, which hold the second halves
     // of the two `&&` conditions below, are absent from this extract.
5469 if (Y == Z || (mi_match(Y, MRI, m_ICstOrSplat(CstY)) &&
5471 ReplaceReg = X;
5472 else if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5474 ReplaceReg = Y;
     // ReplaceReg stays default-constructed (false in a boolean test) when
     // neither condition held.
5475 if (ReplaceReg) {
5476 MatchInfo = [=](MachineIRBuilder &B) { B.buildCopy(Dst, ReplaceReg); };
5477 return true;
5478 }
5479 }
5480
5481 // x - (y + z) -> 0 - y (if x == z)
5482 // x - (y + z) -> 0 - z (if x == y)
5483 if (mi_match(Dst, MRI, m_GSub(m_Reg(X), m_GAdd(m_Reg(Y), m_Reg(Z))))) {
5484 Register ReplaceReg;
5485 int64_t CstX;
     // NOTE(review): listing lines 5487 and 5490, which hold the second halves
     // of the two `&&` conditions below, are absent from this extract.
5486 if (X == Z || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5488 ReplaceReg = Y;
5489 else if (X == Y || (mi_match(X, MRI, m_ICstOrSplat(CstX)) &&
5491 ReplaceReg = Z;
5492 if (ReplaceReg) {
     // The negation is emitted as a subtraction from a zero constant of the
     // destination's type.
5493 MatchInfo = [=](MachineIRBuilder &B) {
5494 auto Zero = B.buildConstant(MRI.getType(Dst), 0);
5495 B.buildSub(Dst, Zero, ReplaceReg);
5496 };
5497 return true;
5498 }
5499 }
5500 return false;
5501}
5502
// NOTE(review): listing line 5503 (this definition's signature) is absent from
// this extract.
//
// Builds, through the member Builder, a replacement for a G_UDIV / G_UREM by a
// constant (scalar or per-element vector constant) and returns the last
// instruction built. Two strategies are visible below:
//  * exact division: optional right shift by the divisor's trailing zero count,
//    then a multiply by the multiplicative inverse of the remaining odd part;
//  * general case: pre-shift, G_UMULH by a "magic" factor, optional NPQ fix-up,
//    post-shift, a select that returns LHS for a divisor of 1, and for G_UREM a
//    final LHS - quotient * RHS.
5504 unsigned Opcode = MI.getOpcode();
5505 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5506 auto &UDivorRem = cast<GenericMachineInstr>(MI);
5507 Register Dst = UDivorRem.getReg(0);
5508 Register LHS = UDivorRem.getReg(1);
5509 Register RHS = UDivorRem.getReg(2);
5510 LLT Ty = MRI.getType(Dst);
5511 LLT ScalarTy = Ty.getScalarType();
5512 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
     // NOTE(review): listing line 5513 is absent; ShiftAmtTy, used below, is
     // presumably declared there -- confirm against the full file.
5514 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5515
5516 auto &MIB = Builder;
5517
5518 bool UseSRL = false;
5519 SmallVector<Register, 16> Shifts, Factors;
5520 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5521 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5522
     // Per-element callback for the exact-division path: records one shift
     // amount and one multiplicative inverse per divisor element.
5523 auto BuildExactUDIVPattern = [&](const Constant *C) {
5524 // Don't recompute inverses for each splat element.
5525 if (IsSplat && !Factors.empty()) {
5526 Shifts.push_back(Shifts[0]);
5527 Factors.push_back(Factors[0]);
5528 return true;
5529 }
5530
5531 auto *CI = cast<ConstantInt>(C);
5532 APInt Divisor = CI->getValue();
     // Strip the divisor's trailing zero bits; they are handled by a right
     // shift of the dividend instead.
5533 unsigned Shift = Divisor.countr_zero();
5534 if (Shift) {
5535 Divisor.lshrInPlace(Shift);
5536 UseSRL = true;
5537 }
5538
5539 // Calculate the multiplicative inverse modulo BW.
5540 APInt Factor = Divisor.multiplicativeInverse();
5541 Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5542 Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5543 return true;
5544 };
5545
5546 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5547 // Collect all magic values from the build vector.
5548 if (!matchUnaryPredicate(MRI, RHS, BuildExactUDIVPattern))
5549 llvm_unreachable("Expected unary predicate match to succeed");
5550
5551 Register Shift, Factor;
5552 if (Ty.isVector()) {
5553 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5554 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5555 } else {
5556 Shift = Shifts[0];
5557 Factor = Factors[0];
5558 }
5559
5560 Register Res = LHS;
5561
     // The shift is only emitted when at least one element had trailing zeros.
5562 if (UseSRL)
5563 Res = MIB.buildLShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5564
     // This path returns the multiply directly, without looking at Opcode.
5565 return MIB.buildMul(Ty, Res, Factor);
5566 }
5567
     // Without a value-tracking analysis (VT is null) no leading zeros are
     // assumed for the dividend.
5568 unsigned KnownLeadingZeros =
5569 VT ? VT->getKnownBits(LHS).countMinLeadingZeros() : 0;
5570
5571 bool UseNPQ = false;
5572 SmallVector<Register, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
     // Per-element callback for the general path: records pre-shift, magic
     // factor, NPQ factor and post-shift constants for one divisor element.
5573 auto BuildUDIVPattern = [&](const Constant *C) {
5574 auto *CI = cast<ConstantInt>(C);
5575 const APInt &Divisor = CI->getValue();
5576
5577 bool SelNPQ = false;
5578 APInt Magic(Divisor.getBitWidth(), 0);
5579 unsigned PreShift = 0, PostShift = 0;
5580
5581 // Magic algorithm doesn't work for division by 1. We need to emit a select
5582 // at the end.
5583 // TODO: Use undef values for divisor of 1.
5584 if (!Divisor.isOne()) {
5585
5586 // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros
5587 // in the dividend exceeds the leading zeros for the divisor.
     // NOTE(review): listing lines 5588-5589, which begin the statement that
     // declares `magics`, are absent from this extract.
5590 Divisor, std::min(KnownLeadingZeros, Divisor.countl_zero()));
5591
5592 Magic = std::move(magics.Magic);
5593
5594 assert(magics.PreShift < Divisor.getBitWidth() &&
5595 "We shouldn't generate an undefined shift!");
5596 assert(magics.PostShift < Divisor.getBitWidth() &&
5597 "We shouldn't generate an undefined shift!");
5598 assert((!magics.IsAdd || magics.PreShift == 0) && "Unexpected pre-shift");
5599 PreShift = magics.PreShift;
5600 PostShift = magics.PostShift;
5601 SelNPQ = magics.IsAdd;
5602 }
5603
5604 PreShifts.push_back(
5605 MIB.buildConstant(ScalarShiftAmtTy, PreShift).getReg(0));
5606 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magic).getReg(0));
     // NPQ factor: only the top bit set when this element needs the NPQ
     // fix-up, zero otherwise.
5607 NPQFactors.push_back(
5608 MIB.buildConstant(ScalarTy,
5609 SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
5610 : APInt::getZero(EltBits))
5611 .getReg(0));
5612 PostShifts.push_back(
5613 MIB.buildConstant(ScalarShiftAmtTy, PostShift).getReg(0));
5614 UseNPQ |= SelNPQ;
5615 return true;
5616 };
5617
5618 // Collect the shifts/magic values from each element.
5619 bool Matched = matchUnaryPredicate(MRI, RHS, BuildUDIVPattern);
5620 (void)Matched;
5621 assert(Matched && "Expected unary predicate match to succeed");
5622
5623 Register PreShift, PostShift, MagicFactor, NPQFactor;
5624 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5625 if (RHSDef) {
5626 PreShift = MIB.buildBuildVector(ShiftAmtTy, PreShifts).getReg(0);
5627 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5628 NPQFactor = MIB.buildBuildVector(Ty, NPQFactors).getReg(0);
5629 PostShift = MIB.buildBuildVector(ShiftAmtTy, PostShifts).getReg(0);
5630 } else {
5631 assert(MRI.getType(RHS).isScalar() &&
5632 "Non-build_vector operation should have been a scalar");
     // NPQFactor is left unset here; below it is only read when Ty is a vector.
5633 PreShift = PreShifts[0];
5634 MagicFactor = MagicFactors[0];
5635 PostShift = PostShifts[0];
5636 }
5637
5638 Register Q = LHS;
5639 Q = MIB.buildLShr(Ty, Q, PreShift).getReg(0);
5640
5641 // Multiply the numerator (operand 0) by the magic value.
5642 Q = MIB.buildUMulH(Ty, Q, MagicFactor).getReg(0);
5643
5644 if (UseNPQ) {
5645 Register NPQ = MIB.buildSub(Ty, LHS, Q).getReg(0);
5646
5647 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
5648 // G_UMULH to act as a SRL-by-1 for NPQ, else multiply by zero.
5649 if (Ty.isVector())
5650 NPQ = MIB.buildUMulH(Ty, NPQ, NPQFactor).getReg(0);
5651 else
5652 NPQ = MIB.buildLShr(Ty, NPQ, MIB.buildConstant(ShiftAmtTy, 1)).getReg(0);
5653
5654 Q = MIB.buildAdd(Ty, NPQ, Q).getReg(0);
5655 }
5656
5657 Q = MIB.buildLShr(Ty, Q, PostShift).getReg(0);
     // Elements whose divisor is 1 bypass the magic sequence and take LHS.
5658 auto One = MIB.buildConstant(Ty, 1);
     // NOTE(review): listing line 5660, which carries the first argument of this
     // buildICmp call (presumably the predicate), is absent from this extract.
5659 auto IsOne = MIB.buildICmp(
5661 Ty.isScalar() ? LLT::scalar(1) : Ty.changeElementSize(1), RHS, One);
5662 auto ret = MIB.buildSelect(Ty, IsOne, LHS, Q);
5663
     // Remainder = LHS - quotient * RHS.
5664 if (Opcode == TargetOpcode::G_UREM) {
5665 auto Prod = MIB.buildMul(Ty, ret, RHS);
5666 return MIB.buildSub(Ty, LHS, Prod);
5667 }
5668 return ret;
5669}
5670
// NOTE(review): listing line 5671 (this definition's signature) is absent from
// this extract.
//
// Decides whether a G_UDIV / G_UREM should be replaced by a multiply-based
// sequence. Returns false for 1-bit scalars, when the target reports integer
// division as cheap, for minsize functions, when the divisor is not a constant
// (or constant vector), or when one of the checked opcodes is not legal.
// Otherwise the result is whether every divisor element is a non-null constant.
5672 unsigned Opcode = MI.getOpcode();
5673 assert(Opcode == TargetOpcode::G_UDIV || Opcode == TargetOpcode::G_UREM);
5674 Register Dst = MI.getOperand(0).getReg();
5675 Register RHS = MI.getOperand(2).getReg();
5676 LLT DstTy = MRI.getType(Dst);
5677
5678 auto &MF = *MI.getMF();
5679 AttributeList Attr = MF.getFunction().getAttributes();
5680 const auto &TLI = getTargetLowering();
5681 LLVMContext &Ctx = MF.getFunction().getContext();
5682 if (DstTy.getScalarSizeInBits() == 1 ||
5683 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5684 return false;
5685
5686 // Don't do this for minsize because the instruction sequence is usually
5687 // larger.
5688 if (MF.getFunction().hasMinSize())
5689 return false;
5690
     // NOTE(review): listing line 5692, which holds the rest of this condition,
     // is absent from this extract. The branch returns before the
     // constant-divisor and legality checks further down are reached.
5691 if (Opcode == TargetOpcode::G_UDIV &&
5693 return matchUnaryPredicate(
5694 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5695 }
5696
5697 auto *RHSDef = MRI.getVRegDef(RHS);
5698 if (!isConstantOrConstantVector(*RHSDef, MRI))
5699 return false;
5700
5701 // Don't do this if the types are not going to be legal.
     // The legality checks are skipped entirely when LI is null.
5702 if (LI) {
5703 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5704 return false;
5705 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMULH, {DstTy}}))
5706 return false;
     // NOTE(review): listing line 5707, which opens the check wrapped around the
     // G_ICMP query below, is absent from this extract.
5708 {TargetOpcode::G_ICMP,
5709 {DstTy.isVector() ? DstTy.changeElementSize(1) : LLT::scalar(1),
5710 DstTy}}))
5711 return false;
     // G_SUB is only checked when lowering a remainder.
5712 if (Opcode == TargetOpcode::G_UREM &&
5713 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5714 return false;
5715 }
5716
     // Reject a divisor with a null (zero) or missing element.
5717 return matchUnaryPredicate(
5718 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5719}
5720
// Apply step: builds the replacement sequence with buildUDivOrURemUsingMul and
// replaces MI's single def with the register in operand 0 of the returned
// instruction.
// NOTE(review): the signature line (listing line 5721) is missing from this
// extract - presumably CombinerHelper::applyUDivOrURemByConst; confirm.
5722 auto *NewMI = buildUDivOrURemUsingMul(MI);
5723 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5724}
5725
// Match step for rewriting a G_SDIV/G_SREM (see the opcode assert) by a
// constant divisor into a multiply-based sequence.
// NOTE(review): the signature line (listing line 5726) is missing from this
// extract - presumably CombinerHelper::matchSDivOrSRemByConst; confirm.
//
// Returns false when the scalar width is below 3 bits, when the target
// lowering reports integer division of this type as cheap, or when the
// function is marked minsize. Otherwise the result depends on the constant and
// legality checks below, and finally on every divisor element being a
// non-null constant.
5727 unsigned Opcode = MI.getOpcode();
5728 assert(Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM);
5729 Register Dst = MI.getOperand(0).getReg();
5730 Register RHS = MI.getOperand(2).getReg();
5731 LLT DstTy = MRI.getType(Dst);
5732 auto SizeInBits = DstTy.getScalarSizeInBits();
// Same shape as DstTy with elements twice as wide; used for the widened G_MUL
// legality query below.
5733 LLT WideTy = DstTy.changeElementSize(SizeInBits * 2);
5734
5735 auto &MF = *MI.getMF();
5736 AttributeList Attr = MF.getFunction().getAttributes();
5737 const auto &TLI = getTargetLowering();
5738 LLVMContext &Ctx = MF.getFunction().getContext();
5739 if (DstTy.getScalarSizeInBits() < 3 ||
5740 TLI.isIntDivCheap(getApproximateEVTForLLT(DstTy, Ctx), Attr))
5741 return false;
5742
5743 // Don't do this for minsize because the instruction sequence is usually
5744 // larger.
5745 if (MF.getFunction().hasMinSize())
5746 return false;
5747
5748 // If the sdiv has an 'exact' flag we can use a simpler lowering.
// NOTE(review): listing line 5750 (the rest of this condition, presumably the
// 'exact' flag test) is missing from the extract - confirm. When the branch is
// taken the legality checks further down are skipped.
5749 if (Opcode == TargetOpcode::G_SDIV &&
5751 return matchUnaryPredicate(
5752 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5753 }
5754
// Bail out unless isConstantOrConstantVector accepts the instruction that
// defines RHS.
5755 auto *RHSDef = MRI.getVRegDef(RHS);
5756 if (!isConstantOrConstantVector(*RHSDef, MRI))
5757 return false;
5758
5759 // Don't do this if the types are not going to be legal.
5760 if (LI) {
5761 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_MUL, {DstTy, DstTy}}))
5762 return false;
// Either G_SMULH on DstTy is legal, or a G_MUL on the double-width type passes
// isLegalOrHasWidenScalar.
5763 if (!isLegal({TargetOpcode::G_SMULH, {DstTy}}) &&
5764 !isLegalOrHasWidenScalar({TargetOpcode::G_MUL, {WideTy, WideTy}}))
5765 return false;
// G_SREM additionally needs a G_SUB on DstTy.
5766 if (Opcode == TargetOpcode::G_SREM &&
5767 !isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy, DstTy}}))
5768 return false;
5769 }
5770
// Every divisor element must be a non-null constant.
5771 return matchUnaryPredicate(
5772 MRI, RHS, [](const Constant *C) { return C && !C->isNullValue(); });
5773}
5774
// Apply step: builds the replacement sequence with buildSDivOrSRemUsingMul and
// replaces MI's single def with the register in operand 0 of the returned
// instruction.
// NOTE(review): the signature line (listing line 5775) is missing from this
// extract - presumably CombinerHelper::applySDivOrSRemByConst; confirm.
5776 auto *NewMI = buildSDivOrSRemUsingMul(MI);
5777 replaceSingleDefInstWithReg(MI, NewMI->getOperand(0).getReg());
5778}
5779
// Builds the multiply-based replacement for a G_SDIV/G_SREM by a constant and
// returns the final instruction that was built.
// NOTE(review): the signature line (listing line 5780) is missing from this
// extract - presumably CombinerHelper::buildSDivOrSRemUsingMul; confirm.
//
// Exact-flagged input: Res = LHS, optionally arithmetic-shifted right by the
// divisor's trailing-zero count, then multiplied by the multiplicative inverse
// of the shifted divisor.
// Otherwise: Q = smulh(LHS, Magic) + LHS * Factor, arithmetic-shifted right by
// Shift, plus ((Q >> (EltBits - 1)) & ShiftMask). For G_SREM the returned
// value is LHS - Q * RHS.
5781 unsigned Opcode = MI.getOpcode();
5782 assert(MI.getOpcode() == TargetOpcode::G_SDIV ||
5783 Opcode == TargetOpcode::G_SREM);
5784 auto &SDivorRem = cast<GenericMachineInstr>(MI);
5785 Register Dst = SDivorRem.getReg(0);
5786 Register LHS = SDivorRem.getReg(1);
5787 Register RHS = SDivorRem.getReg(2);
5788 LLT Ty = MRI.getType(Dst);
5789 LLT ScalarTy = Ty.getScalarType();
5790 const unsigned EltBits = ScalarTy.getScalarSizeInBits();
// NOTE(review): listing line 5791, which declares ShiftAmtTy, is missing from
// this extract.
5792 LLT ScalarShiftAmtTy = ShiftAmtTy.getScalarType();
5793 auto &MIB = Builder;
5794
// Set by the exact-path callback when any divisor element has trailing zeros.
5795 bool UseSRA = false;
5796 SmallVector<Register, 16> ExactShifts, ExactFactors;
5797
5798 auto *RHSDefInstr = cast<GenericMachineInstr>(getDefIgnoringCopies(RHS, MRI));
5799 bool IsSplat = getIConstantSplatVal(*RHSDefInstr, MRI).has_value();
5800
// Per-element callback for the exact path: appends one shift-amount constant
// and one inverse-factor constant per divisor element. Always returns true.
5801 auto BuildExactSDIVPattern = [&](const Constant *C) {
5802 // Don't recompute inverses for each splat element.
5803 if (IsSplat && !ExactFactors.empty()) {
5804 ExactShifts.push_back(ExactShifts[0]);
5805 ExactFactors.push_back(ExactFactors[0]);
5806 return true;
5807 }
5808
5809 auto *CI = cast<ConstantInt>(C);
5810 APInt Divisor = CI->getValue();
// Strip the trailing zero bits from the divisor; they are applied separately
// as an arithmetic shift of the numerator.
5811 unsigned Shift = Divisor.countr_zero();
5812 if (Shift) {
5813 Divisor.ashrInPlace(Shift);
5814 UseSRA = true;
5815 }
5816
5817 // Calculate the multiplicative inverse modulo BW.
5818 // 2^W requires W + 1 bits, so we have to extend and then truncate.
5819 APInt Factor = Divisor.multiplicativeInverse();
5820 ExactShifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0));
5821 ExactFactors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0));
5822 return true;
5823 };
5824
5825 if (MI.getFlag(MachineInstr::MIFlag::IsExact)) {
5826 // Collect all magic values from the build vector.
5827 bool Matched = matchUnaryPredicate(MRI, RHS, BuildExactSDIVPattern);
5828 (void)Matched;
5829 assert(Matched && "Expected unary predicate match to succeed");
5830
// Vectors rebuild per-element operands; scalars use the single collected value.
5831 Register Shift, Factor;
5832 if (Ty.isVector()) {
5833 Shift = MIB.buildBuildVector(ShiftAmtTy, ExactShifts).getReg(0);
5834 Factor = MIB.buildBuildVector(Ty, ExactFactors).getReg(0);
5835 } else {
5836 Shift = ExactShifts[0];
5837 Factor = ExactFactors[0];
5838 }
5839
5840 Register Res = LHS;
5841
5842 if (UseSRA)
5843 Res = MIB.buildAShr(Ty, Res, Shift, MachineInstr::IsExact).getReg(0);
5844
5845 return MIB.buildMul(Ty, Res, Factor);
5846 }
5847
5848 SmallVector<Register, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5849
// Per-element callback for the general path: appends the magic multiplier, the
// numerator factor (-1, 0 or +1, or the divisor itself when it is +1/-1), the
// shift amount and the sign-correction mask for one divisor element.
5850 auto BuildSDIVPattern = [&](const Constant *C) {
5851 auto *CI = cast<ConstantInt>(C);
5852 const APInt &Divisor = CI->getValue();
5853
// NOTE(review): listing lines 5854-5855, which declare Magics, are missing
// from this extract.
5856 int NumeratorFactor = 0;
// All-ones by default so the sign-bit correction is kept; cleared for +1/-1.
5857 int ShiftMask = -1;
5858
5859 if (Divisor.isOne() || Divisor.isAllOnes()) {
5860 // If d is +1/-1, we just multiply the numerator by +1/-1.
5861 NumeratorFactor = Divisor.getSExtValue();
5862 Magics.Magic = 0;
5863 Magics.ShiftAmount = 0;
5864 ShiftMask = 0;
5865 } else if (Divisor.isStrictlyPositive() && Magics.Magic.isNegative()) {
5866 // If d > 0 and m < 0, add the numerator.
5867 NumeratorFactor = 1;
5868 } else if (Divisor.isNegative() && Magics.Magic.isStrictlyPositive()) {
5869 // If d < 0 and m > 0, subtract the numerator.
5870 NumeratorFactor = -1;
5871 }
5872
5873 MagicFactors.push_back(MIB.buildConstant(ScalarTy, Magics.Magic).getReg(0));
5874 Factors.push_back(MIB.buildConstant(ScalarTy, NumeratorFactor).getReg(0));
5875 Shifts.push_back(
5876 MIB.buildConstant(ScalarShiftAmtTy, Magics.ShiftAmount).getReg(0));
5877 ShiftMasks.push_back(MIB.buildConstant(ScalarTy, ShiftMask).getReg(0));
5878
5879 return true;
5880 };
5881
5882 // Collect the shifts/magic values from each element.
5883 bool Matched = matchUnaryPredicate(MRI, RHS, BuildSDIVPattern);
5884 (void)Matched;
5885 assert(Matched && "Expected unary predicate match to succeed");
5886
// A G_BUILD_VECTOR divisor gets vector operands; anything else is asserted to
// be scalar and uses element 0 of each list.
5887 Register MagicFactor, Factor, Shift, ShiftMask;
5888 auto *RHSDef = getOpcodeDef<GBuildVector>(RHS, MRI);
5889 if (RHSDef) {
5890 MagicFactor = MIB.buildBuildVector(Ty, MagicFactors).getReg(0);
5891 Factor = MIB.buildBuildVector(Ty, Factors).getReg(0);
5892 Shift = MIB.buildBuildVector(ShiftAmtTy, Shifts).getReg(0);
5893 ShiftMask = MIB.buildBuildVector(Ty, ShiftMasks).getReg(0);
5894 } else {
5895 assert(MRI.getType(RHS).isScalar() &&
5896 "Non-build_vector operation should have been a scalar");
5897 MagicFactor = MagicFactors[0];
5898 Factor = Factors[0];
5899 Shift = Shifts[0];
5900 ShiftMask = ShiftMasks[0];
5901 }
5902
// The initial value of Q is overwritten immediately by the G_SMULH result.
5903 Register Q = LHS;
5904 Q = MIB.buildSMulH(Ty, LHS, MagicFactor).getReg(0);
5905
5906 // (Optionally) Add/subtract the numerator using Factor.
5907 Factor = MIB.buildMul(Ty, LHS, Factor).getReg(0);
5908 Q = MIB.buildAdd(Ty, Q, Factor).getReg(0);
5909
5910 // Shift right algebraic by shift value.
5911 Q = MIB.buildAShr(Ty, Q, Shift).getReg(0);
5912
5913 // Extract the sign bit, mask it and add it to the quotient.
5914 auto SignShift = MIB.buildConstant(ShiftAmtTy, EltBits - 1);
5915 auto T = MIB.buildLShr(Ty, Q, SignShift);
5916 T = MIB.buildAnd(Ty, T, ShiftMask);
5917 auto ret = MIB.buildAdd(Ty, Q, T);
5918
// Remainder: LHS - quotient * RHS.
5919 if (Opcode == TargetOpcode::G_SREM) {
5920 auto Prod = MIB.buildMul(Ty, ret, RHS);
5921 return MIB.buildSub(Ty, LHS, Prod);
5922 }
5923 return ret;
5924}
5925
// Match step for a G_SDIV/G_UDIV (see the assert) whose divisor is a power of
// two. Returns true only if every divisor element is a ConstantInt that is a
// power of two, or, when IsSigned is set, a negated power of two. Undef
// elements are not allowed (AllowUndefs is false).
// NOTE(review): the signature line (listing line 5926) is missing from this
// extract; IsSigned is used but not declared here, so it is presumably a
// parameter (CombinerHelper::matchDivByPow2) - confirm.
5927 assert((MI.getOpcode() == TargetOpcode::G_SDIV ||
5928 MI.getOpcode() == TargetOpcode::G_UDIV) &&
5929 "Expected SDIV or UDIV");
5930 auto &Div = cast<GenericMachineInstr>(MI);
5931 Register RHS = Div.getReg(2);
5932 auto MatchPow2 = [&](const Constant *C) {
5933 auto *CI = dyn_cast<ConstantInt>(C);
5934 return CI && (CI->getValue().isPowerOf2() ||
5935 (IsSigned && CI->getValue().isNegatedPowerOf2()));
5936 };
5937 return matchUnaryPredicate(MRI, RHS, MatchPow2, /*AllowUndefs=*/false);
5938}
5939
// Apply step: replaces a G_SDIV by a (possibly negated) power of two with the
// shift/add/select sequence sketched in the comment below, writes the result
// into MI's destination register and erases MI.
// NOTE(review): the signature line (listing line 5940) is missing from this
// extract - presumably CombinerHelper::applySDivByPow2; confirm.
5941 assert(MI.getOpcode() == TargetOpcode::G_SDIV && "Expected SDIV");
5942 auto &SDiv = cast<GenericMachineInstr>(MI);
5943 Register Dst = SDiv.getReg(0);
5944 Register LHS = SDiv.getReg(1);
5945 Register RHS = SDiv.getReg(2);
5946 LLT Ty = MRI.getType(Dst);
// NOTE(review): listing line 5947, which declares ShiftAmtTy, is missing from
// this extract.
// CCVT is the compare result type: s1 for scalars, a vector of s1 otherwise.
5948 LLT CCVT =
5949 Ty.isVector() ? LLT::vector(Ty.getElementCount(), 1) : LLT::scalar(1);
5950
5951 // Effectively we want to lower G_SDIV %lhs, %rhs, where %rhs is a power of 2,
5952 // to the following version:
5953 //
5954 // %c1 = G_CTTZ %rhs
5955 // %inexact = G_SUB $bitwidth, %c1
5956 // %sign = G_ASHR %lhs, $(bitwidth - 1)
5957 // %lshr = G_LSHR %sign, %inexact
5958 // %add = G_ADD %lhs, %lshr
5959 // %ashr = G_ASHR %add, %c1
5960 // %ashr = G_SELECT, %isoneorallones, %lhs, %ashr
5961 // %zero = G_CONSTANT $0
5962 // %neg = G_NEG %ashr
5963 // %isneg = G_ICMP SLT %rhs, %zero
5964 // %res = G_SELECT %isneg, %neg, %ashr
5965
5966 unsigned BitWidth = Ty.getScalarSizeInBits();
5967 auto Zero = Builder.buildConstant(Ty, 0);
5968
5969 auto Bits = Builder.buildConstant(ShiftAmtTy, BitWidth);
5970 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
5971 auto Inexact = Builder.buildSub(ShiftAmtTy, Bits, C1);
5972 // Splat the sign bit into the register
5973 auto Sign = Builder.buildAShr(
5974 Ty, LHS, Builder.buildConstant(ShiftAmtTy, BitWidth - 1));
5975
5976 // Add (LHS < 0) ? abs2 - 1 : 0;
5977 auto LSrl = Builder.buildLShr(Ty, Sign, Inexact);
5978 auto Add = Builder.buildAdd(Ty, LHS, LSrl);
5979 auto AShr = Builder.buildAShr(Ty, Add, C1);
5980
5981 // Special case: (sdiv X, 1) -> X
5982 // Special Case: (sdiv X, -1) -> 0-X
// Both cases select LHS here; the -1 case is negated by the final select.
5983 auto One = Builder.buildConstant(Ty, 1);
5984 auto MinusOne = Builder.buildConstant(Ty, -1);
5985 auto IsOne = Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, One);
5986 auto IsMinusOne =
5987 Builder.buildICmp(CmpInst::Predicate::ICMP_EQ, CCVT, RHS, MinusOne);
5988 auto IsOneOrMinusOne = Builder.buildOr(CCVT, IsOne, IsMinusOne);
5989 AShr = Builder.buildSelect(Ty, IsOneOrMinusOne, LHS, AShr);
5990
5991 // If divided by a positive value, we're done. Otherwise, the result must be
5992 // negated.
5993 auto Neg = Builder.buildNeg(Ty, AShr);
5994 auto IsNeg = Builder.buildICmp(CmpInst::Predicate::ICMP_SLT, CCVT, RHS, Zero);
5995 Builder.buildSelect(MI.getOperand(0).getReg(), IsNeg, Neg, AShr);
5996 MI.eraseFromParent();
5997}
5998
// Apply step: replaces a G_UDIV with a logical shift right of LHS by the
// count of trailing zeros of RHS, written to MI's destination register, then
// erases MI.
// NOTE(review): the signature line (listing line 5999) and listing line 6006
// (which presumably declares ShiftAmtTy) are missing from this extract -
// presumably CombinerHelper::applyUDivByPow2; confirm.
6000 assert(MI.getOpcode() == TargetOpcode::G_UDIV && "Expected UDIV");
6001 auto &UDiv = cast<GenericMachineInstr>(MI);
6002 Register Dst = UDiv.getReg(0);
6003 Register LHS = UDiv.getReg(1);
6004 Register RHS = UDiv.getReg(2);
6005 LLT Ty = MRI.getType(Dst);
6007
6008 auto C1 = Builder.buildCTTZ(ShiftAmtTy, RHS);
6009 Builder.buildLShr(MI.getOperand(0).getReg(), LHS, C1);
6010 MI.eraseFromParent();
6011}
6012
// Match step for turning a G_UMULH by a power of two into a shift. Returns
// true only if every RHS element is a ConstantInt power of two other than 1
// (undef elements are rejected) and both legality queries at the end pass.
// NOTE(review): the signature line (listing line 6013) and listing line 6019
// (which presumably declares ShiftAmtTy) are missing from this extract -
// presumably CombinerHelper::matchUMulHToLShr; confirm.
6014 assert(MI.getOpcode() == TargetOpcode::G_UMULH);
6015 Register RHS = MI.getOperand(2).getReg();
6016 Register Dst = MI.getOperand(0).getReg();
6017 LLT Ty = MRI.getType(Dst);
6018 LLT RHSTy = MRI.getType(RHS);
6020 auto MatchPow2ExceptOne = [&](const Constant *C) {
6021 if (auto *CI = dyn_cast<ConstantInt>(C))
6022 return CI->getValue().isPowerOf2() && !CI->getValue().isOne();
6023 return false;
6024 };
6025 if (!matchUnaryPredicate(MRI, RHS, MatchPow2ExceptOne, false))
6026 return false;
6027 // We need to check both G_LSHR and G_CTLZ because the combine uses G_CTLZ to
6028 // get log base 2, and it is not always legal on a target.
6029 return isLegalOrBeforeLegalizer({TargetOpcode::G_LSHR, {Ty, ShiftAmtTy}}) &&
6030 isLegalOrBeforeLegalizer({TargetOpcode::G_CTLZ, {RHSTy, RHSTy}});
6031}
6032
// Apply step: rewrites MI as LHS logically shifted right by
// (element bit width - log2(RHS)), writing to MI's destination register, then
// erases MI. The shift amount is computed in Ty and zero-extended or truncated
// to ShiftAmtTy.
// NOTE(review): the signature line (listing line 6033) and listing line 6038
// (which presumably declares ShiftAmtTy) are missing from this extract -
// presumably CombinerHelper::applyUMulHToLShr; confirm.
6034 Register LHS = MI.getOperand(1).getReg();
6035 Register RHS = MI.getOperand(2).getReg();
6036 Register Dst = MI.getOperand(0).getReg();
6037 LLT Ty = MRI.getType(Dst);
6039 unsigned NumEltBits = Ty.getScalarSizeInBits();
6040
6041 auto LogBase2 = buildLogBase2(RHS, Builder);
6042 auto ShiftAmt =
6043 Builder.buildSub(Ty, Builder.buildConstant(Ty, NumEltBits), LogBase2);
6044 auto Trunc = Builder.buildZExtOrTrunc(ShiftAmtTy, ShiftAmt);
6045 Builder.buildLShr(Dst, LHS, Trunc);
6046 MI.eraseFromParent();
6047}
6048
6050 Register &MatchInfo) const {
// Match step: recognises a truncate whose source is clamped to the signed
// range of the destination width, i.e. smin(smax(x, SignedMin), SignedMax) or
// smax(smin(x, SignedMax), SignedMin), and binds x to MatchInfo.
// NOTE(review): the first signature line (listing line 6049) is missing from
// this extract - presumably CombinerHelper::matchTruncSSatS; confirm.
6051 Register Dst = MI.getOperand(0).getReg();
6052 Register Src = MI.getOperand(1).getReg();
6053 LLT DstTy = MRI.getType(Dst);
6054 LLT SrcTy = MRI.getType(Src);
6055 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6056 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6057 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6058
// NOTE(review): listing line 6059 (the opening of this legality query) is
// missing from the extract; the visible arguments name G_TRUNC_SSAT_S on
// {DstTy, SrcTy}.
6060 {TargetOpcode::G_TRUNC_SSAT_S, {DstTy, SrcTy}}))
6061 return false;
6062
// Destination-width signed bounds, sign-extended to the source width so they
// can be compared against constants of the source type.
6063 APInt SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits);
6064 APInt SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits);
6065 return mi_match(Src, MRI,
6066 m_GSMin(m_GSMax(m_Reg(MatchInfo),
6067 m_SpecificICstOrSplat(SignedMin)),
6068 m_SpecificICstOrSplat(SignedMax))) ||
6069 mi_match(Src, MRI,
6070 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6071 m_SpecificICstOrSplat(SignedMax)),
6072 m_SpecificICstOrSplat(SignedMin)));
6073}
6074
6076 Register &MatchInfo) const {
// Apply step: builds a signed-saturating truncate (buildTruncSSatS) of
// MatchInfo into MI's destination register and erases MI.
// NOTE(review): the first signature line (listing line 6075) is missing from
// this extract - presumably CombinerHelper::applyTruncSSatS; confirm.
6077 Register Dst = MI.getOperand(0).getReg();
6078 Builder.buildTruncSSatS(Dst, MatchInfo);
6079 MI.eraseFromParent();
6080}
6081
6083 Register &MatchInfo) const {
// Match step: recognises a truncate whose source is clamped against the
// unsigned maximum of the destination width (zero-extended to the source
// width), and binds the clamped value to MatchInfo. Three alternative
// patterns are tried; only the second is fully visible here.
// NOTE(review): the first signature line (listing line 6082) is missing from
// this extract - presumably CombinerHelper::matchTruncSSatU; confirm.
6084 Register Dst = MI.getOperand(0).getReg();
6085 Register Src = MI.getOperand(1).getReg();
6086 LLT DstTy = MRI.getType(Dst);
6087 LLT SrcTy = MRI.getType(Src);
6088 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6089 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6090 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6091
// NOTE(review): listing line 6092 (the opening of this legality query) is
// missing from the extract; the visible arguments name G_TRUNC_SSAT_U on
// {DstTy, SrcTy}.
6093 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6094 return false;
6095 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
// NOTE(review): listing lines 6097 and 6104 (the inner matchers of the first
// and third alternatives) are missing from the extract. The visible second
// alternative is smax(smin(x, UnsignedMax), 0).
6096 return mi_match(Src, MRI,
6098 m_SpecificICstOrSplat(UnsignedMax))) ||
6099 mi_match(Src, MRI,
6100 m_GSMax(m_GSMin(m_Reg(MatchInfo),
6101 m_SpecificICstOrSplat(UnsignedMax)),
6102 m_SpecificICstOrSplat(0))) ||
6103 mi_match(Src, MRI,
6105 m_SpecificICstOrSplat(UnsignedMax)));
6106}
6107
6109 Register &MatchInfo) const {
// Apply step: builds buildTruncSSatU of MatchInfo into MI's destination
// register and erases MI.
// NOTE(review): the first signature line (listing line 6108) is missing from
// this extract - presumably CombinerHelper::applyTruncSSatU; confirm.
6110 Register Dst = MI.getOperand(0).getReg();
6111 Builder.buildTruncSSatU(Dst, MatchInfo);
6112 MI.eraseFromParent();
6113}
6114
6116 MachineInstr &MinMI) const {
// Match step: MinMI supplies the clamped value (operand 1) and the clamp bound
// (operand 2). Returns true when the bound is a constant or splat equal to the
// destination width's unsigned maximum (zero-extended to the source width)
// and the clamped value is not itself the result of a G_SMAX.
// NOTE(review): the first signature line (listing line 6115) is missing from
// this extract - presumably CombinerHelper::matchTruncUSatU; confirm.
6117 Register Min = MinMI.getOperand(2).getReg();
6118 Register Val = MinMI.getOperand(1).getReg();
6119 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6120 LLT SrcTy = MRI.getType(Val);
6121 unsigned NumDstBits = DstTy.getScalarSizeInBits();
6122 unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
6123 assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation");
6124
// NOTE(review): listing line 6125 (the opening of this legality query) is
// missing from the extract. The visible arguments name G_TRUNC_SSAT_U, the
// same opcode queried by the signed-source matcher; if this combine emits a
// different opcode (e.g. an unsigned-source saturating truncate), the query
// should name that opcode instead - confirm against the combine's apply rule.
6126 {TargetOpcode::G_TRUNC_SSAT_U, {DstTy, SrcTy}}))
6127 return false;
6128 APInt UnsignedMax = APInt::getMaxValue(NumDstBits).zext(NumSrcBits);
6129 return mi_match(Min, MRI, m_SpecificICstOrSplat(UnsignedMax)) &&
6130 !mi_match(Val, MRI, m_GSMax(m_Reg(), m_Reg()));
6131}
6132
6134 MachineInstr &SrcMI) const {
// Match step: returns true only when a LegalizerInfo is available and
// G_FPTOUI_SAT from the type of SrcMI's operand 1 to the type of MI's
// destination passes isLegalOrBeforeLegalizer.
// NOTE(review): the first signature line (listing line 6133) is missing from
// this extract - presumably CombinerHelper::matchTruncUSatUToFPTOUISat;
// confirm.
6135 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6136 LLT SrcTy = MRI.getType(SrcMI.getOperand(1).getReg());
6137
6138 return LI &&
6139 isLegalOrBeforeLegalizer({TargetOpcode::G_FPTOUI_SAT, {DstTy, SrcTy}});
6140}
6141
6143 BuildFnTy &MatchInfo) const {
// Match step: detects G_FNEG operands that can be folded away by changing the
// opcode (fadd <-> fsub) or by cancelling a pair of negations (fmul, fdiv,
// fmad, fma). On success MatchInfo is set to a callback that rewrites MI in
// place; on failure returns false and leaves MatchInfo untouched.
// NOTE(review): the first signature line (listing line 6142) is missing from
// this extract - presumably CombinerHelper::matchRedundantNegOperands;
// confirm.
6144 unsigned Opc = MI.getOpcode();
6145 assert(Opc == TargetOpcode::G_FADD || Opc == TargetOpcode::G_FSUB ||
6146 Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6147 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA);
6148
6149 Register Dst = MI.getOperand(0).getReg();
6150 Register X = MI.getOperand(1).getReg();
6151 Register Y = MI.getOperand(2).getReg();
6152 LLT Type = MRI.getType(Dst);
6153
6154 // fold (fadd x, fneg(y)) -> (fsub x, y)
6155 // fold (fadd fneg(y), x) -> (fsub x, y)
6156 // G_FADD is commutative so both cases are checked by m_GFAdd
6157 if (mi_match(Dst, MRI, m_GFAdd(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6158 isLegalOrBeforeLegalizer({TargetOpcode::G_FSUB, {Type}})) {
6159 Opc = TargetOpcode::G_FSUB;
6160 }
6161 // fold (fsub x, fneg(y)) -> (fadd x, y)
6162 else if (mi_match(Dst, MRI, m_GFSub(m_Reg(X), m_GFNeg(m_Reg(Y)))) &&
6163 isLegalOrBeforeLegalizer({TargetOpcode::G_FADD, {Type}})) {
6164 Opc = TargetOpcode::G_FADD;
6165 }
6166 // fold (fmul fneg(x), fneg(y)) -> (fmul x, y)
6167 // fold (fdiv fneg(x), fneg(y)) -> (fdiv x, y)
6168 // fold (fmad fneg(x), fneg(y), z) -> (fmad x, y, z)
6169 // fold (fma fneg(x), fneg(y), z) -> (fma x, y, z)
// Here X and Y are rebound to the sources of the two matched G_FNEGs.
6170 else if ((Opc == TargetOpcode::G_FMUL || Opc == TargetOpcode::G_FDIV ||
6171 Opc == TargetOpcode::G_FMAD || Opc == TargetOpcode::G_FMA) &&
6172 mi_match(X, MRI, m_GFNeg(m_Reg(X))) &&
6173 mi_match(Y, MRI, m_GFNeg(m_Reg(Y)))) {
6174 // no opcode change
6175 } else
6176 return false;
6177
// The callback captures Opc, X and Y by value and MI by reference. It swaps in
// the new opcode and operands 1 and 2, bracketed by Observer notifications.
6178 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6179 Observer.changingInstr(MI);
6180 MI.setDesc(B.getTII().get(Opc));
6181 MI.getOperand(1).setReg(X);
6182 MI.getOperand(2).setReg(Y);
6183 Observer.changedInstr(MI);
6184 };
6185 return true;
6186}
6187
6189 Register &MatchInfo) const {
// Match step for a G_FSUB whose left operand is a floating-point zero.
// MatchInfo is always set to operand 2 (the subtrahend), even when the match
// fails. Returns true for a -0.0 LHS, and for a +0.0 LHS only when MI carries
// the nsz flag; returns false otherwise.
// NOTE(review): the first signature line (listing line 6188) is missing from
// this extract - presumably CombinerHelper::matchFsubToFneg; confirm.
6190 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6191
6192 Register LHS = MI.getOperand(1).getReg();
6193 MatchInfo = MI.getOperand(2).getReg();
6194 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
6195
// Vectors accept a constant splat (undef elements allowed).
// NOTE(review): listing line 6198 (the scalar alternative of this conditional
// expression) is missing from the extract.
6196 const auto LHSCst = Ty.isVector()
6197 ? getFConstantSplat(LHS, MRI, /* allowUndef */ true)
6199 if (!LHSCst)
6200 return false;
6201
6202 // -0.0 is always allowed
6203 if (LHSCst->Value.isNegZero())
6204 return true;
6205
6206 // +0.0 is only allowed if nsz is set.
6207 if (LHSCst->Value.isPosZero())
6208 return MI.getFlag(MachineInstr::FmNsz);
6209
6210 return false;
6211}
6212
6214 Register &MatchInfo) const {
// Apply step: builds G_FNEG(G_FCANONICALIZE(MatchInfo)) into MI's destination
// register and erases MI through eraseInst.
// NOTE(review): the first signature line (listing line 6213) is missing from
// this extract - presumably CombinerHelper::applyFsubToFneg; confirm.
6215 Register Dst = MI.getOperand(0).getReg();
6216 Builder.buildFNeg(
6217 Dst, Builder.buildFCanonicalize(MRI.getType(Dst), MatchInfo).getReg(0));
6218 eraseInst(MI);
6219}
6220
6221/// Checks if \p MI is TargetOpcode::G_FMUL and contractable either
6222/// due to global flags or MachineInstr flags.
6223static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally) {
6224 if (MI.getOpcode() != TargetOpcode::G_FMUL)
6225 return false;
6226 return AllowFusionGlobally || MI.getFlag(MachineInstr::MIFlag::FmContract);
6227}
6228
6229static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1,
6230 const MachineRegisterInfo &MRI) {
6231 return std::distance(MRI.use_instr_nodbg_begin(MI0.getOperand(0).getReg()),
6232 MRI.use_instr_nodbg_end()) >
6233 std::distance(MRI.use_instr_nodbg_begin(MI1.getOperand(0).getReg()),
6234 MRI.use_instr_nodbg_end());
6235}
6236
6238 bool &AllowFusionGlobally,
6239 bool &HasFMAD, bool &Aggressive,
6240 bool CanReassociate) const {
// Shared precondition check for the FMA/FMAD fusion matchers. Returns true
// when MI may be fused and fills the three out-parameters:
//   HasFMAD             - G_FMAD is usable (post-legalization and legal).
//   AllowFusionGlobally - fusion is allowed without per-instruction flags.
//   Aggressive          - the target enables aggressive FMA fusion.
// The out-parameters are only all written on the true path; on an early
// return some of them are left unassigned. When CanReassociate is set, MI must
// also carry the reassoc flag.
// NOTE(review): the first signature line (listing line 6237) is missing from
// this extract - presumably CombinerHelper::canCombineFMadOrFMA; confirm.
6241
6242 auto *MF = MI.getMF();
6243 const auto &TLI = *MF->getSubtarget().getTargetLowering();
6244 const TargetOptions &Options = MF->getTarget().Options;
6245 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6246
6247 if (CanReassociate && !MI.getFlag(MachineInstr::MIFlag::FmReassoc))
6248 return false;
6249
6250 // Floating-point multiply-add with intermediate rounding.
6251 HasFMAD = (!isPreLegalize() && TLI.isFMADLegal(MI, DstType));
6252 // Floating-point multiply-add without intermediate rounding.
6253 bool HasFMA = TLI.isFMAFasterThanFMulAndFAdd(*MF, DstType) &&
6254 isLegalOrBeforeLegalizer({TargetOpcode::G_FMA, {DstType}});
6255 // No valid opcode, do not combine.
6256 if (!HasFMAD && !HasFMA)
6257 return false;
6258
// Fusion is globally allowed under the Fast fp-contract option or whenever
// FMAD is available.
6259 AllowFusionGlobally = Options.AllowFPOpFusion == FPOpFusion::Fast || HasFMAD;
6260 // If the addition is not contractable, do not combine.
6261 if (!AllowFusionGlobally && !MI.getFlag(MachineInstr::MIFlag::FmContract))
6262 return false;
6263
6264 Aggressive = TLI.enableAggressiveFMAFusion(DstType);
6265 return true;
6266}
6267
6270 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step: folds a G_FADD with a contractable G_FMUL operand into a single
// fused multiply-add (G_FMAD when available, otherwise G_FMA). On success
// MatchInfo is set to a callback that builds the fused instruction into MI's
// destination register.
// NOTE(review): the first signature lines (listing lines 6268-6269) are
// missing from this extract - presumably
// CombinerHelper::matchCombineFAddFMulToFMadOrFMA; confirm.
6271 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6272
6273 bool AllowFusionGlobally, HasFMAD, Aggressive;
6274 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6275 return false;
6276
6277 Register Op1 = MI.getOperand(1).getReg();
6278 Register Op2 = MI.getOperand(2).getReg();
6279 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6280 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6281 unsigned PreferredFusedOpcode =
6282 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6283
6284 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6285 // prefer to fold the multiply with fewer uses.
6286 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6287 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6288 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6289 std::swap(LHS, RHS);
6290 }
6291
6292 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
// Without aggressive fusion the multiply must have a single non-debug use.
6293 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6294 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg))) {
6295 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6296 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6297 {LHS.MI->getOperand(1).getReg(),
6298 LHS.MI->getOperand(2).getReg(), RHS.Reg});
6299 };
6300 return true;
6301 }
6302
6303 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
6304 if (isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6305 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg))) {
6306 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6307 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6308 {RHS.MI->getOperand(1).getReg(),
6309 RHS.MI->getOperand(2).getReg(), LHS.Reg});
6310 };
6311 return true;
6312 }
6313
6314 return false;
6315}
6316
6319 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step: folds a G_FADD where one operand is an fpext of a contractable
// G_FMUL into a fused multiply-add on the extended multiplicands, provided the
// target reports the fpext as foldable (isFPExtFoldable). On success MatchInfo
// is set to a callback that builds the two G_FPEXTs and the fused instruction.
// NOTE(review): the first signature lines (listing lines 6317-6318) are
// missing from this extract - presumably
// CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMA; confirm.
6320 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6321
6322 bool AllowFusionGlobally, HasFMAD, Aggressive;
6323 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6324 return false;
6325
6326 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6327 Register Op1 = MI.getOperand(1).getReg();
6328 Register Op2 = MI.getOperand(2).getReg();
6329 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6330 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6331 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6332
6333 unsigned PreferredFusedOpcode =
6334 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6335
6336 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6337 // prefer to fold the multiply with fewer uses.
6338 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6339 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6340 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6341 std::swap(LHS, RHS);
6342 }
6343
6344 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
// FpExtSrc is bound by m_MInstr to the instruction feeding the G_FPEXT.
6345 MachineInstr *FpExtSrc;
6346 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6347 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6348 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6349 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6350 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6351 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6352 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6353 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6354 {FpExtX.getReg(0), FpExtY.getReg(0), RHS.Reg});
6355 };
6356 return true;
6357 }
6358
6359 // fold (fadd z, (fpext (fmul x, y))) -> (fma (fpext x), (fpext y), z)
6360 // Note: Commutes FADD operands.
6361 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FpExtSrc))) &&
6362 isContractableFMul(*FpExtSrc, AllowFusionGlobally) &&
6363 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6364 MRI.getType(FpExtSrc->getOperand(1).getReg()))) {
6365 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6366 auto FpExtX = B.buildFPExt(DstType, FpExtSrc->getOperand(1).getReg());
6367 auto FpExtY = B.buildFPExt(DstType, FpExtSrc->getOperand(2).getReg());
6368 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6369 {FpExtX.getReg(0), FpExtY.getReg(0), LHS.Reg});
6370 };
6371 return true;
6372 }
6373
6374 return false;
6375}
6376
6379 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step: folds a G_FADD of z with a fused multiply-add whose addend is a
// G_FMUL, (fma x, y, (fmul u, v)), into two nested fused ops,
// (fma x, y, (fma u, v, z)). canCombineFMadOrFMA is called with
// CanReassociate set, so MI must carry the reassoc flag. Both the fused op's
// result and its G_FMUL addend must have a single non-debug use.
// NOTE(review): the first signature lines (listing lines 6377-6378) are
// missing from this extract - presumably
// CombinerHelper::matchCombineFAddFMAFMulToFMadOrFMA; confirm.
6380 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6381
6382 bool AllowFusionGlobally, HasFMAD, Aggressive;
6383 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive, true))
6384 return false;
6385
6386 Register Op1 = MI.getOperand(1).getReg();
6387 Register Op2 = MI.getOperand(2).getReg();
6388 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6389 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6390 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6391
6392 unsigned PreferredFusedOpcode =
6393 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6394
6395 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6396 // prefer to fold the multiply with fewer uses.
6397 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6398 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6399 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6400 std::swap(LHS, RHS);
6401 }
6402
// FMA stays null unless one of the two operand orders below matches; Z is the
// other G_FADD operand.
6403 MachineInstr *FMA = nullptr;
6404 Register Z;
6405 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
// The opcode test short-circuits before operand 3 is read.
6406 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6407 (MRI.getVRegDef(LHS.MI->getOperand(3).getReg())->getOpcode() ==
6408 TargetOpcode::G_FMUL) &&
6409 MRI.hasOneNonDBGUse(LHS.MI->getOperand(0).getReg()) &&
6410 MRI.hasOneNonDBGUse(LHS.MI->getOperand(3).getReg())) {
6411 FMA = LHS.MI;
6412 Z = RHS.Reg;
6413 }
6414 // fold (fadd z, (fma x, y, (fmul u, v))) -> (fma x, y, (fma u, v, z))
6415 else if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6416 (MRI.getVRegDef(RHS.MI->getOperand(3).getReg())->getOpcode() ==
6417 TargetOpcode::G_FMUL) &&
6418 MRI.hasOneNonDBGUse(RHS.MI->getOperand(0).getReg()) &&
6419 MRI.hasOneNonDBGUse(RHS.MI->getOperand(3).getReg())) {
6420 Z = LHS.Reg;
6421 FMA = RHS.MI;
6422 }
6423
6424 if (FMA) {
6425 MachineInstr *FMulMI = MRI.getVRegDef(FMA->getOperand(3).getReg());
6426 Register X = FMA->getOperand(1).getReg();
6427 Register Y = FMA->getOperand(2).getReg();
6428 Register U = FMulMI->getOperand(1).getReg();
6429 Register V = FMulMI->getOperand(2).getReg();
6430
// The callback creates a fresh virtual register for the inner fused op and
// writes the outer one into MI's destination register.
6431 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6432 Register InnerFMA = MRI.createGenericVirtualRegister(DstTy);
6433 B.buildInstr(PreferredFusedOpcode, {InnerFMA}, {U, V, Z});
6434 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6435 {X, Y, InnerFMA});
6436 };
6437 return true;
6438 }
6439
6440 return false;
6441}
6442
6445 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// Match step, only active when the target enables aggressive FMA fusion:
// folds a G_FADD of z with a fused multiply-add that involves an fpext'd
// G_FMUL into two nested fused ops on extended operands. Four operand shapes
// are tried in order (fpext inside the fused op's addend, or fpext around the
// whole fused op, each on either side of the G_FADD). On success MatchInfo is
// set to a callback that builds the replacement.
// NOTE(review): the first signature lines (listing lines 6443-6444) are
// missing from this extract - presumably
// CombinerHelper::matchCombineFAddFpExtFMulToFMadOrFMAAggressive; confirm.
6446 assert(MI.getOpcode() == TargetOpcode::G_FADD);
6447
6448 bool AllowFusionGlobally, HasFMAD, Aggressive;
6449 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6450 return false;
6451
6452 if (!Aggressive)
6453 return false;
6454
6455 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6456 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
6457 Register Op1 = MI.getOperand(1).getReg();
6458 Register Op2 = MI.getOperand(2).getReg();
6459 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6460 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6461
6462 unsigned PreferredFusedOpcode =
6463 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6464
6465 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
6466 // prefer to fold the multiply with fewer uses.
6467 if (Aggressive && isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6468 isContractableFMul(*RHS.MI, AllowFusionGlobally)) {
6469 if (hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6470 std::swap(LHS, RHS);
6471 }
6472
6473 // Builds: (fma x, y, (fma (fpext u), (fpext v), z))
// NOTE(review): listing line 6475 (the rest of this lambda's parameter list)
// is missing from the extract; the call sites below pass (U, V, Z, X, Y, B).
6474 auto buildMatchInfo = [=, &MI](Register U, Register V, Register Z, Register X,
6476 Register FpExtU = B.buildFPExt(DstType, U).getReg(0);
6477 Register FpExtV = B.buildFPExt(DstType, V).getReg(0);
6478 Register InnerFMA =
6479 B.buildInstr(PreferredFusedOpcode, {DstType}, {FpExtU, FpExtV, Z})
6480 .getReg(0);
6481 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6482 {X, Y, InnerFMA});
6483 };
6484
6485 MachineInstr *FMulMI, *FMAMI;
6486 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
6487 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6488 if (LHS.MI->getOpcode() == PreferredFusedOpcode &&
6489 mi_match(LHS.MI->getOperand(3).getReg(), MRI,
6490 m_GFPExt(m_MInstr(FMulMI))) &&
6491 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6492 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6493 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6494 MatchInfo = [=](MachineIRBuilder &B) {
6495 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6496 FMulMI->getOperand(2).getReg(), RHS.Reg,
6497 LHS.MI->getOperand(1).getReg(),
6498 LHS.MI->getOperand(2).getReg(), B);
6499 };
6500 return true;
6501 }
6502
6503 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
6504 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6505 // FIXME: This turns two single-precision and one double-precision
6506 // operation into two double-precision operations, which might not be
6507 // interesting for all targets, especially GPUs.
6508 if (mi_match(LHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6509 FMAMI->getOpcode() == PreferredFusedOpcode) {
// This local FMulMI shadows the outer one declared above.
6510 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6511 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6512 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6513 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6514 MatchInfo = [=](MachineIRBuilder &B) {
6515 Register X = FMAMI->getOperand(1).getReg();
6516 Register Y = FMAMI->getOperand(2).getReg();
6517 X = B.buildFPExt(DstType, X).getReg(0);
6518 Y = B.buildFPExt(DstType, Y).getReg(0);
6519 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6520 FMulMI->getOperand(2).getReg(), RHS.Reg, X, Y, B);
6521 };
6522
6523 return true;
6524 }
6525 }
6526
6527 // fold (fadd z, (fma x, y, (fpext (fmul u, v))))
6528 // -> (fma x, y, (fma (fpext u), (fpext v), z))
6529 if (RHS.MI->getOpcode() == PreferredFusedOpcode &&
6530 mi_match(RHS.MI->getOperand(3).getReg(), MRI,
6531 m_GFPExt(m_MInstr(FMulMI))) &&
6532 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6533 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6534 MRI.getType(FMulMI->getOperand(0).getReg()))) {
6535 MatchInfo = [=](MachineIRBuilder &B) {
6536 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6537 FMulMI->getOperand(2).getReg(), LHS.Reg,
6538 RHS.MI->getOperand(1).getReg(),
6539 RHS.MI->getOperand(2).getReg(), B);
6540 };
6541 return true;
6542 }
6543
6544 // fold (fadd z, (fpext (fma x, y, (fmul u, v))))
6545 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
6546 // FIXME: This turns two single-precision and one double-precision
6547 // operation into two double-precision operations, which might not be
6548 // interesting for all targets, especially GPUs.
6549 if (mi_match(RHS.Reg, MRI, m_GFPExt(m_MInstr(FMAMI))) &&
6550 FMAMI->getOpcode() == PreferredFusedOpcode) {
// This local FMulMI shadows the outer one declared above.
6551 MachineInstr *FMulMI = MRI.getVRegDef(FMAMI->getOperand(3).getReg());
6552 if (isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6553 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstType,
6554 MRI.getType(FMAMI->getOperand(0).getReg()))) {
6555 MatchInfo = [=](MachineIRBuilder &B) {
6556 Register X = FMAMI->getOperand(1).getReg();
6557 Register Y = FMAMI->getOperand(2).getReg();
6558 X = B.buildFPExt(DstType, X).getReg(0);
6559 Y = B.buildFPExt(DstType, Y).getReg(0);
6560 buildMatchInfo(FMulMI->getOperand(1).getReg(),
6561 FMulMI->getOperand(2).getReg(), LHS.Reg, X, Y, B);
6562 };
6563 return true;
6564 }
6565 }
6566
6567 return false;
6568}
6569
6572 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Matches a G_FSUB whose LHS or RHS is defined by an instruction accepted by
// isContractableFMul. On success MatchInfo is set to a callback that builds a
// G_FNEG plus the fused G_FMAD/G_FMA into MI's destination register, and true
// is returned. Returns false when canCombineFMadOrFMA rejects MI or when
// neither operand qualifies.
6573 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6574
6575 bool AllowFusionGlobally, HasFMAD, Aggressive;
6576 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6577 return false;
6578
// Pair each source register with its defining instruction.
6579 Register Op1 = MI.getOperand(1).getReg();
6580 Register Op2 = MI.getOperand(2).getReg();
6581 DefinitionAndSourceRegister LHS = {MRI.getVRegDef(Op1), Op1};
6582 DefinitionAndSourceRegister RHS = {MRI.getVRegDef(Op2), Op2};
6583 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6584
6585 // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
6586 // prefer to fold the multiply with fewer uses.
// Declared as int but only ever used as a boolean flag.
6587 int FirstMulHasFewerUses = true;
6588 if (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6589 isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6590 hasMoreUses(*LHS.MI, *RHS.MI, MRI))
6591 FirstMulHasFewerUses = false;
6592
// G_FMAD when canCombineFMadOrFMA reported HasFMAD, G_FMA otherwise.
6593 unsigned PreferredFusedOpcode =
6594 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6595
6596 // fold (fsub (fmul x, y), z) -> (fma x, y, -z)
// Unless Aggressive is set, the multiply result must have a single
// non-debug use.
6597 if (FirstMulHasFewerUses &&
6598 (isContractableFMul(*LHS.MI, AllowFusionGlobally) &&
6599 (Aggressive || MRI.hasOneNonDBGUse(LHS.Reg)))) {
// MI is captured by reference, everything else by copy.
6600 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6601 Register NegZ = B.buildFNeg(DstTy, RHS.Reg).getReg(0);
6602 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6603 {LHS.MI->getOperand(1).getReg(),
6604 LHS.MI->getOperand(2).getReg(), NegZ});
6605 };
6606 return true;
6607 }
6608 // fold (fsub x, (fmul y, z)) -> (fma -y, z, x)
6609 else if ((isContractableFMul(*RHS.MI, AllowFusionGlobally) &&
6610 (Aggressive || MRI.hasOneNonDBGUse(RHS.Reg)))) {
6611 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6612 Register NegY =
6613 B.buildFNeg(DstTy, RHS.MI->getOperand(1).getReg()).getReg(0);
6614 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6615 {NegY, RHS.MI->getOperand(2).getReg(), LHS.Reg});
6616 };
6617 return true;
6618 }
6619
6620 return false;
6621}
6622
6625 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Matches a G_FSUB where one operand is a G_FNEG of an instruction accepted
// by isContractableFMul. On success MatchInfo is set to a callback that
// builds the fused G_FMAD/G_FMA into MI's destination and true is returned.
6626 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6627
6628 bool AllowFusionGlobally, HasFMAD, Aggressive;
6629 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6630 return false;
6631
6632 Register LHSReg = MI.getOperand(1).getReg();
6633 Register RHSReg = MI.getOperand(2).getReg();
6634 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6635
// G_FMAD when canCombineFMadOrFMA reported HasFMAD, G_FMA otherwise.
6636 unsigned PreferredFusedOpcode =
6637 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6638
6639 MachineInstr *FMulMI;
6640 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
// Unless Aggressive is set, both the fneg result and the fmul result must
// have a single non-debug use.
6641 if (mi_match(LHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6642 (Aggressive || (MRI.hasOneNonDBGUse(LHSReg) &&
6643 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6644 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
// MI is captured by reference, everything else by copy.
6645 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6646 Register NegX =
6647 B.buildFNeg(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6648 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6649 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6650 {NegX, FMulMI->getOperand(2).getReg(), NegZ});
6651 };
6652 return true;
6653 }
6654
6655 // fold (fsub x, (fneg (fmul y, z))) -> (fma y, z, x)
// The two negations cancel, so no G_FNEG is emitted on this path.
6656 if (mi_match(RHSReg, MRI, m_GFNeg(m_MInstr(FMulMI))) &&
6657 (Aggressive || (MRI.hasOneNonDBGUse(RHSReg) &&
6658 MRI.hasOneNonDBGUse(FMulMI->getOperand(0).getReg()))) &&
6659 isContractableFMul(*FMulMI, AllowFusionGlobally)) {
6660 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6661 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6662 {FMulMI->getOperand(1).getReg(),
6663 FMulMI->getOperand(2).getReg(), LHSReg});
6664 };
6665 return true;
6666 }
6667
6668 return false;
6669}
6670
6673 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Matches a G_FSUB where one operand is a G_FPEXT of an instruction accepted
// by isContractableFMul. On success MatchInfo is set to a callback that
// extends the multiply operands to the destination type and builds the fused
// G_FMAD/G_FMA into MI's destination; true is returned.
6674 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6675
6676 bool AllowFusionGlobally, HasFMAD, Aggressive;
6677 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6678 return false;
6679
6680 Register LHSReg = MI.getOperand(1).getReg();
6681 Register RHSReg = MI.getOperand(2).getReg();
6682 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6683
// G_FMAD when canCombineFMadOrFMA reported HasFMAD, G_FMA otherwise.
6684 unsigned PreferredFusedOpcode =
6685 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6686
6687 MachineInstr *FMulMI;
6688 // fold (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z))
// Unless Aggressive is set, the fpext result must have a single non-debug
// use.
6689 if (mi_match(LHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6690 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6691 (Aggressive || MRI.hasOneNonDBGUse(LHSReg))) {
// MI is captured by reference, everything else by copy.
6692 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6693 Register FpExtX =
6694 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6695 Register FpExtY =
6696 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6697 Register NegZ = B.buildFNeg(DstTy, RHSReg).getReg(0);
6698 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6699 {FpExtX, FpExtY, NegZ});
6700 };
6701 return true;
6702 }
6703
6704 // fold (fsub x, (fpext (fmul y, z))) -> (fma (fneg (fpext y)), (fpext z), x)
6705 if (mi_match(RHSReg, MRI, m_GFPExt(m_MInstr(FMulMI))) &&
6706 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6707 (Aggressive || MRI.hasOneNonDBGUse(RHSReg))) {
6708 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6709 Register FpExtY =
6710 B.buildFPExt(DstTy, FMulMI->getOperand(1).getReg()).getReg(0);
6711 Register NegY = B.buildFNeg(DstTy, FpExtY).getReg(0);
6712 Register FpExtZ =
6713 B.buildFPExt(DstTy, FMulMI->getOperand(2).getReg()).getReg(0);
6714 B.buildInstr(PreferredFusedOpcode, {MI.getOperand(0).getReg()},
6715 {NegY, FpExtZ, LHSReg});
6716 };
6717 return true;
6718 }
6719
6720 return false;
6721}
6722
6725 std::function<void(MachineIRBuilder &)> &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Matches a G_FSUB where one operand is an fpext/fneg (in either nesting
// order) of an instruction accepted by isContractableFMul, and the target
// reports the extension as foldable via isFPExtFoldable. On success MatchInfo
// is set to a callback that builds the fused form and true is returned.
6726 assert(MI.getOpcode() == TargetOpcode::G_FSUB);
6727
6728 bool AllowFusionGlobally, HasFMAD, Aggressive;
6729 if (!canCombineFMadOrFMA(MI, AllowFusionGlobally, HasFMAD, Aggressive))
6730 return false;
6731
6732 const auto &TLI = *MI.getMF()->getSubtarget().getTargetLowering();
6733 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6734 Register LHSReg = MI.getOperand(1).getReg();
6735 Register RHSReg = MI.getOperand(2).getReg();
6736
// G_FMAD when canCombineFMadOrFMA reported HasFMAD, G_FMA otherwise.
6737 unsigned PreferredFusedOpcode =
6738 HasFMAD ? TargetOpcode::G_FMAD : TargetOpcode::G_FMA;
6739
// Shared emitter: Dst = fused-op(fpext X, fpext Y, Z).
// NOTE(review): the end of this lambda's parameter list (listing line 6741)
// is absent from this listing; the body uses a builder named B.
6740 auto buildMatchInfo = [=](Register Dst, Register X, Register Y, Register Z,
6742 Register FpExtX = B.buildFPExt(DstTy, X).getReg(0);
6743 Register FpExtY = B.buildFPExt(DstTy, Y).getReg(0);
6744 B.buildInstr(PreferredFusedOpcode, {Dst}, {FpExtX, FpExtY, Z});
6745 };
6746
6747 MachineInstr *FMulMI;
6748 // fold (fsub (fpext (fneg (fmul x, y))), z) ->
6749 // (fneg (fma (fpext x), (fpext y), z))
6750 // fold (fsub (fneg (fpext (fmul x, y))), z) ->
6751 // (fneg (fma (fpext x), (fpext y), z))
6752 if ((mi_match(LHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6753 mi_match(LHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6754 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6755 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6756 MRI.getType(FMulMI->getOperand(0).getReg()))) {
// The fused op goes into a fresh virtual register; the trailing G_FNEG
// writes MI's destination.
6757 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6758 Register FMAReg = MRI.createGenericVirtualRegister(DstTy);
6759 buildMatchInfo(FMAReg, FMulMI->getOperand(1).getReg(),
6760 FMulMI->getOperand(2).getReg(), RHSReg, B);
6761 B.buildFNeg(MI.getOperand(0).getReg(), FMAReg);
6762 };
6763 return true;
6764 }
6765
6766 // fold (fsub x, (fpext (fneg (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6767 // fold (fsub x, (fneg (fpext (fmul y, z)))) -> (fma (fpext y), (fpext z), x)
6768 if ((mi_match(RHSReg, MRI, m_GFPExt(m_GFNeg(m_MInstr(FMulMI)))) ||
6769 mi_match(RHSReg, MRI, m_GFNeg(m_GFPExt(m_MInstr(FMulMI))))) &&
6770 isContractableFMul(*FMulMI, AllowFusionGlobally) &&
6771 TLI.isFPExtFoldable(MI, PreferredFusedOpcode, DstTy,
6772 MRI.getType(FMulMI->getOperand(0).getReg()))) {
// Here the fused op writes MI's destination directly.
6773 MatchInfo = [=, &MI](MachineIRBuilder &B) {
6774 buildMatchInfo(MI.getOperand(0).getReg(), FMulMI->getOperand(1).getReg(),
6775 FMulMI->getOperand(2).getReg(), LHSReg, B);
6776 };
6777 return true;
6778 }
6779
6780 return false;
6781}
6782
6784 unsigned &IdxToPropagate) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing IdxToPropagate parameter is visible.
//
// Checks whether operand 1 or operand 2 of a floating-point min/max is a
// constant NaN. On a match, IdxToPropagate is set to an operand index (1 or
// 2) and true is returned; any other opcode returns false.
6785 bool PropagateNaN;
6786 switch (MI.getOpcode()) {
6787 default:
6788 return false;
6789 case TargetOpcode::G_FMINNUM:
6790 case TargetOpcode::G_FMAXNUM:
6791 PropagateNaN = false;
6792 break;
6793 case TargetOpcode::G_FMINIMUM:
6794 case TargetOpcode::G_FMAXIMUM:
6795 PropagateNaN = true;
6796 break;
6797 }
6798
// Tests one operand. When it is a constant NaN, selects that operand's own
// index if PropagateNaN is set, otherwise the index of the other operand.
6799 auto MatchNaN = [&](unsigned Idx) {
6800 Register MaybeNaNReg = MI.getOperand(Idx).getReg();
6801 const ConstantFP *MaybeCst = getConstantFPVRegVal(MaybeNaNReg, MRI);
6802 if (!MaybeCst || !MaybeCst->getValueAPF().isNaN())
6803 return false;
6804 IdxToPropagate = PropagateNaN ? Idx : (Idx == 1 ? 2 : 1);
6805 return true;
6806 };
6807
// Operand 1 is tried first; operand 2 is only inspected if it did not match.
6808 return MatchNaN(1) || MatchNaN(2);
6809}
6810
6811 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
6812 // reciprocal.
6813 // E.g., (a / Y; b / Y;) -> (recip = 1.0 / Y; a * recip; b * recip)
// NOTE(review): the first line of the signature is absent from this listing.
//
// On return MatchInfo holds MI plus every other qualifying G_FDIV in MI's
// basic block that divides by the same register, with an instruction that
// dominates the ones it was compared against at index 0. Returns true when
// their count reaches the target's threshold.
6815 MachineInstr &MI, SmallVector<MachineInstr *> &MatchInfo) const {
6816 assert(MI.getOpcode() == TargetOpcode::G_FDIV);
6817
6818 Register X = MI.getOperand(1).getReg();
6819 Register Y = MI.getOperand(2).getReg();
6820
// The instruction must carry the arcp fast-math flag.
6821 if (!MI.getFlag(MachineInstr::MIFlag::FmArcp))
6822 return false;
6823
6824 // Skip if current node is a reciprocal/fneg-reciprocal.
6825 auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI);
6826 if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
6827 return false;
6828
6829 // Exit early if the target does not want this transform or if there can't
6830 // possibly be enough uses of the divisor to make the transform worthwhile.
6831 unsigned MinUses = getTargetLowering().combineRepeatedFPDivisors();
6832 if (!MinUses)
6833 return false;
6834
6835 // Find all FDIV users of the same divisor. For the moment we limit all
6836 // instructions to a single BB and use the first Instr in MatchInfo as the
6837 // dominating position.
6838 MatchInfo.push_back(&MI);
6839 for (auto &U : MRI.use_nodbg_instructions(Y)) {
6840 if (&U == &MI || U.getParent() != MI.getParent())
6841 continue;
// Y must be the divisor and must not also be the dividend.
6842 if (U.getOpcode() == TargetOpcode::G_FDIV &&
6843 U.getOperand(2).getReg() == Y && U.getOperand(1).getReg() != Y) {
6844 // This division is eligible for optimization only if it carries the
6845 // arcp flag, i.e. it allows reciprocal formation.
6846 if (U.getFlag(MachineInstr::MIFlag::FmArcp)) {
6847 MatchInfo.push_back(&U);
// Keep a dominating instruction in slot 0.
6848 if (dominates(U, *MatchInfo[0]))
6849 std::swap(MatchInfo[0], MatchInfo.back());
6850 }
6851 }
6852 }
6853
6854 // Now that we have the actual number of divisor uses, make sure it meets
6855 // the minimum threshold specified by the target.
6856 return MatchInfo.size() >= MinUses;
6857}
6858
6860 SmallVector<MachineInstr *> &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Builds one `1.0 / divisor` G_FDIV in front of MatchInfo[0], then replaces
// every G_FDIV in MatchInfo with a G_FMUL by that reciprocal and erases the
// original division.
6861 // Generate the new div at the position of the first instruction, that we have
6862 // ensured will dominate all other instructions.
6863 Builder.setInsertPt(*MatchInfo[0]->getParent(), MatchInfo[0]);
6864 LLT Ty = MRI.getType(MatchInfo[0]->getOperand(0).getReg());
// The reciprocal reuses the flags of MatchInfo[0].
6865 auto Div = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0),
6866 MatchInfo[0]->getOperand(2).getReg(),
6867 MatchInfo[0]->getFlags());
6868
6869 // Replace all found div's with fmul instructions.
6870 for (MachineInstr *MI : MatchInfo) {
// Each multiply is built at the position of the division it replaces and
// keeps that division's destination register and flags.
6871 Builder.setInsertPt(*MI->getParent(), MI);
6872 Builder.buildFMul(MI->getOperand(0).getReg(), MI->getOperand(1).getReg(),
6873 Div->getOperand(0).getReg(), MI->getFlags());
6874 MI->eraseFromParent();
6875 }
6876}
6877
// NOTE(review): the signature of this function is absent from this listing.
// `MI` and `Src` are used below without a visible declaration; presumably
// they are parameters, with Src receiving the minuend B of the matched
// subtraction — confirm against the header.
6879 assert(MI.getOpcode() == TargetOpcode::G_ADD && "Expected a G_ADD");
6880 Register LHS = MI.getOperand(1).getReg();
6881 Register RHS = MI.getOperand(2).getReg();
6882
6883 // Helper lambda to check for opportunities for
6884 // A + (B - A) -> B
6885 // (B - A) + A -> B
// Src is bound by the pattern match itself, so it may be written even when
// the subtrahend comparison then fails.
6886 auto CheckFold = [&](Register MaybeSub, Register MaybeSameReg) {
6887 Register Reg;
6888 return mi_match(MaybeSub, MRI, m_GSub(m_Reg(Src), m_Reg(Reg))) &&
6889 Reg == MaybeSameReg;
6890 };
// Try the subtraction on the LHS first, then on the RHS.
6891 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
6892}
6893
6895 Register &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
// On success MatchInfo is set to the register x described below.
6896 // This combine folds the following patterns:
6897 //
6898 // G_BUILD_VECTOR_TRUNC (G_BITCAST(x), G_LSHR(G_BITCAST(x), k))
6899 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), G_TRUNC(G_LSHR(G_BITCAST(x), k)))
6900 // into
6901 // x
6902 // if
6903 // k == sizeof(VecEltTy)/2
6904 // type(x) == type(dst)
6905 //
6906 // G_BUILD_VECTOR(G_TRUNC(G_BITCAST(x)), undef)
6907 // into
6908 // x
6909 // if
6910 // type(x) == type(dst)
6911
6912 LLT DstVecTy = MRI.getType(MI.getOperand(0).getReg());
6913 LLT DstEltTy = DstVecTy.getElementType();
6914
6915 Register Lo, Hi;
6916
// NOTE(review): the pattern argument of this mi_match (listing line 6919) is
// absent from this listing; the body below reads Lo, so the pattern
// presumably binds it.
6917 if (mi_match(
6918 MI, MRI,
6920 MatchInfo = Lo;
6921 return MRI.getType(MatchInfo) == DstVecTy;
6922 }
6923
// Two-element form: both halves must come from the same register and the
// shift amount must equal the destination element size in bits.
6924 std::optional<ValueAndVReg> ShiftAmount;
6925 const auto LoPattern = m_GBitcast(m_Reg(Lo));
6926 const auto HiPattern = m_GLShr(m_GBitcast(m_Reg(Hi)), m_GCst(ShiftAmount));
6927 if (mi_match(
6928 MI, MRI,
6929 m_any_of(m_GBuildVectorTrunc(LoPattern, HiPattern),
6930 m_GBuildVector(m_GTrunc(LoPattern), m_GTrunc(HiPattern))))) {
6931 if (Lo == Hi && ShiftAmount->Value == DstEltTy.getSizeInBits()) {
6932 MatchInfo = Lo;
6933 return MRI.getType(MatchInfo) == DstVecTy;
6934 }
6935 }
6936
6937 return false;
6938}
6939
6941 Register &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
// MatchInfo is bound to x by the pattern match; the function returns true
// only if x's type equals the type of MI's destination.
6942 // Replace (G_TRUNC (G_BITCAST (G_BUILD_VECTOR x, y)) with just x
6943 // if type(x) == type(G_TRUNC)
6944 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6945 m_GBitcast(m_GBuildVector(m_Reg(MatchInfo), m_Reg()))))
6946 return false;
6947
6948 return MRI.getType(MatchInfo) == MRI.getType(MI.getOperand(0).getReg());
6949}
6950
6952 Register &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
6953 // Replace (G_TRUNC (G_LSHR (G_BITCAST (G_BUILD_VECTOR x, y)), K)) with
6954 // y if K == size of vector element type
6955 std::optional<ValueAndVReg> ShiftAmt;
// NOTE(review): the first pattern line of this mi_match (listing line 6957)
// is absent from this listing; the code below reads MatchInfo, so the
// pattern presumably binds it.
6956 if (!mi_match(MI.getOperand(1).getReg(), MRI,
6958 m_GCst(ShiftAmt))))
6959 return false;
6960
// The shift amount must equal the bit size of the matched register's type,
// and that type must equal the type of MI's destination.
6961 LLT MatchTy = MRI.getType(MatchInfo);
6962 return ShiftAmt->Value.getZExtValue() == MatchTy.getSizeInBits() &&
6963 MatchTy == MRI.getType(MI.getOperand(0).getReg());
6964}
6965
6966unsigned CombinerHelper::getFPMinMaxOpcForSelect(
6967 CmpInst::Predicate Pred, LLT DstTy,
6968 SelectPatternNaNBehaviour VsNaNRetVal) const {
6969 assert(VsNaNRetVal != SelectPatternNaNBehaviour::NOT_APPLICABLE &&
6970 "Expected a NaN behaviour?");
6971 // Choose an opcode based off of legality or the behaviour when one of the
6972 // LHS/RHS may be NaN.
6973 switch (Pred) {
6974 default:
6975 return 0;
6976 case CmpInst::FCMP_UGT:
6977 case CmpInst::FCMP_UGE:
6978 case CmpInst::FCMP_OGT:
6979 case CmpInst::FCMP_OGE:
6980 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6981 return TargetOpcode::G_FMAXNUM;
6982 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6983 return TargetOpcode::G_FMAXIMUM;
6984 if (isLegal({TargetOpcode::G_FMAXNUM, {DstTy}}))
6985 return TargetOpcode::G_FMAXNUM;
6986 if (isLegal({TargetOpcode::G_FMAXIMUM, {DstTy}}))
6987 return TargetOpcode::G_FMAXIMUM;
6988 return 0;
6989 case CmpInst::FCMP_ULT:
6990 case CmpInst::FCMP_ULE:
6991 case CmpInst::FCMP_OLT:
6992 case CmpInst::FCMP_OLE:
6993 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_OTHER)
6994 return TargetOpcode::G_FMINNUM;
6995 if (VsNaNRetVal == SelectPatternNaNBehaviour::RETURNS_NAN)
6996 return TargetOpcode::G_FMINIMUM;
6997 if (isLegal({TargetOpcode::G_FMINNUM, {DstTy}}))
6998 return TargetOpcode::G_FMINNUM;
6999 if (!isLegal({TargetOpcode::G_FMINIMUM, {DstTy}}))
7000 return 0;
7001 return TargetOpcode::G_FMINIMUM;
7002 }
7003}
7004
7005CombinerHelper::SelectPatternNaNBehaviour
7006CombinerHelper::computeRetValAgainstNaN(Register LHS, Register RHS,
7007 bool IsOrderedComparison) const {
7008 bool LHSSafe = isKnownNeverNaN(LHS, MRI);
7009 bool RHSSafe = isKnownNeverNaN(RHS, MRI);
7010 // Completely unsafe.
7011 if (!LHSSafe && !RHSSafe)
7012 return SelectPatternNaNBehaviour::NOT_APPLICABLE;
7013 if (LHSSafe && RHSSafe)
7014 return SelectPatternNaNBehaviour::RETURNS_ANY;
7015 // An ordered comparison will return false when given a NaN, so it
7016 // returns the RHS.
7017 if (IsOrderedComparison)
7018 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_NAN
7019 : SelectPatternNaNBehaviour::RETURNS_OTHER;
7020 // An unordered comparison will return true when given a NaN, so it
7021 // returns the LHS.
7022 return LHSSafe ? SelectPatternNaNBehaviour::RETURNS_OTHER
7023 : SelectPatternNaNBehaviour::RETURNS_NAN;
7024}
7025
// Tries to turn a select whose condition is a floating-point compare of the
// two selected values into a single min/max instruction. On success MatchInfo
// is set to a callback that builds that instruction into Dst, and true is
// returned.
7026bool CombinerHelper::matchFPSelectToMinMax(Register Dst, Register Cond,
7027 Register TrueVal, Register FalseVal,
7028 BuildFnTy &MatchInfo) const {
7029 // Match: select (fcmp cond x, y) x, y
7030 // select (fcmp cond x, y) y, x
7031 // And turn it into fminnum/fmaxnum or fmin/fmax based off of the condition.
7032 LLT DstTy = MRI.getType(Dst);
7033 // Bail out early on pointers, since we'll never want to fold to a min/max.
7034 if (DstTy.isPointer())
7035 return false;
7036 // Match a floating point compare with a less-than/greater-than predicate.
7037 // TODO: Allow multiple users of the compare if they are all selects.
7038 CmpInst::Predicate Pred;
7039 Register CmpLHS, CmpRHS;
// NOTE(review): one line of this pattern (listing line 7041) is absent from
// this listing; the parentheses on the next line show that the m_GFCmp
// matcher is wrapped in another matcher.
7040 if (!mi_match(Cond, MRI,
7042 m_GFCmp(m_Pred(Pred), m_Reg(CmpLHS), m_Reg(CmpRHS)))) ||
7043 CmpInst::isEquality(Pred))
7044 return false;
7045 SelectPatternNaNBehaviour ResWithKnownNaNInfo =
7046 computeRetValAgainstNaN(CmpLHS, CmpRHS, CmpInst::isOrdered(Pred));
7047 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::NOT_APPLICABLE)
7048 return false;
// Canonicalize the "select cond, y, x" form: swap the compare operands and
// predicate so the select operands line up with the compare operands, and
// flip the NaN behaviour to match.
7049 if (TrueVal == CmpRHS && FalseVal == CmpLHS) {
7050 std::swap(CmpLHS, CmpRHS);
7051 Pred = CmpInst::getSwappedPredicate(Pred);
7052 if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_NAN)
7053 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_OTHER;
7054 else if (ResWithKnownNaNInfo == SelectPatternNaNBehaviour::RETURNS_OTHER)
7055 ResWithKnownNaNInfo = SelectPatternNaNBehaviour::RETURNS_NAN;
7056 }
// The select must choose between exactly the two compared registers.
7057 if (TrueVal != CmpLHS || FalseVal != CmpRHS)
7058 return false;
7059 // Decide what type of max/min this should be based off of the predicate.
7060 unsigned Opc = getFPMinMaxOpcForSelect(Pred, DstTy, ResWithKnownNaNInfo);
7061 if (!Opc || !isLegal({Opc, {DstTy}}))
7062 return false;
7063 // Comparisons between signed zero and zero may have different results...
7064 // unless we have fmaximum/fminimum. In that case, we know -0 < 0.
7065 if (Opc != TargetOpcode::G_FMAXIMUM && Opc != TargetOpcode::G_FMINIMUM) {
7066 // We don't know if a comparison between two 0s will give us a consistent
7067 // result. Be conservative and only proceed if at least one side is
7068 // non-zero.
7069 auto KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpLHS, MRI);
7070 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero()) {
7071 KnownNonZeroSide = getFConstantVRegValWithLookThrough(CmpRHS, MRI);
7072 if (!KnownNonZeroSide || !KnownNonZeroSide->Value.isNonZero())
7073 return false;
7074 }
7075 }
// All captures are by copy.
7076 MatchInfo = [=](MachineIRBuilder &B) {
7077 B.buildInstr(Opc, {Dst}, {CmpLHS, CmpRHS});
7078 };
7079 return true;
7080}
7081
7083 BuildFnTy &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Unpacks a G_SELECT into destination, condition, true and false registers
// and forwards them to matchFPSelectToMinMax.
7084 // TODO: Handle integer cases.
7085 assert(MI.getOpcode() == TargetOpcode::G_SELECT);
7086 // Condition may be fed by a truncated compare.
7087 Register Cond = MI.getOperand(1).getReg();
7088 Register MaybeTrunc;
// Look through a G_TRUNC on the condition only when m_OneNonDBGUse accepts
// it.
7089 if (mi_match(Cond, MRI, m_OneNonDBGUse(m_GTrunc(m_Reg(MaybeTrunc)))))
7090 Cond = MaybeTrunc;
7091 Register Dst = MI.getOperand(0).getReg();
7092 Register TrueVal = MI.getOperand(2).getReg();
7093 Register FalseVal = MI.getOperand(3).getReg();
7094 return matchFPSelectToMinMax(Dst, Cond, TrueVal, FalseVal, MatchInfo);
7095}
7096
7098 BuildFnTy &MatchInfo) const {
// NOTE(review): the opening line(s) of this signature are absent from this
// listing; only the trailing MatchInfo parameter is visible.
//
// Matches an equality G_ICMP between X and an add/sub/xor involving X, and
// sets MatchInfo to a callback that compares the remaining operand Y against
// zero instead.
7099 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
7100 // (X + Y) == X --> Y == 0
7101 // (X + Y) != X --> Y != 0
7102 // (X - Y) == X --> Y == 0
7103 // (X - Y) != X --> Y != 0
7104 // (X ^ Y) == X --> Y == 0
7105 // (X ^ Y) != X --> Y != 0
7106 Register Dst = MI.getOperand(0).getReg();
7107 CmpInst::Predicate Pred;
7108 Register X, Y, OpLHS, OpRHS;
// Subtraction is not commutative, so it is matched separately: X must be the
// minuend (OpLHS), and Y is bound directly to the subtrahend.
7109 bool MatchedSub = mi_match(
7110 Dst, MRI,
7111 m_c_GICmp(m_Pred(Pred), m_Reg(X), m_GSub(m_Reg(OpLHS), m_Reg(Y))));
7112 if (MatchedSub && X != OpLHS)
7113 return false;
7114 if (!MatchedSub) {
7115 if (!mi_match(Dst, MRI,
7116 m_c_GICmp(m_Pred(Pred), m_Reg(X),
7117 m_any_of(m_GAdd(m_Reg(OpLHS), m_Reg(OpRHS)),
7118 m_GXor(m_Reg(OpLHS), m_Reg(OpRHS))))))
7119 return false;
// For add/xor, Y is whichever operand is not X. It stays a
// default-constructed Register when X is neither operand.
7120 Y = X == OpLHS ? OpRHS : X == OpRHS ? OpLHS : Register();
7121 }
// MatchInfo is assigned before the final validity check, so it may be set
// even when this function returns false.
7122 MatchInfo = [=](MachineIRBuilder &B) {
7123 auto Zero = B.buildConstant(MRI.getType(Y), 0);
7124 B.buildICmp(Pred, Dst, Y, Zero);
7125 };
7126 return CmpInst::isEquality(Pred) && Y.isValid();
7127}
7128
7129/// Return the minimum useless shift amount that results in complete loss of the
7130/// source value. Return std::nullopt when it cannot determine a value.
7131static std::optional<unsigned>
7132getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
7133 std::optional<int64_t> &Result) {
7134 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
7135 Opcode == TargetOpcode::G_ASHR) &&
7136 "Expect G_SHL, G_LSHR or G_ASHR.");
7137 auto SignificantBits = 0;
7138 switch (Opcode) {
7139 case TargetOpcode::G_SHL:
7140 SignificantBits = ValueKB.countMinTrailingZeros();
7141 Result = 0;
7142 break;
7143 case TargetOpcode::G_LSHR:
7144 Result = 0;
7145 SignificantBits = ValueKB.countMinLeadingZeros();
7146 break;
7147 case TargetOpcode::G_ASHR:
7148 if (ValueKB.isNonNegative()) {
7149 SignificantBits = ValueKB.countMinLeadingZeros();
7150 Result = 0;
7151 } else if (ValueKB.isNegative()) {
7152 SignificantBits = ValueKB.countMinLeadingOnes();
7153 Result = -1;
7154 } else {
7155 // Cannot determine shift result.
7156 Result = std::nullopt;
7157 }
7158 break;
7159 default:
7160 break;
7161 }
7162 return ValueKB.getBitWidth() - SignificantBits;
7163}
7164
// NOTE(review): the first line of the signature is absent from this listing.
//
// Returns whether matchUnaryPredicate accepts MI's shift-amount operand under
// the IsShiftTooBig predicate below. MatchInfo is written by that predicate.
7166 MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
7167 Register ShiftVal = MI.getOperand(1).getReg();
7168 Register ShiftReg = MI.getOperand(2).getReg();
7169 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
7170 auto IsShiftTooBig = [&](const Constant *C) {
7171 auto *CI = dyn_cast<ConstantInt>(C);
7172 if (!CI)
7173 return false;
// Shift amount is at least the scalar bit width: match, with MatchInfo
// cleared.
7174 if (CI->uge(ResTy.getScalarSizeInBits())) {
7175 MatchInfo = std::nullopt;
7176 return true;
7177 }
// Otherwise consult the known bits of the shifted value;
// getMinUselessShift fills MatchInfo through its Result parameter.
7178 auto OptMaxUsefulShift = getMinUselessShift(VT->getKnownBits(ShiftVal),
7179 MI.getOpcode(), MatchInfo);
7180 return OptMaxUsefulShift && CI->uge(*OptMaxUsefulShift);
7181 };
7182 return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
7183}
7184
// NOTE(review): the signature of this function is absent from this listing;
// `MI` is used below without a visible declaration.
//
// Returns true when the LHS operand is an integer constant (or a
// G_CONSTANT_FOLD_BARRIER) while the RHS operand is neither.
7186 unsigned LHSOpndIdx = 1;
7187 unsigned RHSOpndIdx = 2;
// The overflow opcodes listed here take their inputs from operands 2 and 3
// instead of 1 and 2.
7188 switch (MI.getOpcode()) {
7189 case TargetOpcode::G_UADDO:
7190 case TargetOpcode::G_SADDO:
7191 case TargetOpcode::G_UMULO:
7192 case TargetOpcode::G_SMULO:
7193 LHSOpndIdx = 2;
7194 RHSOpndIdx = 3;
7195 break;
7196 default:
7197 break;
7198 }
7199 Register LHS = MI.getOperand(LHSOpndIdx).getReg();
7200 Register RHS = MI.getOperand(RHSOpndIdx).getReg();
7201 if (!getIConstantVRegVal(LHS, MRI)) {
7202 // Skip commuting if LHS is not a constant. But, LHS may be a
7203 // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already
7204 // have a constant on the RHS.
7205 if (MRI.getVRegDef(LHS)->getOpcode() !=
7206 TargetOpcode::G_CONSTANT_FOLD_BARRIER)
7207 return false;
7208 }
7209 // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER.
7210 return MRI.getVRegDef(RHS)->getOpcode() !=
7211 TargetOpcode::G_CONSTANT_FOLD_BARRIER &&
7212 !getIConstantVRegVal(RHS, MRI);
7213}
7214
// NOTE(review): the signature of this function is absent from this listing;
// `MI` is used below without a visible declaration.
//
// Returns true when operand 1 matches m_GFCstOrSplat and operand 2 does not.
7216 Register LHS = MI.getOperand(1).getReg();
7217 Register RHS = MI.getOperand(2).getReg();
// ValAndVReg only serves as the matcher's binding target; its value is not
// read afterwards.
7218 std::optional<FPValueAndVReg> ValAndVReg;
7219 if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
7220 return false;
7221 return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
7222}
7223
// NOTE(review): the signature of this function is absent from this listing;
// `MI` is used below without a visible declaration.
//
// Swaps the two input operands of MI in place, bracketed by the Observer's
// changingInstr/changedInstr notifications.
7225 Observer.changingInstr(MI);
7226 unsigned LHSOpndIdx = 1;
7227 unsigned RHSOpndIdx = 2;
// The overflow opcodes listed here take their inputs from operands 2 and 3
// instead of 1 and 2.
7228 switch (MI.getOpcode()) {
7229 case TargetOpcode::G_UADDO:
7230 case TargetOpcode::G_SADDO:
7231 case TargetOpcode::G_UMULO:
7232 case TargetOpcode::G_SMULO:
7233 LHSOpndIdx = 2;
7234 RHSOpndIdx = 3;
7235 break;
7236 default:
7237 break;
7238 }
// Read both registers before overwriting either operand.
7239 Register LHSReg = MI.getOperand(LHSOpndIdx).getReg();
7240 Register RHSReg = MI.getOperand(RHSOpndIdx).getReg();
7241 MI.getOperand(LHSOpndIdx).setReg(RHSReg);
7242 MI.getOperand(RHSOpndIdx).setReg(LHSReg);
7243 Observer.changedInstr(MI);
7244}
7245
7246bool CombinerHelper::isOneOrOneSplat(Register Src, bool AllowUndefs) const {
7247 LLT SrcTy = MRI.getType(Src);
7248 if (SrcTy.isFixedVector())
7249 return isConstantSplatVector(Src, 1, AllowUndefs);
7250 if (SrcTy.isScalar()) {
7251 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7252 return true;
7253 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7254 return IConstant && IConstant->Value == 1;
7255 }
7256 return false; // scalable vector
7257}
7258
7259bool CombinerHelper::isZeroOrZeroSplat(Register Src, bool AllowUndefs) const {
7260 LLT SrcTy = MRI.getType(Src);
7261 if (SrcTy.isFixedVector())
7262 return isConstantSplatVector(Src, 0, AllowUndefs);
7263 if (SrcTy.isScalar()) {
7264 if (AllowUndefs && getOpcodeDef<GImplicitDef>(Src, MRI) != nullptr)
7265 return true;
7266 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7267 return IConstant && IConstant->Value == 0;
7268 }
7269 return false; // scalable vector
7270}
7271
7272// Ignores COPYs during conformance checks.
7273// FIXME scalable vectors.
// Returns true if Src is defined by a G_BUILD_VECTOR whose every source is
// the integer constant SplatValue; implicit-def sources are skipped when
// AllowUndefs is set and cause a false result otherwise.
7274bool CombinerHelper::isConstantSplatVector(Register Src, int64_t SplatValue,
7275 bool AllowUndefs) const {
7276 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7277 if (!BuildVector)
7278 return false;
7279 unsigned NumSources = BuildVector->getNumSources();
7280
7281 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of ImplicitDef (listing line 7283) is absent
// from this listing.
7282 GImplicitDef *ImplicitDef =
7284 if (ImplicitDef && AllowUndefs)
7285 continue;
7286 if (ImplicitDef && !AllowUndefs)
7287 return false;
// NOTE(review): the initializer of IConstant (listing line 7289) is absent
// from this listing.
7288 std::optional<ValueAndVReg> IConstant =
7290 if (IConstant && IConstant->Value == SplatValue)
7291 continue;
7292 return false;
7293 }
7294 return true;
7295}
7296
7297// Ignores COPYs during lookups.
7298// FIXME scalable vectors
// Returns the value of Src when it is an integer constant, or the common
// value when Src is a G_BUILD_VECTOR whose sources are all the same integer
// constant. Returns std::nullopt otherwise.
7299std::optional<APInt>
7300CombinerHelper::getConstantOrConstantSplatVector(Register Src) const {
7301 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7302 if (IConstant)
7303 return IConstant->Value;
7304
7305 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7306 if (!BuildVector)
7307 return std::nullopt;
7308 unsigned NumSources = BuildVector->getNumSources();
7309
// Value holds the first constant seen; every later source must equal it.
7310 std::optional<APInt> Value = std::nullopt;
7311 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of IConstant (listing line 7313) is absent
// from this listing.
7312 std::optional<ValueAndVReg> IConstant =
7314 if (!IConstant)
7315 return std::nullopt;
7316 if (!Value)
7317 Value = IConstant->Value;
7318 else if (*Value != IConstant->Value)
7319 return std::nullopt;
7320 }
7321 return Value;
7322}
7323
7324// FIXME G_SPLAT_VECTOR
// Returns true if Src is an integer constant, or a G_BUILD_VECTOR for which
// the per-source constant lookup succeeds on every source. Unlike a splat
// check, the sources are not compared with one another.
7325bool CombinerHelper::isConstantOrConstantVectorI(Register Src) const {
7326 auto IConstant = getIConstantVRegValWithLookThrough(Src, MRI);
7327 if (IConstant)
7328 return true;
7329
7330 GBuildVector *BuildVector = getOpcodeDef<GBuildVector>(Src, MRI);
7331 if (!BuildVector)
7332 return false;
7333
7334 unsigned NumSources = BuildVector->getNumSources();
7335 for (unsigned I = 0; I < NumSources; ++I) {
// NOTE(review): the initializer of IConstant (listing line 7337) is absent
// from this listing.
7336 std::optional<ValueAndVReg> IConstant =
7338 if (!IConstant)
7339 return false;
7340 }
7341 return true;
7342}
7343
7344// TODO: use knownbits to determine zeros
// Tries to replace a select between two constants, controlled by an s1
// condition, with extension/not/add/shl/or instructions. On success MatchInfo
// is set to a callback that builds the replacement into the select's
// destination register, and true is returned. The folds are tried in the
// order written below and the first one that applies is used.
7345bool CombinerHelper::tryFoldSelectOfConstants(GSelect *Select,
7346 BuildFnTy &MatchInfo) const {
7347 uint32_t Flags = Select->getFlags();
7348 Register Dest = Select->getReg(0);
7349 Register Cond = Select->getCondReg();
7350 Register True = Select->getTrueReg();
7351 Register False = Select->getFalseReg();
7352 LLT CondTy = MRI.getType(Select->getCondReg());
7353 LLT TrueTy = MRI.getType(Select->getTrueReg());
7354
7355 // We only do this combine for scalar boolean conditions.
7356 if (CondTy != LLT::scalar(1))
7357 return false;
7358
7359 if (TrueTy.isPointer())
7360 return false;
7361
7362 // Both are scalars.
// NOTE(review): the initializers of TrueOpt and FalseOpt (listing lines 7364
// and 7366) are absent from this listing.
7363 std::optional<ValueAndVReg> TrueOpt =
7365 std::optional<ValueAndVReg> FalseOpt =
7367
// Both select arms must have produced a constant.
7368 if (!TrueOpt || !FalseOpt)
7369 return false;
7370
7371 APInt TrueValue = TrueOpt->Value;
7372 APInt FalseValue = FalseOpt->Value;
7373
7374 // select Cond, 1, 0 --> zext (Cond)
7375 if (TrueValue.isOne() && FalseValue.isZero()) {
7376 MatchInfo = [=](MachineIRBuilder &B) {
7377 B.setInstrAndDebugLoc(*Select);
7378 B.buildZExtOrTrunc(Dest, Cond);
7379 };
7380 return true;
7381 }
7382
7383 // select Cond, -1, 0 --> sext (Cond)
7384 if (TrueValue.isAllOnes() && FalseValue.isZero()) {
7385 MatchInfo = [=](MachineIRBuilder &B) {
7386 B.setInstrAndDebugLoc(*Select);
7387 B.buildSExtOrTrunc(Dest, Cond);
7388 };
7389 return true;
7390 }
7391
7392 // select Cond, 0, 1 --> zext (!Cond)
7393 if (TrueValue.isZero() && FalseValue.isOne()) {
7394 MatchInfo = [=](MachineIRBuilder &B) {
7395 B.setInstrAndDebugLoc(*Select);
// The inverted condition lives in a fresh register of the condition type.
7396 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7397 B.buildNot(Inner, Cond);
7398 B.buildZExtOrTrunc(Dest, Inner);
7399 };
7400 return true;
7401 }
7402
7403 // select Cond, 0, -1 --> sext (!Cond)
7404 if (TrueValue.isZero() && FalseValue.isAllOnes()) {
7405 MatchInfo = [=](MachineIRBuilder &B) {
7406 B.setInstrAndDebugLoc(*Select);
7407 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7408 B.buildNot(Inner, Cond);
7409 B.buildSExtOrTrunc(Dest, Inner);
7410 };
7411 return true;
7412 }
7413
7414 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
// The add reuses the existing False register as the C1-1 operand.
7415 if (TrueValue - 1 == FalseValue) {
7416 MatchInfo = [=](MachineIRBuilder &B) {
7417 B.setInstrAndDebugLoc(*Select);
7418 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7419 B.buildZExtOrTrunc(Inner, Cond);
7420 B.buildAdd(Dest, Inner, False);
7421 };
7422 return true;
7423 }
7424
7425 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
// sext of a true condition is -1, so the add yields (C1+1) - 1 == C1.
7426 if (TrueValue + 1 == FalseValue) {
7427 MatchInfo = [=](MachineIRBuilder &B) {
7428 B.setInstrAndDebugLoc(*Select);
7429 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7430 B.buildSExtOrTrunc(Inner, Cond);
7431 B.buildAdd(Dest, Inner, False);
7432 };
7433 return true;
7434 }
7435
7436 // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
7437 if (TrueValue.isPowerOf2() && FalseValue.isZero()) {
7438 MatchInfo = [=](MachineIRBuilder &B) {
7439 B.setInstrAndDebugLoc(*Select);
7440 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7441 B.buildZExtOrTrunc(Inner, Cond);
7442 // The shift amount must be scalar.
7443 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7444 auto ShAmtC = B.buildConstant(ShiftTy, TrueValue.exactLogBase2());
// The select's flags are passed on to the shift.
7445 B.buildShl(Dest, Inner, ShAmtC, Flags);
7446 };
7447 return true;
7448 }
7449
7450 // select Cond, 0, Pow2 --> (zext (!Cond)) << log2(Pow2)
7451 if (FalseValue.isPowerOf2() && TrueValue.isZero()) {
7452 MatchInfo = [=](MachineIRBuilder &B) {
7453 B.setInstrAndDebugLoc(*Select);
7454 Register Not = MRI.createGenericVirtualRegister(CondTy);
7455 B.buildNot(Not, Cond);
7456 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7457 B.buildZExtOrTrunc(Inner, Not);
7458 // The shift amount must be scalar.
7459 LLT ShiftTy = TrueTy.isVector() ? TrueTy.getElementType() : TrueTy;
7460 auto ShAmtC = B.buildConstant(ShiftTy, FalseValue.exactLogBase2());
7461 B.buildShl(Dest, Inner, ShAmtC, Flags);
7462 };
7463 return true;
7464 }
7465
7466 // select Cond, -1, C --> or (sext Cond), C
// The or reuses the existing False register as the C operand.
7467 if (TrueValue.isAllOnes()) {
7468 MatchInfo = [=](MachineIRBuilder &B) {
7469 B.setInstrAndDebugLoc(*Select);
7470 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7471 B.buildSExtOrTrunc(Inner, Cond);
7472 B.buildOr(Dest, Inner, False, Flags);
7473 };
7474 return true;
7475 }
7476
7477 // select Cond, C, -1 --> or (sext (not Cond)), C
// The or reuses the existing True register as the C operand.
7478 if (FalseValue.isAllOnes()) {
7479 MatchInfo = [=](MachineIRBuilder &B) {
7480 B.setInstrAndDebugLoc(*Select);
7481 Register Not = MRI.createGenericVirtualRegister(CondTy);
7482 B.buildNot(Not, Cond);
7483 Register Inner = MRI.createGenericVirtualRegister(TrueTy);
7484 B.buildSExtOrTrunc(Inner, Not);
7485 B.buildOr(Dest, Inner, True, Flags);
7486 };
7487 return true;
7488 }
7489
7490 return false;
7491}
7492
7493// TODO: use knownbits to determine zeros
7494bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
7495 BuildFnTy &MatchInfo) const {
7496 uint32_t Flags = Select->getFlags();
7497 Register DstReg = Select->getReg(0);
7498 Register Cond = Select->getCondReg();
7499 Register True = Select->getTrueReg();
7500 Register False = Select->getFalseReg();
7501 LLT CondTy = MRI.getType(Select->getCondReg());
7502 LLT TrueTy = MRI.getType(Select->getTrueReg());
7503
7504 // Boolean or fixed vector of booleans.
7505 if (CondTy.isScalableVector() ||
7506 (CondTy.isFixedVector() &&
7507 CondTy.getElementType().getScalarSizeInBits() != 1) ||
7508 CondTy.getScalarSizeInBits() != 1)
7509 return false;
7510
7511 if (CondTy != TrueTy)
7512 return false;
7513
7514 // select Cond, Cond, F --> or Cond, F
7515 // select Cond, 1, F --> or Cond, F
7516 if ((Cond == True) || isOneOrOneSplat(True, /* AllowUndefs */ true)) {
7517 MatchInfo = [=](MachineIRBuilder &B) {
7518 B.setInstrAndDebugLoc(*Select);
7519 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7520 B.buildZExtOrTrunc(Ext, Cond);
7521 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7522 B.buildOr(DstReg, Ext, FreezeFalse, Flags);
7523 };
7524 return true;
7525 }
7526
7527 // select Cond, T, Cond --> and Cond, T
7528 // select Cond, T, 0 --> and Cond, T
7529 if ((Cond == False) || isZeroOrZeroSplat(False, /* AllowUndefs */ true)) {
7530 MatchInfo = [=](MachineIRBuilder &B) {
7531 B.setInstrAndDebugLoc(*Select);
7532 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7533 B.buildZExtOrTrunc(Ext, Cond);
7534 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7535 B.buildAnd(DstReg, Ext, FreezeTrue);
7536 };
7537 return true;
7538 }
7539
7540 // select Cond, T, 1 --> or (not Cond), T
7541 if (isOneOrOneSplat(False, /* AllowUndefs */ true)) {
7542 MatchInfo = [=](MachineIRBuilder &B) {
7543 B.setInstrAndDebugLoc(*Select);
7544 // First the not.
7545 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7546 B.buildNot(Inner, Cond);
7547 // Then an ext to match the destination register.
7548 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7549 B.buildZExtOrTrunc(Ext, Inner);
7550 auto FreezeTrue = B.buildFreeze(TrueTy, True);
7551 B.buildOr(DstReg, Ext, FreezeTrue, Flags);
7552 };
7553 return true;
7554 }
7555
7556 // select Cond, 0, F --> and (not Cond), F
7557 if (isZeroOrZeroSplat(True, /* AllowUndefs */ true)) {
7558 MatchInfo = [=](MachineIRBuilder &B) {
7559 B.setInstrAndDebugLoc(*Select);
7560 // First the not.
7561 Register Inner = MRI.createGenericVirtualRegister(CondTy);
7562 B.buildNot(Inner, Cond);
7563 // Then an ext to match the destination register.
7564 Register Ext = MRI.createGenericVirtualRegister(TrueTy);
7565 B.buildZExtOrTrunc(Ext, Inner);
7566 auto FreezeFalse = B.buildFreeze(TrueTy, False);
7567 B.buildAnd(DstReg, Ext, FreezeFalse);
7568 };
7569 return true;
7570 }
7571
7572 return false;
7573}
7574
7576 BuildFnTy &MatchInfo) const {
7577 GSelect *Select = cast<GSelect>(MRI.getVRegDef(MO.getReg()));
7578 GICmp *Cmp = cast<GICmp>(MRI.getVRegDef(Select->getCondReg()));
7579
7580 Register DstReg = Select->getReg(0);
7581 Register True = Select->getTrueReg();
7582 Register False = Select->getFalseReg();
7583 LLT DstTy = MRI.getType(DstReg);
7584
7585 if (DstTy.isPointerOrPointerVector())
7586 return false;
7587
7588 // We want to fold the icmp and replace the select.
7589 if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
7590 return false;
7591
7592 CmpInst::Predicate Pred = Cmp->getCond();
7593 // We need a larger or smaller predicate for
7594 // canonicalization.
7595 if (CmpInst::isEquality(Pred))
7596 return false;
7597
7598 Register CmpLHS = Cmp->getLHSReg();
7599 Register CmpRHS = Cmp->getRHSReg();
7600
7601 // We can swap CmpLHS and CmpRHS for higher hitrate.
7602 if (True == CmpRHS && False == CmpLHS) {
7603 std::swap(CmpLHS, CmpRHS);
7604 Pred = CmpInst::getSwappedPredicate(Pred);
7605 }
7606
7607 // (icmp X, Y) ? X : Y -> integer minmax.
7608 // see matchSelectPattern in ValueTracking.
7609 // Legality between G_SELECT and integer minmax can differ.
7610 if (True != CmpLHS || False != CmpRHS)
7611 return false;
7612
7613 switch (Pred) {
7614 case ICmpInst::ICMP_UGT:
7615 case ICmpInst::ICMP_UGE: {
7616 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
7617 return false;
7618 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMax(DstReg, True, False); };
7619 return true;
7620 }
7621 case ICmpInst::ICMP_SGT:
7622 case ICmpInst::ICMP_SGE: {
7623 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
7624 return false;
7625 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMax(DstReg, True, False); };
7626 return true;
7627 }
7628 case ICmpInst::ICMP_ULT:
7629 case ICmpInst::ICMP_ULE: {
7630 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
7631 return false;
7632 MatchInfo = [=](MachineIRBuilder &B) { B.buildUMin(DstReg, True, False); };
7633 return true;
7634 }
7635 case ICmpInst::ICMP_SLT:
7636 case ICmpInst::ICMP_SLE: {
7637 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
7638 return false;
7639 MatchInfo = [=](MachineIRBuilder &B) { B.buildSMin(DstReg, True, False); };
7640 return true;
7641 }
7642 default:
7643 return false;
7644 }
7645}
7646
7647// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
7649 BuildFnTy &MatchInfo) const {
7650 assert(MI.getOpcode() == TargetOpcode::G_SUB);
7651 Register DestReg = MI.getOperand(0).getReg();
7652 LLT DestTy = MRI.getType(DestReg);
7653
7654 Register X;
7655 Register Sub0;
7656 auto NegPattern = m_all_of(m_Neg(m_DeferredReg(X)), m_Reg(Sub0));
7657 if (mi_match(DestReg, MRI,
7658 m_Neg(m_OneUse(m_any_of(m_GSMin(m_Reg(X), NegPattern),
7659 m_GSMax(m_Reg(X), NegPattern),
7660 m_GUMin(m_Reg(X), NegPattern),
7661 m_GUMax(m_Reg(X), NegPattern)))))) {
7662 MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
7663 unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
7664 if (isLegal({NewOpc, {DestTy}})) {
7665 MatchInfo = [=](MachineIRBuilder &B) {
7666 B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
7667 };
7668 return true;
7669 }
7670 }
7671
7672 return false;
7673}
7674
7677
// Tries the select folds in a fixed order and reports the first one that
// matches: the select-of-constants fold first, then the boolean-select-to-logic
// fold. MatchInfo is filled in by whichever helper succeeds.
// NOTE(review): the signature and the definition of `Select` are missing from
// this listing.
7678 if (tryFoldSelectOfConstants(Select, MatchInfo))
7679 return true;
7680
7681 if (tryFoldBoolSelectToLogic(Select, MatchInfo))
7682 return true;
7683
7684 return false;
7685}
7686
7687/// Fold (icmp Pred1 V1, C1) && (icmp Pred2 V2, C2)
7688/// or (icmp Pred1 V1, C1) || (icmp Pred2 V2, C2)
7689/// into a single comparison using range-based reasoning.
7690/// see InstCombinerImpl::foldAndOrOfICmpsUsingRanges.
///
/// \p Logic must not be a G_XOR (asserted below). On success the rewrite is
/// stored in \p MatchInfo and true is returned; no instruction is built here.
7691bool CombinerHelper::tryFoldAndOrOrICmpsUsingRanges(
7692 GLogicalBinOp *Logic, BuildFnTy &MatchInfo) const {
7693 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpected xor");
7694 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7695 Register DstReg = Logic->getReg(0);
7696 Register LHS = Logic->getLHSReg();
7697 Register RHS = Logic->getRHSReg();
7698 unsigned Flags = Logic->getFlags();
7699
7700 // We need a G_ICMP on the LHS register.
7701 GICmp *Cmp1 = getOpcodeDef<GICmp>(LHS, MRI);
7702 if (!Cmp1)
7703 return false;
7704
7705 // We need a G_ICMP on the RHS register.
7706 GICmp *Cmp2 = getOpcodeDef<GICmp>(RHS, MRI);
7707 if (!Cmp2)
7708 return false;
7709
7710 // We want to fold the icmps.
7711 if (!MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7712 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)))
7713 return false;
7714
7715 APInt C1;
7716 APInt C2;
// NOTE(review): the initializers of MaybeC1 and MaybeC2 are missing from this
// listing; presumably they look up the constant compared against by each icmp.
7717 std::optional<ValueAndVReg> MaybeC1 =
7719 if (!MaybeC1)
7720 return false;
7721 C1 = MaybeC1->Value;
7722
7723 std::optional<ValueAndVReg> MaybeC2 =
7725 if (!MaybeC2)
7726 return false;
7727 C2 = MaybeC2->Value;
7728
7729 Register R1 = Cmp1->getLHSReg();
7730 Register R2 = Cmp2->getLHSReg();
7731 CmpInst::Predicate Pred1 = Cmp1->getCond();
7732 CmpInst::Predicate Pred2 = Cmp2->getCond();
7733 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7734 LLT CmpOperandTy = MRI.getType(R1);
7735
7736 if (CmpOperandTy.isPointer())
7737 return false;
7738
7739 // We build ands, adds, and constants of type CmpOperandTy.
7740 // They must be legal to build.
7741 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_AND, CmpOperandTy}) ||
7742 !isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, CmpOperandTy}) ||
7743 !isConstantLegalOrBeforeLegalizer(CmpOperandTy))
7744 return false;
7745
7746 // Look through add of a constant offset on R1, R2, or both operands. This
7747 // allows us to interpret the R + C' < C'' range idiom into a proper range.
7748 std::optional<APInt> Offset1;
7749 std::optional<APInt> Offset2;
7750 if (R1 != R2) {
7751 if (GAdd *Add = getOpcodeDef<GAdd>(R1, MRI)) {
// NOTE(review): the initializer of MaybeOffset1 is missing from this listing.
7752 std::optional<ValueAndVReg> MaybeOffset1 =
7754 if (MaybeOffset1) {
7755 R1 = Add->getLHSReg();
7756 Offset1 = MaybeOffset1->Value;
7757 }
7758 }
7759 if (GAdd *Add = getOpcodeDef<GAdd>(R2, MRI)) {
// NOTE(review): the initializer of MaybeOffset2 is missing from this listing.
7760 std::optional<ValueAndVReg> MaybeOffset2 =
7762 if (MaybeOffset2) {
7763 R2 = Add->getLHSReg();
7764 Offset2 = MaybeOffset2->Value;
7765 }
7766 }
7767 }
7768
// After peeling the offsets, both compares must test the same register.
7769 if (R1 != R2)
7770 return false;
7771
7772 // We calculate the icmp ranges including maybe offsets.
// For an AND both predicates are inverted here and the combined range is
// inverted back further down, so AND and OR share the same union logic.
7773 ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
7774 IsAnd ? ICmpInst::getInversePredicate(Pred1) : Pred1, C1);
7775 if (Offset1)
7776 CR1 = CR1.subtract(*Offset1);
7777
7778 ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
7779 IsAnd ? ICmpInst::getInversePredicate(Pred2) : Pred2, C2);
7780 if (Offset2)
7781 CR2 = CR2.subtract(*Offset2);
7782
7783 bool CreateMask = false;
7784 APInt LowerDiff;
7785 std::optional<ConstantRange> CR = CR1.exactUnionWith(CR2);
7786 if (!CR) {
7787 // We need non-wrapping ranges.
7788 if (CR1.isWrappedSet() || CR2.isWrappedSet())
7789 return false;
7790
7791 // Check whether we have equal-size ranges that only differ by one bit.
7792 // In that case we can apply a mask to map one range onto the other.
7793 LowerDiff = CR1.getLower() ^ CR2.getLower();
7794 APInt UpperDiff = (CR1.getUpper() - 1) ^ (CR2.getUpper() - 1);
7795 APInt CR1Size = CR1.getUpper() - CR1.getLower();
7796 if (!LowerDiff.isPowerOf2() || LowerDiff != UpperDiff ||
7797 CR1Size != CR2.getUpper() - CR2.getLower())
7798 return false;
7799
// Keep the range with the smaller lower bound; the mask built in the apply
// step clears the differing bit of the tested value.
7800 CR = CR1.getLower().ult(CR2.getLower()) ? CR1 : CR2;
7801 CreateMask = true;
7802 }
7803
7804 if (IsAnd)
7805 CR = CR->inverse();
7806
7807 CmpInst::Predicate NewPred;
7808 APInt NewC, Offset;
7809 CR->getEquivalentICmp(NewPred, NewC, Offset);
7810
7811 // We take the result type of one of the original icmps, CmpTy, for
7812 // the icmp to be built. The operand type, CmpOperandTy, is used for
7813 // the other instructions and constants to be built. The types of
7814 // the parameters and output are the same for add and and. CmpTy
7815 // and the type of DstReg might differ. That is why we zext or trunc
7816 // the icmp into the destination register.
7817
// The four branches below cover every combination of "mask needed" and
// "non-zero offset needed"; each ends with an icmp extended or truncated into
// DstReg.
7818 MatchInfo = [=](MachineIRBuilder &B) {
7819 if (CreateMask && Offset != 0) {
7820 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7821 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7822 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7823 auto Add = B.buildAdd(CmpOperandTy, And, OffsetC, Flags);
7824 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7825 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7826 B.buildZExtOrTrunc(DstReg, ICmp);
7827 } else if (CreateMask && Offset == 0) {
7828 auto TildeLowerDiff = B.buildConstant(CmpOperandTy, ~LowerDiff);
7829 auto And = B.buildAnd(CmpOperandTy, R1, TildeLowerDiff); // the mask.
7830 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7831 auto ICmp = B.buildICmp(NewPred, CmpTy, And, NewCon);
7832 B.buildZExtOrTrunc(DstReg, ICmp);
7833 } else if (!CreateMask && Offset != 0) {
7834 auto OffsetC = B.buildConstant(CmpOperandTy, Offset);
7835 auto Add = B.buildAdd(CmpOperandTy, R1, OffsetC, Flags);
7836 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7837 auto ICmp = B.buildICmp(NewPred, CmpTy, Add, NewCon);
7838 B.buildZExtOrTrunc(DstReg, ICmp);
7839 } else if (!CreateMask && Offset == 0) {
7840 auto NewCon = B.buildConstant(CmpOperandTy, NewC);
7841 auto ICmp = B.buildICmp(NewPred, CmpTy, R1, NewCon);
7842 B.buildZExtOrTrunc(DstReg, ICmp);
7843 } else {
7844 llvm_unreachable("unexpected configuration of CreateMask and Offset");
7845 }
7846 };
7847 return true;
7848}
7849
/// Folds a logic op (G_AND or G_OR, never G_XOR) of two G_FCMPs that compare
/// the same pair of registers into a single result, by combining the two
/// predicate codes bitwise (AND for G_AND, OR otherwise).
///
/// On success the rewrite is stored in \p MatchInfo and true is returned.
7850bool CombinerHelper::tryFoldLogicOfFCmps(GLogicalBinOp *Logic,
7851 BuildFnTy &MatchInfo) const {
// NOTE(review): the assert message below is misspelled ("unexpecte").
7852 assert(Logic->getOpcode() != TargetOpcode::G_XOR && "unexpecte xor");
7853 Register DestReg = Logic->getReg(0);
7854 Register LHS = Logic->getLHSReg();
7855 Register RHS = Logic->getRHSReg();
7856 bool IsAnd = Logic->getOpcode() == TargetOpcode::G_AND;
7857
7858 // We need a compare on the LHS register.
7859 GFCmp *Cmp1 = getOpcodeDef<GFCmp>(LHS, MRI);
7860 if (!Cmp1)
7861 return false;
7862
7863 // We need a compare on the RHS register.
7864 GFCmp *Cmp2 = getOpcodeDef<GFCmp>(RHS, MRI);
7865 if (!Cmp2)
7866 return false;
7867
7868 LLT CmpTy = MRI.getType(Cmp1->getReg(0));
7869 LLT CmpOperandTy = MRI.getType(Cmp1->getLHSReg());
7870
7871 // We build one fcmp, want to fold the fcmps, replace the logic op,
7872 // and the fcmps must have the same shape.
// NOTE(review): the line opening this condition is missing from this listing;
// presumably it is a legality check on the G_FCMP query that follows.
7874 {TargetOpcode::G_FCMP, {CmpTy, CmpOperandTy}}) ||
7875 !MRI.hasOneNonDBGUse(Logic->getReg(0)) ||
7876 !MRI.hasOneNonDBGUse(Cmp1->getReg(0)) ||
7877 !MRI.hasOneNonDBGUse(Cmp2->getReg(0)) ||
7878 MRI.getType(Cmp1->getLHSReg()) != MRI.getType(Cmp2->getLHSReg()))
7879 return false;
7880
7881 CmpInst::Predicate PredL = Cmp1->getCond();
7882 CmpInst::Predicate PredR = Cmp2->getCond();
7883 Register LHS0 = Cmp1->getLHSReg();
7884 Register LHS1 = Cmp1->getRHSReg();
7885 Register RHS0 = Cmp2->getLHSReg();
7886 Register RHS1 = Cmp2->getRHSReg();
7887
7888 if (LHS0 == RHS1 && LHS1 == RHS0) {
7889 // Swap RHS operands to match LHS.
7890 PredR = CmpInst::getSwappedPredicate(PredR);
7891 std::swap(RHS0, RHS1);
7892 }
7893
7894 if (LHS0 == RHS0 && LHS1 == RHS1) {
7895 // We determine the new predicate.
7896 unsigned CmpCodeL = getFCmpCode(PredL);
7897 unsigned CmpCodeR = getFCmpCode(PredR);
7898 unsigned NewPred = IsAnd ? CmpCodeL & CmpCodeR : CmpCodeL | CmpCodeR;
// The new compare carries the union of both compares' flags.
7899 unsigned Flags = Cmp1->getFlags() | Cmp2->getFlags();
7900 MatchInfo = [=](MachineIRBuilder &B) {
7901 // The fcmp predicates fill the lower part of the enum.
7902 FCmpInst::Predicate Pred = static_cast<FCmpInst::Predicate>(NewPred);
// NOTE(review): the second operand of each `&&` below is missing from this
// listing. The always-false / always-true predicates are emitted as constants
// instead of a compare.
7903 if (Pred == FCmpInst::FCMP_FALSE &&
7905 auto False = B.buildConstant(CmpTy, 0);
7906 B.buildZExtOrTrunc(DestReg, False);
7907 } else if (Pred == FCmpInst::FCMP_TRUE &&
7909 auto True =
7910 B.buildConstant(CmpTy, getICmpTrueVal(getTargetLowering(),
7911 CmpTy.isVector() /*isVector*/,
7912 true /*isFP*/));
7913 B.buildZExtOrTrunc(DestReg, True);
7914 } else { // We take the predicate without predicate optimizations.
7915 auto Cmp = B.buildFCmp(Pred, CmpTy, LHS0, LHS1, Flags);
7916 B.buildZExtOrTrunc(DestReg, Cmp);
7917 }
7918 };
7919 return true;
7920 }
7921
7922 return false;
7923}
7924
// Tries the G_AND folds in order: first the range-based fold of two icmps,
// then the fold of two fcmps. Returns true (with MatchInfo set by the helper)
// for the first one that matches.
// NOTE(review): the signature line is missing from this listing.
7926 GAnd *And = cast<GAnd>(&MI);
7927
7928 if (tryFoldAndOrOrICmpsUsingRanges(And, MatchInfo))
7929 return true;
7930
7931 if (tryFoldLogicOfFCmps(And, MatchInfo))
7932 return true;
7933
7934 return false;
7935}
7936
// Tries the G_OR folds in order: first the range-based fold of two icmps,
// then the fold of two fcmps. Returns true (with MatchInfo set by the helper)
// for the first one that matches.
// NOTE(review): the signature line is missing from this listing.
7938 GOr *Or = cast<GOr>(&MI);
7939
7940 if (tryFoldAndOrOrICmpsUsingRanges(Or, MatchInfo))
7941 return true;
7942
7943 if (tryFoldLogicOfFCmps(Or, MatchInfo))
7944 return true;
7945
7946 return false;
7947}
7948
// Simplifies an add-with-overflow instruction (`Add`), whose value result is
// Dst and whose overflow result is Carry. The folds are tried in the order
// written below; the first that applies stores its rewrite in MatchInfo and
// returns true.
// NOTE(review): the start of the signature, the definition of `Add`, and
// several condition / `case` lines are missing from this listing.
7950 BuildFnTy &MatchInfo) const {
7952
7953 // Addo has no flags
7954 Register Dst = Add->getReg(0);
7955 Register Carry = Add->getReg(1);
7956 Register LHS = Add->getLHSReg();
7957 Register RHS = Add->getRHSReg();
7958 bool IsSigned = Add->isSigned();
7959 LLT DstTy = MRI.getType(Dst);
7960 LLT CarryTy = MRI.getType(Carry);
7961
7962 // Fold addo, if the carry is dead -> add, undef.
7963 if (MRI.use_nodbg_empty(Carry) &&
7964 isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}})) {
7965 MatchInfo = [=](MachineIRBuilder &B) {
7966 B.buildAdd(Dst, LHS, RHS);
7967 B.buildUndef(Carry);
7968 };
7969 return true;
7970 }
7971
7972 // Canonicalize constant to RHS.
7973 if (isConstantOrConstantVectorI(LHS) && !isConstantOrConstantVectorI(RHS)) {
7974 if (IsSigned) {
7975 MatchInfo = [=](MachineIRBuilder &B) {
7976 B.buildSAddo(Dst, Carry, RHS, LHS);
7977 };
7978 return true;
7979 }
7980 // !IsSigned
7981 MatchInfo = [=](MachineIRBuilder &B) {
7982 B.buildUAddo(Dst, Carry, RHS, LHS);
7983 };
7984 return true;
7985 }
7986
7987 std::optional<APInt> MaybeLHS = getConstantOrConstantSplatVector(LHS);
7988 std::optional<APInt> MaybeRHS = getConstantOrConstantSplatVector(RHS);
7989
7990 // Fold addo(c1, c2) -> c3, carry.
// NOTE(review): the last operand of this condition is missing from this
// listing.
7991 if (MaybeLHS && MaybeRHS && isConstantLegalOrBeforeLegalizer(DstTy) &&
7993 bool Overflow;
7994 APInt Result = IsSigned ? MaybeLHS->sadd_ov(*MaybeRHS, Overflow)
7995 : MaybeLHS->uadd_ov(*MaybeRHS, Overflow);
7996 MatchInfo = [=](MachineIRBuilder &B) {
7997 B.buildConstant(Dst, Result);
7998 B.buildConstant(Carry, Overflow);
7999 };
8000 return true;
8001 }
8002
8003 // Fold (addo x, 0) -> x, no carry
8004 if (MaybeRHS && *MaybeRHS == 0 && isConstantLegalOrBeforeLegalizer(CarryTy)) {
8005 MatchInfo = [=](MachineIRBuilder &B) {
8006 B.buildCopy(Dst, LHS);
8007 B.buildConstant(Carry, 0);
8008 };
8009 return true;
8010 }
8011
8012 // Given 2 constant operands whose sum does not overflow:
8013 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
8014 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
// The inner add must carry the no-wrap flag that matches the signedness of
// this addo.
8015 GAdd *AddLHS = getOpcodeDef<GAdd>(LHS, MRI);
8016 if (MaybeRHS && AddLHS && MRI.hasOneNonDBGUse(Add->getReg(0)) &&
8017 ((IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoSWrap)) ||
8018 (!IsSigned && AddLHS->getFlag(MachineInstr::MIFlag::NoUWrap)))) {
8019 std::optional<APInt> MaybeAddRHS =
8020 getConstantOrConstantSplatVector(AddLHS->getRHSReg());
8021 if (MaybeAddRHS) {
8022 bool Overflow;
8023 APInt NewC = IsSigned ? MaybeAddRHS->sadd_ov(*MaybeRHS, Overflow)
8024 : MaybeAddRHS->uadd_ov(*MaybeRHS, Overflow);
8025 if (!Overflow && isConstantLegalOrBeforeLegalizer(DstTy)) {
8026 if (IsSigned) {
8027 MatchInfo = [=](MachineIRBuilder &B) {
8028 auto ConstRHS = B.buildConstant(DstTy, NewC);
8029 B.buildSAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8030 };
8031 return true;
8032 }
8033 // !IsSigned
8034 MatchInfo = [=](MachineIRBuilder &B) {
8035 auto ConstRHS = B.buildConstant(DstTy, NewC);
8036 B.buildUAddo(Dst, Carry, AddLHS->getLHSReg(), ConstRHS);
8037 };
8038 return true;
8039 }
8040 }
8041 };
8042
8043 // We try to combine addo to non-overflowing add.
// NOTE(review): the second operand of this condition is missing from this
// listing.
8044 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ADD, {DstTy}}) ||
8046 return false;
8047
8048 // We try to combine uaddo to non-overflowing add.
8049 if (!IsSigned) {
8050 ConstantRange CRLHS =
8051 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/false);
8052 ConstantRange CRRHS =
8053 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/false);
8054
// NOTE(review): the `case` labels of this switch are missing from this
// listing. The visible arms are: give up, plain add with NoUWrap and carry 0,
// and plain add with carry 1.
8055 switch (CRLHS.unsignedAddMayOverflow(CRRHS)) {
8057 return false;
8059 MatchInfo = [=](MachineIRBuilder &B) {
8060 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8061 B.buildConstant(Carry, 0);
8062 };
8063 return true;
8064 }
8067 MatchInfo = [=](MachineIRBuilder &B) {
8068 B.buildAdd(Dst, LHS, RHS);
8069 B.buildConstant(Carry, 1);
8070 };
8071 return true;
8072 }
8073 }
8074 return false;
8075 }
8076
8077 // We try to combine saddo to non-overflowing add.
8078
8079 // If LHS and RHS each have at least two sign bits, then there is no signed
8080 // overflow.
8081 if (VT->computeNumSignBits(RHS) > 1 && VT->computeNumSignBits(LHS) > 1) {
8082 MatchInfo = [=](MachineIRBuilder &B) {
8083 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8084 B.buildConstant(Carry, 0);
8085 };
8086 return true;
8087 }
8088
8089 ConstantRange CRLHS =
8090 ConstantRange::fromKnownBits(VT->getKnownBits(LHS), /*IsSigned=*/true);
8091 ConstantRange CRRHS =
8092 ConstantRange::fromKnownBits(VT->getKnownBits(RHS), /*IsSigned=*/true);
8093
// NOTE(review): the `case` labels of this switch are missing from this
// listing. The visible arms mirror the unsigned switch above, using NoSWrap.
8094 switch (CRLHS.signedAddMayOverflow(CRRHS)) {
8096 return false;
8098 MatchInfo = [=](MachineIRBuilder &B) {
8099 B.buildAdd(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8100 B.buildConstant(Carry, 0);
8101 };
8102 return true;
8103 }
8106 MatchInfo = [=](MachineIRBuilder &B) {
8107 B.buildAdd(Dst, LHS, RHS);
8108 B.buildConstant(Carry, 1);
8109 };
8110 return true;
8111 }
8112 }
8113
8114 return false;
8115}
8116
// Runs the stored build callback on the helper's Builder, then erases `Root`.
// NOTE(review): the start of the signature and the definition of `Root` are
// missing from this listing.
8118 BuildFnTy &MatchInfo) const {
8120 MatchInfo(Builder);
8121 Root->eraseFromParent();
8122}
8123
// Reads whether the function containing MI is marked as optimized for size.
// NOTE(review): the start of the signature and the return statement that
// consumes OptForSize are missing from this listing.
8125 int64_t Exponent) const {
8126 bool OptForSize = MI.getMF()->getFunction().hasOptSize();
8128}
8129
8131 int64_t Exponent) const {
8132 auto [Dst, Base] = MI.getFirst2Regs();
8133 LLT Ty = MRI.getType(Dst);
8134 int64_t ExpVal = Exponent;
8135
8136 if (ExpVal == 0) {
8137 Builder.buildFConstant(Dst, 1.0);
8138 MI.removeFromParent();
8139 return;
8140 }
8141
8142 if (ExpVal < 0)
8143 ExpVal = -ExpVal;
8144
8145 // We use the simple binary decomposition method from SelectionDAG ExpandPowI
8146 // to generate the multiply sequence. There are more optimal ways to do this
8147 // (for example, powi(x,15) generates one more multiply than it should), but
8148 // this has the benefit of being both really simple and much better than a
8149 // libcall.
8150 std::optional<SrcOp> Res;
8151 SrcOp CurSquare = Base;
8152 while (ExpVal > 0) {
8153 if (ExpVal & 1) {
8154 if (!Res)
8155 Res = CurSquare;
8156 else
8157 Res = Builder.buildFMul(Ty, *Res, CurSquare);
8158 }
8159
8160 CurSquare = Builder.buildFMul(Ty, CurSquare, CurSquare);
8161 ExpVal >>= 1;
8162 }
8163
8164 // If the original exponent was negative, invert the result, producing
8165 // 1/(x*x*x).
8166 if (Exponent < 0)
8167 Res = Builder.buildFDiv(Ty, Builder.buildFConstant(Ty, 1.0), *Res,
8168 MI.getFlags());
8169
8170 Builder.buildCopy(Dst, *Res);
8171 MI.eraseFromParent();
8172}
8173
8175 BuildFnTy &MatchInfo) const {
8176 // fold (A+C1)-C2 -> A+(C1-C2)
8177 const GSub *Sub = cast<GSub>(&MI);
8178 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getLHSReg()));
8179
8180 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8181 return false;
8182
8183 APInt C2 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8184 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8185
8186 Register Dst = Sub->getReg(0);
8187 LLT DstTy = MRI.getType(Dst);
8188
8189 MatchInfo = [=](MachineIRBuilder &B) {
8190 auto Const = B.buildConstant(DstTy, C1 - C2);
8191 B.buildAdd(Dst, Add->getLHSReg(), Const);
8192 };
8193
8194 return true;
8195}
8196
8198 BuildFnTy &MatchInfo) const {
8199 // fold C2-(A+C1) -> (C2-C1)-A
8200 const GSub *Sub = cast<GSub>(&MI);
8201 GAdd *Add = cast<GAdd>(MRI.getVRegDef(Sub->getRHSReg()));
8202
8203 if (!MRI.hasOneNonDBGUse(Add->getReg(0)))
8204 return false;
8205
8206 APInt C2 = getIConstantFromReg(Sub->getLHSReg(), MRI);
8207 APInt C1 = getIConstantFromReg(Add->getRHSReg(), MRI);
8208
8209 Register Dst = Sub->getReg(0);
8210 LLT DstTy = MRI.getType(Dst);
8211
8212 MatchInfo = [=](MachineIRBuilder &B) {
8213 auto Const = B.buildConstant(DstTy, C2 - C1);
8214 B.buildSub(Dst, Const, Add->getLHSReg());
8215 };
8216
8217 return true;
8218}
8219
8221 BuildFnTy &MatchInfo) const {
8222 // fold (A-C1)-C2 -> A-(C1+C2)
8223 const GSub *Sub1 = cast<GSub>(&MI);
8224 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8225
8226 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8227 return false;
8228
8229 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8230 APInt C1 = getIConstantFromReg(Sub2->getRHSReg(), MRI);
8231
8232 Register Dst = Sub1->getReg(0);
8233 LLT DstTy = MRI.getType(Dst);
8234
8235 MatchInfo = [=](MachineIRBuilder &B) {
8236 auto Const = B.buildConstant(DstTy, C1 + C2);
8237 B.buildSub(Dst, Sub2->getLHSReg(), Const);
8238 };
8239
8240 return true;
8241}
8242
8244 BuildFnTy &MatchInfo) const {
8245 // fold (C1-A)-C2 -> (C1-C2)-A
8246 const GSub *Sub1 = cast<GSub>(&MI);
8247 GSub *Sub2 = cast<GSub>(MRI.getVRegDef(Sub1->getLHSReg()));
8248
8249 if (!MRI.hasOneNonDBGUse(Sub2->getReg(0)))
8250 return false;
8251
8252 APInt C2 = getIConstantFromReg(Sub1->getRHSReg(), MRI);
8253 APInt C1 = getIConstantFromReg(Sub2->getLHSReg(), MRI);
8254
8255 Register Dst = Sub1->getReg(0);
8256 LLT DstTy = MRI.getType(Dst);
8257
8258 MatchInfo = [=](MachineIRBuilder &B) {
8259 auto Const = B.buildConstant(DstTy, C1 - C2);
8260 B.buildSub(Dst, Const, Sub2->getRHSReg());
8261 };
8262
8263 return true;
8264}
8265
8267 BuildFnTy &MatchInfo) const {
8268 // fold ((A-C1)+C2) -> (A+(C2-C1))
8269 const GAdd *Add = cast<GAdd>(&MI);
8270 GSub *Sub = cast<GSub>(MRI.getVRegDef(Add->getLHSReg()));
8271
8272 if (!MRI.hasOneNonDBGUse(Sub->getReg(0)))
8273 return false;
8274
8275 APInt C2 = getIConstantFromReg(Add->getRHSReg(), MRI);
8276 APInt C1 = getIConstantFromReg(Sub->getRHSReg(), MRI);
8277
8278 Register Dst = Add->getReg(0);
8279 LLT DstTy = MRI.getType(Dst);
8280
8281 MatchInfo = [=](MachineIRBuilder &B) {
8282 auto Const = B.buildConstant(DstTy, C2 - C1);
8283 B.buildAdd(Dst, Sub->getLHSReg(), Const);
8284 };
8285
8286 return true;
8287}
8288
// Matches G_UNMERGE_VALUES of a G_ANYEXT of a G_BUILD_VECTOR (each with a
// single non-debug use) and rewrites every unmerge result as its own small
// G_BUILD_VECTOR of per-element G_ANYEXTs; see the example further down.
// Returns true and fills MatchInfo on a match.
// NOTE(review): the first line of the signature is missing from this listing.
8290 const MachineInstr &MI, BuildFnTy &MatchInfo) const {
8291 const GUnmerge *Unmerge = cast<GUnmerge>(&MI);
8292
8293 if (!MRI.hasOneNonDBGUse(Unmerge->getSourceReg()))
8294 return false;
8295
8296 const MachineInstr *Source = MRI.getVRegDef(Unmerge->getSourceReg());
8297
8298 LLT DstTy = MRI.getType(Unmerge->getReg(0));
8299
8300 // $bv:_(<8 x s8>) = G_BUILD_VECTOR ....
8301 // $any:_(<8 x s16>) = G_ANYEXT $bv
8302 // $uv:_(<4 x s16>), $uv1:_(<4 x s16>) = G_UNMERGE_VALUES $any
8303 //
8304 // ->
8305 //
8306 // $any:_(s16) = G_ANYEXT $bv[0]
8307 // $any1:_(s16) = G_ANYEXT $bv[1]
8308 // $any2:_(s16) = G_ANYEXT $bv[2]
8309 // $any3:_(s16) = G_ANYEXT $bv[3]
8310 // $any4:_(s16) = G_ANYEXT $bv[4]
8311 // $any5:_(s16) = G_ANYEXT $bv[5]
8312 // $any6:_(s16) = G_ANYEXT $bv[6]
8313 // $any7:_(s16) = G_ANYEXT $bv[7]
8314 // $uv:_(<4 x s16>) = G_BUILD_VECTOR $any, $any1, $any2, $any3
8315 // $uv1:_(<4 x s16>) = G_BUILD_VECTOR $any4, $any5, $any6, $any7
8316
8317 // We want to unmerge into vectors.
8318 if (!DstTy.isFixedVector())
8319 return false;
8320
8321 const GAnyExt *Any = dyn_cast<GAnyExt>(Source);
8322 if (!Any)
8323 return false;
8324
8325 const MachineInstr *NextSource = MRI.getVRegDef(Any->getSrcReg());
8326
8327 if (const GBuildVector *BV = dyn_cast<GBuildVector>(NextSource)) {
8328 // G_UNMERGE_VALUES G_ANYEXT G_BUILD_VECTOR
8329
8330 if (!MRI.hasOneNonDBGUse(BV->getReg(0)))
8331 return false;
8332
8333 // FIXME: check element types?
// The build-vector sources must divide evenly among the unmerge results.
8334 if (BV->getNumSources() % Unmerge->getNumDefs() != 0)
8335 return false;
8336
8337 LLT BigBvTy = MRI.getType(BV->getReg(0));
8338 LLT SmallBvTy = DstTy;
8339 LLT SmallBvElemenTy = SmallBvTy.getElementType();
8340
// NOTE(review): the line opening this condition is missing from this listing;
// presumably it is a legality check on the small G_BUILD_VECTOR.
8342 {TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
8343 return false;
8344
8345 // We check the legality of scalar anyext.
// NOTE(review): the line opening this condition is missing from this listing.
8347 {TargetOpcode::G_ANYEXT,
8348 {SmallBvElemenTy, BigBvTy.getElementType()}}))
8349 return false;
8350
8351 MatchInfo = [=](MachineIRBuilder &B) {
8352 // Build into each G_UNMERGE_VALUES def
8353 // a small build vector with anyext from the source build vector.
8354 for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
// NOTE(review): the declaration of `Ops` is missing from this listing.
8356 for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
8357 Register SourceArray =
8358 BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
8359 auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
8360 Ops.push_back(AnyExt.getReg(0));
8361 }
8362 B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);
8363 };
8364 };
8365 return true;
8366 };
8367
8368 return false;
8369}
8370
8372 BuildFnTy &MatchInfo) const {
8373
8374 bool Changed = false;
8375 auto &Shuffle = cast<GShuffleVector>(MI);
8376 ArrayRef<int> OrigMask = Shuffle.getMask();
8377 SmallVector<int, 16> NewMask;
8378 const LLT SrcTy = MRI.getType(Shuffle.getSrc1Reg());
8379 const unsigned NumSrcElems = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
8380 const unsigned NumDstElts = OrigMask.size();
8381 for (unsigned i = 0; i != NumDstElts; ++i) {
8382 int Idx = OrigMask[i];
8383 if (Idx >= (int)NumSrcElems) {
8384 Idx = -1;
8385 Changed = true;
8386 }
8387 NewMask.push_back(Idx);
8388 }
8389
8390 if (!Changed)
8391 return false;
8392
8393 MatchInfo = [&, NewMask = std::move(NewMask)](MachineIRBuilder &B) {
8394 B.buildShuffleVector(MI.getOperand(0), MI.getOperand(1), MI.getOperand(2),
8395 std::move(NewMask));
8396 };
8397
8398 return true;
8399}
8400
8401static void commuteMask(MutableArrayRef<int> Mask, const unsigned NumElems) {
8402 const unsigned MaskSize = Mask.size();
8403 for (unsigned I = 0; I < MaskSize; ++I) {
8404 int Idx = Mask[I];
8405 if (Idx < 0)
8406 continue;
8407
8408 if (Idx < (int)NumElems)
8409 Mask[I] = Idx + NumElems;
8410 else
8411 Mask[I] = Idx - NumElems;
8412 }
8413}
8414
// Matches a G_SHUFFLE_VECTOR whose mask reads from only one of its two
// sources and rewrites it as a shuffle of that source with an undef second
// operand (commuting the mask when only the second source is read).
// NOTE(review): the first line of the signature is missing from this listing.
8416 BuildFnTy &MatchInfo) const {
8417
8418 auto &Shuffle = cast<GShuffleVector>(MI);
8419 // If any of the two inputs is already undef, don't check the mask again to
8420 // prevent infinite loop
8421 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc1Reg(), MRI))
8422 return false;
8423
8424 if (getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Shuffle.getSrc2Reg(), MRI))
8425 return false;
8426
8427 const LLT DstTy = MRI.getType(Shuffle.getReg(0));
8428 const LLT Src1Ty = MRI.getType(Shuffle.getSrc1Reg());
// NOTE(review): the line opening this condition is missing from this listing;
// presumably it is a legality check on the G_SHUFFLE_VECTOR query below.
8430 {TargetOpcode::G_SHUFFLE_VECTOR, {DstTy, Src1Ty}}))
8431 return false;
8432
8433 ArrayRef<int> Mask = Shuffle.getMask();
8434 const unsigned NumSrcElems = Src1Ty.getNumElements();
8435
// Record which source each non-negative mask entry reads from.
8436 bool TouchesSrc1 = false;
8437 bool TouchesSrc2 = false;
8438 const unsigned NumElems = Mask.size();
8439 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
8440 if (Mask[Idx] < 0)
8441 continue;
8442
8443 if (Mask[Idx] < (int)NumSrcElems)
8444 TouchesSrc1 = true;
8445 else
8446 TouchesSrc2 = true;
8447 }
8448
// Only proceed when exactly one source is read (both or neither: give up).
8449 if (TouchesSrc1 == TouchesSrc2)
8450 return false;
8451
8452 Register NewSrc1 = Shuffle.getSrc1Reg();
8453 SmallVector<int, 16> NewMask(Mask);
8454 if (TouchesSrc2) {
8455 NewSrc1 = Shuffle.getSrc2Reg();
8456 commuteMask(NewMask, NumSrcElems);
8457 }
8458
8459 MatchInfo = [=, &Shuffle](MachineIRBuilder &B) {
8460 auto Undef = B.buildUndef(Src1Ty);
8461 B.buildShuffleVector(Shuffle.getReg(0), NewSrc1, Undef, NewMask);
8462 };
8463
8464 return true;
8465}
8466
// Uses known bits of both operands of a subtract-with-overflow instruction to
// replace it with a plain G_SUB plus a constant carry, when the ranges derived
// from the known bits decide the overflow outcome.
// NOTE(review): the first line of the signature, part of the legality check,
// and the `case` labels of both switches are missing from this listing.
8468 BuildFnTy &MatchInfo) const {
8469 const GSubCarryOut *Subo = cast<GSubCarryOut>(&MI);
8470
8471 Register Dst = Subo->getReg(0);
8472 Register LHS = Subo->getLHSReg();
8473 Register RHS = Subo->getRHSReg();
8474 Register Carry = Subo->getCarryOutReg();
8475 LLT DstTy = MRI.getType(Dst);
8476 LLT CarryTy = MRI.getType(Carry);
8477
8478 // Check legality before known bits.
8479 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SUB, {DstTy}}) ||
8481 return false;
8482
8483 ConstantRange KBLHS =
8484 ConstantRange::fromKnownBits(VT->getKnownBits(LHS),
8485 /* IsSigned=*/Subo->isSigned());
8486 ConstantRange KBRHS =
8487 ConstantRange::fromKnownBits(VT->getKnownBits(RHS),
8488 /* IsSigned=*/Subo->isSigned());
8489
8490 if (Subo->isSigned()) {
8491 // G_SSUBO
// Visible arms: give up, sub with NoSWrap and carry 0, and sub with the
// target's "true" value as carry.
8492 switch (KBLHS.signedSubMayOverflow(KBRHS)) {
8494 return false;
8496 MatchInfo = [=](MachineIRBuilder &B) {
8497 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoSWrap);
8498 B.buildConstant(Carry, 0);
8499 };
8500 return true;
8501 }
8504 MatchInfo = [=](MachineIRBuilder &B) {
8505 B.buildSub(Dst, LHS, RHS);
8506 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8507 /*isVector=*/CarryTy.isVector(),
8508 /*isFP=*/false));
8509 };
8510 return true;
8511 }
8512 }
8513 return false;
8514 }
8515
8516 // G_USUBO
// Visible arms mirror the signed switch above, using NoUWrap.
8517 switch (KBLHS.unsignedSubMayOverflow(KBRHS)) {
8519 return false;
8521 MatchInfo = [=](MachineIRBuilder &B) {
8522 B.buildSub(Dst, LHS, RHS, MachineInstr::MIFlag::NoUWrap);
8523 B.buildConstant(Carry, 0);
8524 };
8525 return true;
8526 }
8529 MatchInfo = [=](MachineIRBuilder &B) {
8530 B.buildSub(Dst, LHS, RHS);
8531 B.buildConstant(Carry, getICmpTrueVal(getTargetLowering(),
8532 /*isVector=*/CarryTy.isVector(),
8533 /*isFP=*/false));
8534 };
8535 return true;
8536 }
8537 }
8538
8539 return false;
8540}
8541
8542// Fold (ctlz (xor x, (sra x, bitwidth-1))) -> (add (ctls x), 1).
8543// Fold (ctlz (or (shl (xor x, (sra x, bitwidth-1)), 1), 1) -> (ctls x)
//
// Only scalar destination types are handled, and only when a LegalizerInfo is
// available to query G_CTLS. Returns true and fills MatchInfo on a match.
// NOTE(review): the first line of the signature, the accepted legalizer
// actions, and parts of both match patterns are missing from this listing.
8545 BuildFnTy &MatchInfo) const {
8546 assert((CtlzMI.getOpcode() == TargetOpcode::G_CTLZ ||
8547 CtlzMI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) &&
8548 "Expected G_CTLZ variant");
8549
8550 const Register Dst = CtlzMI.getOperand(0).getReg();
8551 Register Src = CtlzMI.getOperand(1).getReg();
8552
8553 LLT Ty = MRI.getType(Dst);
8554 LLT SrcTy = MRI.getType(Src);
8555
8556 if (!(Ty.isValid() && Ty.isScalar()))
8557 return false;
8558
8559 if (!LI)
8560 return false;
8561
8562 SmallVector<LLT, 2> QueryTypes = {Ty, SrcTy};
8563 LegalityQuery Query(TargetOpcode::G_CTLS, QueryTypes);
8564
// Any legalizer action not listed in the (missing) case labels rejects the
// fold.
8565 switch (LI->getAction(Query).Action) {
8566 default:
8567 return false;
8571 break;
8572 }
8573
8574 // Src = or(shl(V, 1), 1) -> Src=V; NeedAdd = False
8575 Register V;
8576 bool NeedAdd = true;
8577 if (mi_match(Src, MRI,
8579 m_SpecificICst(1))))) {
8580 NeedAdd = false;
8581 Src = V;
8582 }
8583
8584 unsigned BitWidth = Ty.getScalarSizeInBits();
8585
8586 Register X;
8587 if (!mi_match(Src, MRI,
8590 m_SpecificICst(BitWidth - 1)))))))
8591 return false;
8592
8593 MatchInfo = [=](MachineIRBuilder &B) {
// Second pattern from the header comment: the ctls result is used as is.
8594 if (!NeedAdd) {
8595 B.buildCTLS(Dst, X);
8596 return;
8597 }
8598
// First pattern from the header comment: ctls plus one.
8599 auto Ctls = B.buildCTLS(Ty, X);
8600 auto One = B.buildConstant(Ty, 1);
8601
8602 B.buildAdd(Dst, Ctls, One);
8603 };
8604
8605 return true;
8606}
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
unsigned RegSize
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
AMDGPU Register Bank Select
Rewrite undef for PHI
This file declares a class to represent arbitrary precision floating point values and provide a varie...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool hasMoreUses(const MachineInstr &MI0, const MachineInstr &MI1, const MachineRegisterInfo &MRI)
static bool isContractableFMul(MachineInstr &MI, bool AllowFusionGlobally)
Checks if MI is TargetOpcode::G_FMUL and contractable either due to global flags or MachineInstr flag...
static unsigned getIndexedOpc(unsigned LdStOpc)
static APFloat constantFoldFpUnary(const MachineInstr &MI, const MachineRegisterInfo &MRI, const APFloat &Val)
static std::optional< std::pair< GZExtLoad *, int64_t > > matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits, const MachineRegisterInfo &MRI)
Helper function for findLoadOffsetsForLoadOrCombine.
static std::optional< unsigned > getMinUselessShift(KnownBits ValueKB, unsigned Opcode, std::optional< int64_t > &Result)
Return the minimum useless shift amount that results in complete loss of the source value.
static Register peekThroughBitcast(Register Reg, const MachineRegisterInfo &MRI)
static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I)
static cl::opt< bool > ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), cl::desc("Force all indexed operations to be " "legal for the GlobalISel combiner"))
static void commuteMask(MutableArrayRef< int > Mask, const unsigned NumElems)
static cl::opt< unsigned > PostIndexUseThreshold("post-index-use-threshold", cl::Hidden, cl::init(32), cl::desc("Number of uses of a base pointer to check before it is no longer " "considered for post-indexing."))
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static unsigned getExtLoadOpcForExtend(unsigned ExtOpc)
static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits, int64_t Cst, bool IsVector, bool IsFP)
static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy)
static bool canFoldInAddressingMode(GLoadStore *MI, const TargetLowering &TLI, MachineRegisterInfo &MRI)
Return true if 'MI' is a load or a store that may fold its address operand into the load / store ...
static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I)
static Register buildLogBase2(Register V, MachineIRBuilder &MIB)
Determines the LogBase2 value for a non-null input value using the transform: LogBase2(V) = (EltBits ...
This contains common combine transformations that may be used in a combine pass,or by the target else...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
Interface for Targets to specify which operations they can successfully select and how the others sho...
Implement a low-level type suitable for MachineInstr level instruction selection.
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
Register Reg
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
R600 Clause Merge
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmTowardNegative
Definition APFloat.h:347
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static constexpr roundingMode rmTowardPositive
Definition APFloat.h:346
static constexpr roundingMode rmNearestTiesToAway
Definition APFloat.h:349
const fltSemantics & getSemantics() const
Definition APFloat.h:1524
bool isNaN() const
Definition APFloat.h:1514
opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend, roundingMode RM)
Definition APFloat.h:1290
APInt bitcastToAPInt() const
Definition APFloat.h:1408
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition APInt.cpp:1044
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1677
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
int32_t exactLogBase2() const
Definition APInt.h:1798
void ashrInPlace(unsigned ShiftAmt)
Arithmetic right-shift this APInt by ShiftAmt in place.
Definition APInt.h:841
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1654
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition APInt.h:1613
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1052
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
LLVM_ABI APInt multiplicativeInverse() const
Definition APInt.cpp:1285
bool isMask(unsigned numBits) const
Definition APInt.h:489
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
bool isOne() const
Determine if this is a value of 1.
Definition APInt.h:390
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1671
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:915
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_TRUE
1 1 1 1 Always true (always folded)
Definition InstrTypes.h:693
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_FALSE
0 0 0 0 Always false (always folded)
Definition InstrTypes.h:678
static LLVM_ABI bool isEquality(Predicate pred)
Determine if this is an equals/not equals predicate.
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
static LLVM_ABI bool isOrdered(Predicate predicate)
Determine if the predicate is an ordered operation.
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRepeatedFPDivisor(MachineInstr &MI, SmallVector< MachineInstr * > &MatchInfo) const
bool matchFoldC2MinusAPlusC1(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match expression trees of the form.
const RegisterBank * getRegBank(Register Reg) const
Get the register bank of Reg.
void applyPtrAddZero(MachineInstr &MI) const
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2) const
Return true if MOP1 and MOP2 are register operands that are defined by equivalent instructions.
void applyUDivOrURemByConst(MachineInstr &MI) const
bool matchConstantFoldBinOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchCtls(MachineInstr &CtlzMI, BuildFnTy &MatchInfo) const
bool matchSelectSameVal(MachineInstr &MI) const
Optimize (cond ? x : x) -> x.
bool matchAddEToAddO(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*ADDE x, y, 0) -> (G_*ADDO x, y) (G_*SUBE x, y, 0) -> (G_*SUBO x, y)
bool matchReassocConstantInnerRHS(GPtrAdd &MI, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchBitfieldExtractFromShr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (shl x, n), k -> sbfx/ubfx x, pos, width.
bool matchFoldAMinusC1PlusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
void applySimplifyURemByPow2(MachineInstr &MI) const
Combine G_UREM x, (known power of 2) to an add and bitmasking.
bool matchCombineUnmergeZExtToZExt(MachineInstr &MI) const
Transform X, Y = G_UNMERGE(G_ZEXT(Z)) -> X = G_ZEXT(Z); Y = G_CONSTANT 0.
bool matchPtrAddZero(MachineInstr &MI) const
}
void applyCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void applyXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
bool canCombineFMadOrFMA(MachineInstr &MI, bool &AllowFusionGlobally, bool &HasFMAD, bool &Aggressive, bool CanReassociate=false) const
bool matchFoldAPlusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
void applyCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
bool matchShiftsTooBig(MachineInstr &MI, std::optional< int64_t > &MatchInfo) const
Match shifts greater or equal to the range (the bitwidth of the result datatype, or the effective bit...
bool matchCombineFAddFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) (fadd (fpext (fmul x,...
bool matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
void applyCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
Replace MI with a flattened build_vector with Ops or an implicit_def if Ops is empty.
void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement) const
Delete MI and replace all of its uses with Replacement.
void applyCombineShuffleToBuildVector(MachineInstr &MI) const
Replace MI with a build_vector.
bool matchCombineExtractedVectorLoad(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine a G_EXTRACT_VECTOR_ELT of a load into a narrowed load.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
void replaceRegOpWith(MachineRegisterInfo &MRI, MachineOperand &FromRegOp, Register ToReg) const
Replace a single register operand with a new register and inform the observer of the changes.
bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate commutative binary operations like G_ADD.
void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCommuteConstantToRHS(MachineInstr &MI) const
Match constant LHS ops that should be commuted.
const DataLayout & getDataLayout() const
bool matchBinOpSameVal(MachineInstr &MI) const
Optimize (x op x) -> x.
bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
bool matchCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
Try to combine G_[SU]DIV and G_[SU]REM into a single G_[SU]DIVREM when their source operands are iden...
void applyUMulHToLShr(MachineInstr &MI) const
void applyNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
bool isLegalOrHasFewerElements(const LegalityQuery &Query) const
bool matchShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
Fold (shift (shift base, x), y) -> (shift base (x+y))
void applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
bool matchTruncLshrBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
bool matchAllExplicitUsesAreUndef(MachineInstr &MI) const
Return true if all register explicit use operands on MI are defined by a G_IMPLICIT_DEF.
bool isPredecessor(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI precedes UseMI or they are the same instruction.
bool matchPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool matchTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
const TargetLowering & getTargetLowering() const
bool matchShuffleUndefRHS(MachineInstr &MI, BuildFnTy &MatchInfo) const
Remove references to rhs if it is undef.
void applyBuildInstructionSteps(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Replace MI with a series of instructions described in MatchInfo.
void applySDivByPow2(MachineInstr &MI) const
void applySimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
void applyUDivByPow2(MachineInstr &MI) const
Given an G_UDIV MI expressing an unsigned divided by a pow2 constant, return expressions that impleme...
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ors.
bool matchLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo, MachineInstr &ShiftMI) const
Fold (lshr (trunc (lshr x, C1)), C2) -> trunc (shift x, (C1 + C2))
bool matchSimplifyAddToSub(MachineInstr &MI, std::tuple< Register, Register > &MatchInfo) const
Return true if MI is a G_ADD which can be simplified to a G_SUB.
void replaceInstWithConstant(MachineInstr &MI, int64_t C) const
Replace an instruction with a G_CONSTANT with value C.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool matchCombineFSubFpExtFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), (fneg z)) (fsub (fpext (fmul x,...
void applyFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantLargerBitWidth(MachineInstr &MI, unsigned ConstIdx) const
Checks if constant at ConstIdx is larger than MI 's bitwidth.
void applyCombineCopy(MachineInstr &MI) const
bool matchAddSubSameReg(MachineInstr &MI, Register &Src) const
Transform G_ADD(x, G_SUB(y, x)) to y.
bool matchCombineShlOfExtend(MachineInstr &MI, RegisterImmPair &MatchData) const
void applyCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
bool matchCombineFSubFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fmul x, y), z) -> (fma x, y, -z) (fsub (fmul x, y), z) -> (fmad x,...
bool matchCombineFAddFMAFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z)) (fadd (fmad x,...
bool matchSextTruncSextLoad(MachineInstr &MI) const
bool matchCombineMergeUnmerge(MachineInstr &MI, Register &MatchInfo) const
Fold away a merge of an unmerge of the corresponding values.
bool matchCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, Register &UnmergeSrc) const
bool matchDivByPow2(MachineInstr &MI, bool IsSigned) const
Given an G_SDIV MI expressing a signed divided by a pow2 constant, return expressions that implements...
bool matchNarrowBinopFeedingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchRedundantNegOperands(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd x, fneg(y)) -> (fsub x, y) (fadd fneg(x), y) -> (fsub y, x) (fsub x,...
bool matchCombineLoadWithAndMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match (and (load x), mask) -> zextload x.
bool matchCombineFAddFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fadd (fmul x, y), z) -> (fma x, y, z) (fadd (fmul x, y), z) -> (fmad x,...
bool matchCombineCopy(MachineInstr &MI) const
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
void applyShiftImmedChain(MachineInstr &MI, RegisterImmPair &MatchInfo) const
bool matchXorOfAndWithSameReg(MachineInstr &MI, std::pair< Register, Register > &MatchInfo) const
Fold (xor (and x, y), y) -> (and (not x), y).
bool matchCombineShuffleVector(MachineInstr &MI, SmallVectorImpl< Register > &Ops) const
Check if the G_SHUFFLE_VECTOR MI can be replaced by a concat_vectors.
void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineAddP2IToPtrAdd(MachineInstr &MI, std::pair< Register, bool > &PtrRegAndCommute) const
Transform G_ADD (G_PTRTOINT x), y -> G_PTRTOINT (G_PTR_ADD x, y) Transform G_ADD y,...
void replaceInstWithFConstant(MachineInstr &MI, double C) const
Replace an instruction with a G_FCONSTANT with value C.
bool matchFunnelShiftToRotate(MachineInstr &MI) const
Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
bool matchOrShiftToFunnelShift(MachineInstr &MI, bool AllowScalarConstants, BuildFnTy &MatchInfo) const
bool matchRedundantSExtInReg(MachineInstr &MI) const
void replaceOpcodeWith(MachineInstr &FromMI, unsigned ToOpcode) const
Replace the opcode in instruction with a new opcode and inform the observer of the changes.
void applyFunnelShiftConstantModulo(MachineInstr &MI) const
Replaces the shift amount in MI with ShiftAmt % BW.
bool matchFoldC1Minus2MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineShlOfExtend(MachineInstr &MI, const RegisterImmPair &MatchData) const
void applyUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize, GISelValueTracking *VT=nullptr, MachineDominatorTree *MDT=nullptr, const LegalizerInfo *LI=nullptr)
bool matchShuffleDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
Turn shuffle a, b, mask -> shuffle undef, b, mask iff mask does not reference a.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
Transform a multiply by a power-of-2 value to a left shift.
void applyCombineShuffleVector(MachineInstr &MI, ArrayRef< Register > Ops) const
Replace MI with a concat_vectors with Ops.
bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst) const
bool matchCombineUnmergeUndef(MachineInstr &MI, std::function< void(MachineIRBuilder &)> &MatchInfo) const
Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo) const
SelectOperand is the operand in binary operator MI that is the select to fold.
bool matchFoldAMinusC1MinusC2(const MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) const
bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_UMULO x, 2) -> (G_UADDO x, x) (G_SMULO x, 2) -> (G_SADDO x, x)
bool matchCombineShuffleConcat(MachineInstr &MI, SmallVector< Register > &Ops) const
void applySextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
bool tryCombineCopy(MachineInstr &MI) const
If MI is COPY, try to combine it.
bool matchTruncUSatU(MachineInstr &MI, MachineInstr &MinMI) const
bool matchICmpToLHSKnownBits(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Reassociate pointer calculations with G_ADD involved, to allow better addressing mode usage.
bool matchUndefShuffleVectorMask(MachineInstr &MI) const
Return true if a G_SHUFFLE_VECTOR instruction MI has an undef mask.
bool matchAnyExplicitUseIsUndef(MachineInstr &MI) const
Return true if any explicit use operand on MI is defined by a G_IMPLICIT_DEF.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) const
Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineSubToAdd(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
If we have a shift-by-constant of a bitwise logic op that itself has a shift-by-constant operand with...
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is known to be a power of 2.
bool matchCombineConcatVectors(MachineInstr &MI, SmallVector< Register > &Ops) const
If MI is G_CONCAT_VECTORS, try to combine it.
bool matchInsertExtractVecEltOutOfBounds(MachineInstr &MI) const
Return true if a G_{EXTRACT,INSERT}_VECTOR_ELT has an out of range index.
bool matchExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
LLVMContext & getContext() const
void applyPtrAddImmedChain(MachineInstr &MI, PtrAddChain &MatchInfo) const
bool isConstantLegalOrBeforeLegalizer(const LLT Ty) const
bool matchNotCmp(MachineInstr &MI, SmallVectorImpl< Register > &RegsToNegate) const
Combine inverting a result of a compare into the opposite cond code.
bool matchSextInRegOfLoad(MachineInstr &MI, std::tuple< Register, unsigned > &MatchInfo) const
Match sext_inreg(load p), imm -> sextload p.
bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const
Combine select to integer min/max.
void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst) const
Transform fp_instr(cst) to constant result of the fp operation.
bool isLegal(const LegalityQuery &Query) const
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo) const
bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0, Register Op1, BuildFnTy &MatchInfo) const
Try to reassociate the operands of a commutative binop.
void eraseInst(MachineInstr &MI) const
Erase MI.
bool matchConstantFoldFPBinOp(MachineInstr &MI, ConstantFP *&MatchInfo) const
Do constant FP folding when opportunities are exposed after MIR building.
void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchUseVectorTruncate(MachineInstr &MI, Register &MatchInfo) const
bool matchUndefStore(MachineInstr &MI) const
Return true if a G_STORE instruction MI is storing an undef value.
MachineRegisterInfo & MRI
void applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) const
Transform PtrToInt(IntToPtr(x)) to x.
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI) const
bool matchConstantFPOp(const MachineOperand &MOP, double C) const
Return true if MOP is defined by a G_FCONSTANT or splat with a value exactly equal to C.
MachineInstr * buildUDivOrURemUsingMul(MachineInstr &MI) const
Given an G_UDIV MI or G_UREM MI expressing a divide by constant, return an expression that implements...
void applyExtractVecEltBuildVec(MachineInstr &MI, Register &Reg) const
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo) const
Push a binary operator through a select on constants.
bool tryCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftAmount) const
bool tryCombineExtendingLoads(MachineInstr &MI) const
If MI is extend that consumes the result of a load, try to combine it.
bool isLegalOrBeforeLegalizer(const LegalityQuery &Query) const
bool matchBuildVectorIdentityFold(MachineInstr &MI, Register &MatchInfo) const
bool matchBitfieldExtractFromShrAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: shr (and x, n), k -> ubfx x, pos, width.
void applyTruncSSatS(MachineInstr &MI, Register &MatchInfo) const
bool matchConstantFoldCastOp(MachineInstr &MI, APInt &MatchInfo) const
Do constant folding when opportunities are exposed after MIR building.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
void applyRotateOutOfRange(MachineInstr &MI) const
bool matchReassocFoldConstantsInSubTree(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchHoistLogicOpWithSameOpcodeHands(MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) const
Match (logic_op (op x...), (op y...)) -> (op (logic_op x, y))
bool matchBitfieldExtractFromAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: and (lshr x, cst), mask -> ubfx x, cst, width.
bool matchBitfieldExtractFromSExtInReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool matchUndefSelectCmp(MachineInstr &MI) const
Return true if a G_SELECT instruction MI has an undef comparison.
bool matchAndOrDisjointMask(MachineInstr &MI, BuildFnTy &MatchInfo) const
void replaceInstWithUndef(MachineInstr &MI) const
Replace an instruction with a G_IMPLICIT_DEF.
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (X + Y) == X -> Y == 0 (X - Y) == X -> Y == 0 (X ^ Y) == X -> Y == 0 (X + Y) !...
bool matchOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
If a brcond's true block is not the fallthrough, make it so by inverting the condition and swapping o...
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine addos.
void applyAshShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine selects.
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo) const
bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) const
Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
bool matchFsubToFneg(MachineInstr &MI, Register &MatchInfo) const
bool matchRotateOutOfRange(MachineInstr &MI) const
void applyExpandFPowI(MachineInstr &MI, int64_t Exponent) const
Expands FPOWI into a series of multiplications and a division if the exponent is negative.
void setRegBank(Register Reg, const RegisterBank *RegBank) const
Set the register bank of Reg.
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) const
Return true if a G_SELECT instruction MI has a constant comparison.
bool matchCommuteFPConstantToRHS(MachineInstr &MI) const
Match constant LHS FP ops that should be commuted.
void applyCombineDivRem(MachineInstr &MI, MachineInstr *&OtherMI) const
bool matchCombineFMinMaxNaN(MachineInstr &MI, unsigned &Info) const
bool matchRedundantOr(MachineInstr &MI, Register &Replacement) const
void applyTruncSSatU(MachineInstr &MI, Register &MatchInfo) const
bool matchCombineFSubFpExtFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fpext (fneg (fmul x, y))), z) -> (fneg (fma (fpext x), (fpext y),...
bool matchTruncBuildVectorFold(MachineInstr &MI, Register &MatchInfo) const
void applyCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
bool matchConstantOp(const MachineOperand &MOP, int64_t C) const
Return true if MOP is defined by a G_CONSTANT or splat with a value equal to C.
const LegalizerInfo * LI
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal) const
void applyCombineBuildUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B, Register &UnmergeSrc) const
bool matchUMulHToLShr(MachineInstr &MI) const
MachineDominatorTree * MDT
void applyFunnelShiftToRotate(MachineInstr &MI) const
bool matchSimplifySelectToMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applyRepeatedFPDivisor(SmallVector< MachineInstr * > &MatchInfo) const
bool matchTruncUSatUToFPTOUISat(MachineInstr &MI, MachineInstr &SrcMI) const
const RegisterBankInfo * RBI
bool matchMulOBy0(MachineInstr &MI, BuildFnTy &MatchInfo) const
Match: (G_*MULO x, 0) -> 0 + no carry out.
GISelValueTracking * VT
bool matchCombineUnmergeConstant(MachineInstr &MI, SmallVectorImpl< APInt > &Csts) const
Transform G_UNMERGE Constant -> Constant1, Constant2, ...
void applyShiftOfShiftedLogic(MachineInstr &MI, ShiftOfShiftedLogic &MatchInfo) const
const TargetRegisterInfo * TRI
bool matchRedundantAnd(MachineInstr &MI, Register &Replacement) const
bool dominates(const MachineInstr &DefMI, const MachineInstr &UseMI) const
Returns true if DefMI dominates UseMI.
GISelChangeObserver & Observer
void applyBuildFn(MachineInstr &MI, BuildFnTy &MatchInfo) const
Use a function which takes in a MachineIRBuilder to perform a combine.
bool matchCombineTruncOfShift(MachineInstr &MI, std::pair< MachineInstr *, LLT > &MatchInfo) const
Transform trunc (shl x, K) to shl (trunc x), K if K < VT.getScalarSizeInBits().
bool matchCombineShiftToUnmerge(MachineInstr &MI, unsigned TargetShiftSize, unsigned &ShiftVal) const
Reduce a shift by a constant to an unmerge and a shift on a half sized type.
bool matchUDivOrURemByConst(MachineInstr &MI) const
Combine G_UDIV or G_UREM by constant into a multiply by magic constant.
bool matchAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Combine ands.
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchConstantFoldFMA(MachineInstr &MI, ConstantFP *&MatchInfo) const
Constant fold G_FMA/G_FMAD.
bool matchCombineFSubFNegFMulToFMadOrFMA(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) (fsub (fneg (fmul,...
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg) const
Transform zext(trunc(x)) to x.
bool matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) const
Check if operand OpIdx is undef.
void applyLshrOfTruncOfLshr(MachineInstr &MI, LshrOfTruncOfLshr &MatchInfo) const
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
bool matchFreezeOfSingleMaybePoisonOperand(MachineInstr &MI, BuildFnTy &MatchInfo) const
void applySDivOrSRemByConst(MachineInstr &MI) const
MachineInstr * buildSDivOrSRemUsingMul(MachineInstr &MI) const
Given an G_SDIV MI or G_SREM MI expressing a signed divide by constant, return an expression that imp...
bool isLegalOrHasWidenScalar(const LegalityQuery &Query) const
bool matchSubAddSameReg(MachineInstr &MI, BuildFnTy &MatchInfo) const
Transform: (x + y) - y -> x; (x + y) - x -> y; x - (y + x) -> 0 - y; x - (x + z) -> 0 - z.
bool matchReassocConstantInnerLHS(GPtrAdd &MI, MachineInstr *LHS, MachineInstr *RHS, BuildFnTy &MatchInfo) const
bool matchOverlappingAnd(MachineInstr &MI, BuildFnTy &MatchInfo) const
Fold and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0.
bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) const
Transform anyext(trunc(x)) to x.
void applyExtractAllEltsFromBuildVector(MachineInstr &MI, SmallVectorImpl< std::pair< Register, MachineInstr * > > &MatchInfo) const
MachineIRBuilder & Builder
void applyCommuteBinOpOperands(MachineInstr &MI) const
void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx) const
Delete MI and replace all of its uses with its OpIdx-th operand.
void applySextTruncSextLoad(MachineInstr &MI) const
const MachineFunction & getMachineFunction() const
bool matchCombineFAddFpExtFMulToFMadOrFMAAggressive(MachineInstr &MI, BuildFnTy &MatchInfo) const
bool matchSDivOrSRemByConst(MachineInstr &MI) const
Combine G_SDIV or G_SREM by constant into a multiply by magic constant.
void applyOptBrCondByInvertingCond(MachineInstr &MI, MachineInstr *&BrCond) const
void applyCombineShiftToUnmerge(MachineInstr &MI, const unsigned &ShiftVal) const
bool matchFPowIExpansion(MachineInstr &MI, int64_t Exponent) const
Match FPOWI if it's safe to expand it into a series of multiplications.
void applyCombineInsertVecElts(MachineInstr &MI, SmallVectorImpl< Register > &MatchInfo) const
bool matchCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
Transform <ty,...> G_UNMERGE(G_MERGE ty X, Y, Z) -> ty X, Y, Z.
void applyCombineUnmergeMergeToPlainValues(MachineInstr &MI, SmallVectorImpl< Register > &Operands) const
bool matchAshrShlToSextInreg(MachineInstr &MI, std::tuple< Register, int64_t > &MatchInfo) const
Match ashr (shl x, C), C -> sext_inreg (C)
void applyCombineUnmergeZExtToZExt(MachineInstr &MI) const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
const APFloat & getValue() const
Definition Constants.h:326
const APFloat & getValueAPF() const
Definition Constants.h:325
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This class represents a range of values.
LLVM_ABI std::optional< ConstantRange > exactUnionWith(const ConstantRange &CR) const
Union the two ranges and return the result if it can be represented exactly, otherwise return std::nu...
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
static LLVM_ABI ConstantRange fromKnownBits(const KnownBits &Known, bool IsSigned)
Initialize a range based on a known bits constraint.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI OverflowResult unsignedSubMayOverflow(const ConstantRange &Other) const
Return whether unsigned sub of the two ranges always/never overflows.
LLVM_ABI OverflowResult unsignedAddMayOverflow(const ConstantRange &Other) const
Return whether unsigned add of the two ranges always/never overflows.
LLVM_ABI bool isWrappedSet() const
Return true if this set wraps around the unsigned domain.
const APInt & getUpper() const
Return the upper value for this range.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI OverflowResult signedAddMayOverflow(const ConstantRange &Other) const
Return whether signed add of the two ranges always/never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI OverflowResult signedSubMayOverflow(const ConstantRange &Other) const
Return whether signed sub of the two ranges always/never overflows.
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:215
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition DenseMap.h:205
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
Represents overflowing add operations.
Represents an integer addition.
Represents a logical and.
CmpInst::Predicate getCond() const
Register getLHSReg() const
Register getRHSReg() const
Represents an any ext.
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Register getCarryOutReg() const
Register getLHSReg() const
Register getRHSReg() const
Represents a G_BUILD_VECTOR.
Represent a G_ICMP.
Abstract class that contains various methods for clients to notify about changes.
Simple wrapper observer that takes several observers, and calls each one for each event.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
Represents a G_LOAD.
Represents a logical binary operation.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
LocationSize getMemSizeInBits() const
Returns the size in bits of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
Register getSourceReg(unsigned I) const
Returns the I'th source register.
unsigned getNumSources() const
Returns the number of source registers.
Represents a G_MERGE_VALUES.
Represents a logical or.
Represents a G_PTR_ADD.
Represents a G_SELECT.
Register getCondReg() const
Represents overflowing sub operations.
Represents an integer subtraction.
Represents a G_UNMERGE_VALUES.
unsigned getNumDefs() const
Returns the number of def registers.
Register getSourceReg() const
Get the unmerge source register.
Represents a G_ZEXTLOAD.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static LLVM_ABI bool compare(const APInt &LHS, const APInt &RHS, ICmpInst::Predicate Pred)
Return result of LHS Pred RHS comparison.
constexpr bool isScalableVector() const
Returns true if the LLT is a scalable vector.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr bool isPointerOrPointerVector() const
constexpr bool isFixedVector() const
Returns true if the LLT is a fixed vector.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
@ Legalized
Instruction has been legalized and the MachineFunction changed.
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
TypeSize getValue() const
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Helper class to build MachineInstr.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineRegisterInfo * getMRI()
Getter for MRI.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
const MachineBasicBlock * getParent() const
LLVM_ABI bool isDereferenceableInvariantLoad() const
Return true if this load instruction never traps and points to a memory location whose value doesn't ...
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
mop_range uses()
Returns all operands which may be register uses.
MachineOperand * findRegisterUseOperand(Register Reg, const TargetRegisterInfo *TRI, bool isKill=false)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
A description of a memory reference used in the backend.
LLT getMemoryType() const
Return the memory type of the memory reference.
unsigned getAddrSpace() const
const MachinePointerInfo & getPointerInfo() const
LLVM_ABI Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
MachineOperand class - Representation of each machine instruction operand.
const ConstantInt * getCImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
void setMBB(MachineBasicBlock *MBB)
void setPredicate(unsigned Predicate)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
unsigned getPredicate() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
static use_instr_nodbg_iterator use_instr_nodbg_end()
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
This class implements the register bank concept.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual LLVM_READONLY LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const
Return the preferred type to use for a shift opcode, given the shifted amount type is ShiftValueTy.
bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual const TargetLowering * getTargetLowering() const
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Legal
The operation is expected to be selectable directly by the target, and no transformation is necessary...
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR, false > m_GBuildVector(const LHS &L, const RHS &R)
GCstAndRegMatch m_GCst(std::optional< ValueAndVReg > &ValReg)
operand_type_match m_Pred()
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMIN, true > m_GUMin(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_XOR, true > m_GXor(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_SEXT > m_GSExt(const SrcTy &Src)
UnaryOp_match< SrcTy, TargetOpcode::G_FPEXT > m_GFPExt(const SrcTy &Src)
ConstantMatch< APInt > m_ICst(APInt &Cst)
UnaryOp_match< SrcTy, TargetOpcode::G_INTTOPTR > m_GIntToPtr(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ADD, true > m_GAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
ICstOrSplatMatch< APInt > m_ICstOrSplat(APInt &Cst)
ImplicitDefMatch m_GImplicitDef()
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CheckType m_SpecificType(LLT Ty)
deferred_ty< Register > m_DeferredReg(Register &R)
Similar to m_SpecificReg/Type, but the specific value to match originated from an earlier sub-pattern...
BinaryOp_match< LHS, RHS, TargetOpcode::G_UMAX, true > m_GUMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FADD, true > m_GFAdd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_PTRTOINT > m_GPtrToInt(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_FSUB, false > m_GFSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SUB > m_GSub(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_ASHR, false > m_GAShr(const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SHL, false > m_GShl(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
SpecificConstantOrSplatMatch m_SpecificICstOrSplat(const APInt &RequestedValue)
Matches a RequestedValue constant or a constant splat of RequestedValue.
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_BITCAST > m_GBitcast(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_BUILD_VECTOR_TRUNC, false > m_GBuildVectorTrunc(const LHS &L, const RHS &R)
bind_ty< MachineInstr * > m_MInstr(MachineInstr *&MI)
UnaryOp_match< SrcTy, TargetOpcode::G_FNEG > m_GFNeg(const SrcTy &Src)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP, true > m_c_GICmp(const Pred &P, const LHS &L, const RHS &R)
G_ICMP matcher that also matches commuted compares.
TernaryOp_match< Src0Ty, Src1Ty, Src2Ty, TargetOpcode::G_INSERT_VECTOR_ELT > m_GInsertVecElt(const Src0Ty &Src0, const Src1Ty &Src1, const Src2Ty &Src2)
GFCstOrSplatGFCstMatch m_GFCstOrSplat(std::optional< FPValueAndVReg > &FPValReg)
And< Preds... > m_all_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMIN, true > m_GSMin(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, TargetOpcode::G_LSHR, false > m_GLShr(const LHS &L, const RHS &R)
UnaryOp_match< SrcTy, TargetOpcode::G_ANYEXT > m_GAnyExt(const SrcTy &Src)
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
BinaryOp_match< LHS, RHS, TargetOpcode::G_SMAX, true > m_GSMax(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Not(const Pred &P) -> Not< Pred >
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool isBuildVectorAllZeros(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndef=false)
Return true if the specified instruction is a G_BUILD_VECTOR or G_BUILD_VECTOR_TRUNC where all of the...
Definition Utils.cpp:1483
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2037
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:653
static double log2(double V)
LLVM_ABI const ConstantFP * getConstantFPVRegVal(Register VReg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:461
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:294
LLVM_ABI std::optional< APInt > getIConstantSplatVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1443
LLVM_ABI bool isAllOnesOrAllOnesSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant -1 integer or a splatted vector of a constant -1 integer (with...
Definition Utils.cpp:1608
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
std::function< void(MachineIRBuilder &)> BuildFnTy
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
LLVM_ABI std::optional< APFloat > ConstantFoldFPBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:741
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1566
LLVM_ABI bool isNullOrNullSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowUndefs=false)
Return true if the value is a constant 0 integer or a splatted vector of a constant 0 integer (with n...
Definition Utils.cpp:1590
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:494
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1623
LLVM_ABI bool isConstTrueVal(const TargetLowering &TLI, int64_t Val, bool IsVector, bool IsFP)
Returns true if given the TargetLowering's boolean contents information, the value Val contains a tru...
Definition Utils.cpp:1655
LLVM_ABI std::optional< APInt > ConstantFoldBinOp(unsigned Opcode, const Register Op1, const Register Op2, const MachineRegisterInfo &MRI)
Definition Utils.cpp:672
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI const APInt & getIConstantFromReg(Register VReg, const MachineRegisterInfo &MRI)
VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:305
LLVM_ABI bool isConstantOrConstantVector(const MachineInstr &MI, const MachineRegisterInfo &MRI, bool AllowFP=true, bool AllowOpaqueConstants=true)
Return true if the specified instruction is known to be a constant, or a vector of constants.
Definition Utils.cpp:1546
SmallVector< std::function< void(MachineInstrBuilder &)>, 4 > OperandBuildSteps
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool canReplaceReg(Register DstReg, Register SrcReg, MachineRegisterInfo &MRI)
Check if DstReg can be replaced with SrcReg depending on the register constraints.
Definition Utils.cpp:200
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
LLVM_ABI bool canCreateUndefOrPoison(const Operator *Op, bool ConsiderFlagsAndMetadata=true)
canCreateUndefOrPoison returns true if Op can create undef or poison from non-undef & non-poison oper...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI std::optional< FPValueAndVReg > getFConstantSplat(Register VReg, const MachineRegisterInfo &MRI, bool AllowUndef=true)
Returns a floating point scalar constant of a build vector splat if it exists.
Definition Utils.cpp:1476
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI std::optional< APInt > ConstantFoldCastOp(unsigned Opcode, LLT DstTy, const Register Op0, const MachineRegisterInfo &MRI)
Definition Utils.cpp:967
LLVM_ABI unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc)
Returns the inverse opcode of MinMaxOpc, which is a generic min/max opcode like G_SMIN.
Definition Utils.cpp:279
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
LLVM_ABI std::optional< FPValueAndVReg > getFConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_FCONSTANT returns it...
Definition Utils.cpp:447
LLVM_ABI std::optional< APFloat > isConstantOrConstantSplatVectorFP(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a floating-point constant or a splat vector of floating-point constants.
Definition Utils.cpp:1579
constexpr unsigned BitWidth
LLVM_ABI int64_t getICmpTrueVal(const TargetLowering &TLI, bool IsVector, bool IsFP)
Returns an integer representing true, as defined by the TargetBooleanContents.
Definition Utils.cpp:1680
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:433
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI std::optional< DefinitionAndSourceRegister > getDefSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, and underlying value Register folding away any copies.
Definition Utils.cpp:469
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL, bool OrZero=false, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return true if the given value is known to have exactly one bit set when defined.
LLVM_ABI Register getSrcRegIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the source register for Reg, folding away any trivial copies.
Definition Utils.cpp:501
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
unsigned getFCmpCode(CmpInst::Predicate CC)
Similar to getICmpCode but for FCmpInst.
LLVM_ABI std::optional< int64_t > getIConstantSplatSExtVal(const Register Reg, const MachineRegisterInfo &MRI)
Definition Utils.cpp:1461
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Simple struct used to hold a Register value and the instruction which defines it.
Definition Utils.h:229
Extended Value Type.
Definition ValueTypes.h:35
SmallVector< InstructionBuildSteps, 2 > InstrsToBuild
Describes instructions to be built during a combine.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:108
unsigned countMinLeadingOnes() const
Returns the minimum number of leading one bits.
Definition KnownBits.h:267
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:258
bool isUnknown() const
Returns true if we don't know any bits.
Definition KnownBits.h:66
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:264
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:105
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
const RegisterBank * Bank
Magic data for optimising signed division by a constant.
static LLVM_ABI SignedDivisionByConstantInfo get(const APInt &D)
Calculate the magic numbers required to implement a signed integer division by a constant as a sequen...
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
Magic data for optimising unsigned division by a constant.
static LLVM_ABI UnsignedDivisionByConstantInfo get(const APInt &D, unsigned LeadingZeros=0, bool AllowEvenDivisorOptimization=true, bool AllowWidenOptimization=false)
Calculate the magic numbers required to implement an unsigned integer division by a constant as a seq...