LLVM 23.0.0git
IVDescriptors.cpp
Go to the documentation of this file.
1//===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file "describes" induction and recurrence variables.
10//
11//===----------------------------------------------------------------------===//
12
20#include "llvm/IR/Dominators.h"
23#include "llvm/IR/ValueHandle.h"
24#include "llvm/Support/Debug.h"
26
27using namespace llvm;
28using namespace llvm::PatternMatch;
29using namespace llvm::SCEVPatternMatch;
30
31#define DEBUG_TYPE "iv-descriptors"
32
35 for (const Use &Use : I->operands())
36 if (!Set.count(dyn_cast<Instruction>(Use)))
37 return false;
38 return true;
39}
40
42 switch (Kind) {
43 default:
44 break;
46 case RecurKind::Sub:
47 case RecurKind::Add:
48 case RecurKind::Mul:
49 case RecurKind::Or:
50 case RecurKind::And:
51 case RecurKind::Xor:
52 case RecurKind::SMax:
53 case RecurKind::SMin:
54 case RecurKind::UMax:
55 case RecurKind::UMin:
59 return true;
60 }
61 return false;
62}
63
67
68/// Determines if Phi may have been type-promoted. If Phi has a single user
69/// that ANDs the Phi with a type mask, return the user. RT is updated to
70/// account for the narrower bit width represented by the mask, and the AND
71/// instruction is added to CI.
75 if (!Phi->hasOneUse())
76 return Phi;
77
78 const APInt *M = nullptr;
79 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
80
81 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
82 // with a new integer type of the corresponding bit width.
83 if (match(J, m_And(m_Instruction(I), m_APInt(M)))) {
84 int32_t Bits = (*M + 1).exactLogBase2();
85 if (Bits > 0) {
86 RT = IntegerType::get(Phi->getContext(), Bits);
87 Visited.insert(Phi);
88 CI.insert(J);
89 return J;
90 }
91 }
92 return Phi;
93}
94
95/// Compute the minimal bit width needed to represent a reduction whose exit
96/// instruction is given by Exit.
97static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
98 DemandedBits *DB,
// NOTE(review): the scrape elided original line 99 here — presumably the
// `AssumptionCache *AC` parameter, since `AC` is referenced below; confirm
// against the upstream file.
100 DominatorTree *DT) {
// Returns {narrowest integer type that can represent the reduction, IsSigned}.
// IsSigned == true means a sext (rather than zext) is required to restore the
// value to its original width.
101 bool IsSigned = false;
102 const DataLayout &DL = Exit->getDataLayout();
// Start pessimistically with the full bit width of the exit value's type.
103 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
104
105 if (DB) {
106 // Use the demanded bits analysis to determine the bits that are live out
107 // of the exit instruction, rounding up to the nearest power of two. If the
108 // use of demanded bits results in a smaller bit width, we know the value
109 // must be positive (i.e., IsSigned = false), because if this were not the
110 // case, the sign bit would have been demanded.
111 auto Mask = DB->getDemandedBits(Exit);
112 MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
113 }
114
115 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
116 // If demanded bits wasn't able to limit the bit width, we can try to use
117 // value tracking instead. This can be the case, for example, if the value
118 // may be negative.
119 auto NumSignBits = ComputeNumSignBits(Exit, DL, AC, nullptr, DT);
120 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
121 MaxBitWidth = NumTypeBits - NumSignBits;
122 KnownBits Bits = computeKnownBits(Exit, DL);
123 if (!Bits.isNonNegative()) {
124 // If the value is not known to be non-negative, we set IsSigned to true,
125 // meaning that we will use sext instructions instead of zext
126 // instructions to restore the original type.
127 IsSigned = true;
128 // Make sure at least one sign bit is included in the result, so it
129 // will get properly sign-extended.
130 ++MaxBitWidth;
131 }
132 }
// Round up to the next power of two so the narrowed reduction is expressed in
// a conventional integer width.
133 MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
134
135 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
136 IsSigned);
137}
138
139/// Collect cast instructions that can be ignored in the vectorizer's cost
140/// model, given a reduction exit value and the minimal type in which the
141// reduction can be represented. Also search casts to the recurrence type
142// to find the minimum width used by the recurrence.
143static void collectCastInstrs(Loop *TheLoop, Instruction *Exit,
144 Type *RecurrenceType,
// NOTE(review): the scrape elided original line 145 here — presumably the
// output set parameter (`Casts` is populated below); confirm upstream.
146 unsigned &MinWidthCastToRecurTy) {
147
// NOTE(review): original lines 148-149 are elided — presumably the `Worklist`
// and `Visited` local declarations used by the traversal below; confirm
// upstream.
150 Worklist.push_back(Exit);
// Sentinel: -1U == UINT_MAX, so the first cast width found below shrinks it.
151 MinWidthCastToRecurTy = -1U;
152
// Depth-first walk backwards from the exit value through loop-resident
// operands, classifying any cast instruction encountered.
153 while (!Worklist.empty()) {
154 Instruction *Val = Worklist.pop_back_val();
155 Visited.insert(Val);
156 if (auto *Cast = dyn_cast<CastInst>(Val)) {
157 if (Cast->getSrcTy() == RecurrenceType) {
158 // If the source type of a cast instruction is equal to the recurrence
159 // type, it will be eliminated, and should be ignored in the vectorizer
160 // cost model.
161 Casts.insert(Cast);
162 continue;
163 }
164 if (Cast->getDestTy() == RecurrenceType) {
165 // The minimum width used by the recurrence is found by checking for
166 // casts on its operands. The minimum width is used by the vectorizer
167 // when finding the widest type for in-loop reductions without any
168 // loads/stores.
169 MinWidthCastToRecurTy = std::min<unsigned>(
170 MinWidthCastToRecurTy, Cast->getSrcTy()->getScalarSizeInBits());
171 continue;
172 }
173 }
174 // Add all operands to the work list if they are loop-varying values that
175 // we haven't yet visited.
176 for (Value *O : cast<User>(Val)->operands())
177 if (auto *I = dyn_cast<Instruction>(O))
178 if (TheLoop->contains(I) && !Visited.count(I))
179 Worklist.push_back(I);
180 }
181}
182
183// Check if a given Phi node can be recognized as an ordered reduction for
184// vectorizing floating point operations without unsafe math.
185static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
186 Instruction *Exit, PHINode *Phi) {
// Returns true if the reduction rooted at Phi, with loop-exit value Exit, can
// be vectorized as an in-order FP reduction (no reassociation of FP math).
187 // Currently only FAdd and FMulAdd are supported.
188 if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
189 return false;
190
191 if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
192 return false;
193
194 if (Kind == RecurKind::FMulAdd &&
// NOTE(review): the scrape elided original line 195 (the second half of this
// condition — presumably a check that Exit is the llvm.fmuladd call); confirm
// against the upstream file before relying on this copy.
196 return false;
197
198 // Ensure the exit instruction has only one user other than the reduction PHI
// hasNUsesOrMore(3) rejects three or more uses, i.e. Exit may be used at most
// twice: by the reduction PHI and by one other (e.g. live-out) user.
199 if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
200 return false;
201
202 // The only pattern accepted is the one in which the reduction PHI
203 // is used as one of the operands of the exit instruction
204 auto *Op0 = Exit->getOperand(0);
205 auto *Op1 = Exit->getOperand(1);
206 if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
207 return false;
// For fmuladd, the accumulator (and thus the reduction PHI) is operand 2.
208 if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
209 return false;
210
211 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
212 << ", ExitInst: " << *Exit << "\n");
213
214 return true;
215}
216
217// Collect FMF from a value and its associated fcmp in select patterns
219 FastMathFlags FMF = cast<FPMathOperator>(V)->getFastMathFlags();
220 if (auto *Sel = dyn_cast<SelectInst>(V)) {
221 // Accept FMF from either fcmp or select in a min/max idiom.
222 // TODO: Remove this when FMF propagation is fixed or we standardize on
223 // intrinsics.
224 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
225 FMF |= FCmp->getFastMathFlags();
226 }
227 return FMF;
228}
229
230static std::optional<FastMathFlags>
232 bool HasRequiredFMF = FPOp && FPOp->hasNoNaNs() && FPOp->hasNoSignedZeros();
233 if (HasRequiredFMF)
234 return collectMinMaxFMF(FPOp);
235
236 switch (RK) {
241 break;
242
243 case RecurKind::FMax:
245 return std::nullopt;
247 break;
248 case RecurKind::FMin:
250 return std::nullopt;
252 break;
253 default:
254 return std::nullopt;
255 }
256 return collectMinMaxFMF(FPOp);
257}
258
260 ScalarEvolution *SE) {
261 Type *Ty = Phi->getType();
262 BasicBlock *Latch = TheLoop->getLoopLatch();
263 if (Phi->getNumIncomingValues() != 2 ||
264 Phi->getParent() != TheLoop->getHeader() ||
265 (!Ty->isIntegerTy() && !Ty->isFloatingPointTy()) || !Latch)
266 return {};
267
268 auto GetMinMaxRK = [](Value *V, Value *&A, Value *&B) -> RecurKind {
269 if (match(V, m_UMin(m_Value(A), m_Value(B))))
270 return RecurKind::UMin;
271 if (match(V, m_UMax(m_Value(A), m_Value(B))))
272 return RecurKind::UMax;
273 if (match(V, m_SMax(m_Value(A), m_Value(B))))
274 return RecurKind::SMax;
275 if (match(V, m_SMin(m_Value(A), m_Value(B))))
276 return RecurKind::SMin;
277 if (match(V, m_OrdOrUnordFMin(m_Value(A), m_Value(B))) ||
279 return RecurKind::FMin;
280 if (match(V, m_OrdOrUnordFMax(m_Value(A), m_Value(B))) ||
282 return RecurKind::FMax;
283 if (match(V, m_FMinimum(m_Value(A), m_Value(B))))
284 return RecurKind::FMinimum;
285 if (match(V, m_FMaximum(m_Value(A), m_Value(B))))
286 return RecurKind::FMaximum;
291 return RecurKind::None;
292 };
293
295 Value *BackedgeValue = Phi->getIncomingValueForBlock(Latch);
297 // Walk def-use chains upwards from BackedgeValue to identify min/max
298 // recurrences.
299 SmallVector<Value *> WorkList({BackedgeValue});
300 SmallPtrSet<Value *, 8> Chain({Phi});
301 while (!WorkList.empty()) {
302 Value *Cur = WorkList.pop_back_val();
303 if (!Chain.insert(Cur).second)
304 continue;
305 auto *I = dyn_cast<Instruction>(Cur);
306 if (!I || !TheLoop->contains(I))
307 return {};
308 if (auto *PN = dyn_cast<PHINode>(I)) {
309 append_range(WorkList, PN->operands());
310 continue;
311 }
312 Value *A, *B;
313 RecurKind CurRK = GetMinMaxRK(Cur, A, B);
314 if (CurRK == RecurKind::None || (RK != RecurKind::None && CurRK != RK))
315 return {};
316
317 RK = CurRK;
318 // Check required fast-math flags for FP recurrences.
320 auto CurFMF = hasRequiredFastMathFlags(cast<FPMathOperator>(Cur), RK);
321 if (!CurFMF)
322 return {};
323 FMF &= *CurFMF;
324 }
325
326 if (auto *SI = dyn_cast<SelectInst>(I))
327 Chain.insert(SI->getCondition());
328
329 if (A == Phi || B == Phi)
330 continue;
331
332 // Add operand to worklist if it matches the pattern (exactly one must
333 // match)
334 Value *X, *Y;
335 auto *IA = dyn_cast<Instruction>(A);
336 auto *IB = dyn_cast<Instruction>(B);
337 bool AMatches = IA && TheLoop->contains(IA) && GetMinMaxRK(A, X, Y) == RK;
338 bool BMatches = IB && TheLoop->contains(IB) && GetMinMaxRK(B, X, Y) == RK;
339 if (AMatches == BMatches) // Both or neither match
340 return {};
341 WorkList.push_back(AMatches ? A : B);
342 }
343
344 // Handle argmin/argmax pattern: PHI has uses outside the reduction chain
345 // that are not intermediate min/max operations (which are handled below).
346 // Requires integer min/max, and single-use BackedgeValue (so vectorizer can
347 // handle both PHIs together).
348 bool PhiHasInvalidUses = any_of(Phi->users(), [&](User *U) {
349 Value *A, *B;
350 return !Chain.contains(U) && TheLoop->contains(cast<Instruction>(U)) &&
351 GetMinMaxRK(U, A, B) == RecurKind::None;
352 });
353 if (PhiHasInvalidUses) {
355 !BackedgeValue->hasOneUse())
356 return {};
358 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
359 /*Exit=*/nullptr, /*Store=*/nullptr, RK, FastMathFlags(),
360 /*ExactFP=*/nullptr, Phi->getType(), /*IsMultiUse=*/true);
361 }
362
363 // Validate chain entries and collect stores from chain entries and
364 // intermediate ops.
366 unsigned OutOfLoopUses = 0;
367 for (Value *V : Chain) {
368 for (User *U : V->users()) {
369 if (Chain.contains(U))
370 continue;
371 auto *I = dyn_cast<Instruction>(U);
372 if (!I || (!TheLoop->contains(I) &&
373 (V != BackedgeValue || ++OutOfLoopUses > 1)))
374 return {};
375 if (!TheLoop->contains(I))
376 continue;
377 if (auto *SI = dyn_cast<StoreInst>(I)) {
378 Stores.push_back(SI);
379 continue;
380 }
381 // Must be intermediate min/max of the same kind.
382 Value *A, *B;
383 if (GetMinMaxRK(I, A, B) != RK)
384 return {};
385 for (User *IU : I->users()) {
386 if (auto *SI = dyn_cast<StoreInst>(IU))
387 Stores.push_back(SI);
388 else if (!Chain.contains(IU))
389 return {};
390 }
391 }
392 }
393
394 // Validate all stores go to same invariant address and are in the same block.
395 StoreInst *IntermediateStore = nullptr;
396 const SCEV *StorePtrSCEV = nullptr;
397 for (StoreInst *SI : Stores) {
398 const SCEV *Ptr = SE->getSCEV(SI->getPointerOperand());
399 if (!SE->isLoopInvariant(Ptr, TheLoop) ||
400 (StorePtrSCEV && StorePtrSCEV != Ptr))
401 return {};
402 StorePtrSCEV = Ptr;
403 if (!IntermediateStore)
404 IntermediateStore = SI;
405 else if (IntermediateStore->getParent() != SI->getParent())
406 return {};
407 else if (IntermediateStore->comesBefore(SI))
408 IntermediateStore = SI;
409 }
410
412 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader()),
413 cast<Instruction>(BackedgeValue), IntermediateStore, RK, FMF, nullptr,
414 Phi->getType());
415}
416
417// This matches a phi that selects between the original value (HeaderPhi) and an
418// arbitrary non-reduction value.
419static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi,
420 SmallPtrSetImpl<Instruction *> &ReductionInstrs) {
421 unsigned NumNonReduxInputs = 0;
422 for (const Value *Op : Phi->operands()) {
423 if (!ReductionInstrs.contains(dyn_cast<Instruction>(Op))) {
424 if (++NumNonReduxInputs > 1)
425 return false;
426 } else if (Op != HeaderPhi) {
427 // TODO: Remove this restriction once chained phis are supported.
428 return false;
429 }
430 }
431 return NumNonReduxInputs == 1;
432}
433
435 PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes,
437 ScalarEvolution *SE) {
438 if (Phi->getNumIncomingValues() != 2)
439 return false;
440
441 // Reduction variables are only found in the loop header block.
442 if (Phi->getParent() != TheLoop->getHeader())
443 return false;
444
445 // Obtain the reduction start value from the value that comes from the loop
446 // preheader.
447 if (!TheLoop->getLoopPreheader())
448 return false;
449
450 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
451 // ExitInstruction is the single value which is used outside the loop.
452 // We only allow for a single reduction value to be used outside the loop.
453 // This includes users of the reduction, variables (which form a cycle
454 // which ends in the phi node).
455 Instruction *ExitInstruction = nullptr;
456
457 // Variable to keep last visited store instruction. By the end of the
458 // algorithm this variable will be either empty or having intermediate
459 // reduction value stored in invariant address.
460 StoreInst *IntermediateStore = nullptr;
461
462 // Indicates that we found a reduction operation in our scan.
463 bool FoundReduxOp = false;
464
465 // We start with the PHI node and scan for all of the users of this
466 // instruction. All users must be instructions that can be used as reduction
467 // variables (such as ADD). We must have a single out-of-block user. The cycle
468 // must include the original PHI.
469 bool FoundStartPHI = false;
470
471 // To recognize AnyOf patterns formed by a icmp select sequence, we store
472 // the number of instruction we saw to make sure we only see one.
473 unsigned NumCmpSelectPatternInst = 0;
474 InstDesc ReduxDesc(false, nullptr);
475
476 // To recognize find-lasts of conditional operations (such as loads or
477 // divides), that need masking, we track non-phi users and if we've found a
478 // "find-last-like" phi (see isFindLastLikePhi). We currently only support
479 // find-last reduction chains with a single "find-last-like" phi and do not
480 // allow any other operations.
481 [[maybe_unused]] unsigned NumNonPHIUsers = 0;
482 bool FoundFindLastLikePhi = false;
483
484 // Data used for determining if the recurrence has been type-promoted.
485 Type *RecurrenceType = Phi->getType();
487 unsigned MinWidthCastToRecurrenceType;
488 Instruction *Start = Phi;
489 bool IsSigned = false;
490
493
494 // Return early if the recurrence kind does not match the type of Phi. If the
495 // recurrence kind is arithmetic, we attempt to look through AND operations
496 // resulting from the type promotion performed by InstCombine. Vector
497 // operations are not limited to the legal integer widths, so we may be able
498 // to evaluate the reduction in the narrower width.
499 // Check the scalar type to handle both scalar and vector types.
500 Type *ScalarTy = RecurrenceType->getScalarType();
501 if (Kind == RecurKind::FindLast) {
502 // FindLast supports all primitive scalar types.
503 if (!ScalarTy->isFloatingPointTy() && !ScalarTy->isIntegerTy() &&
504 !ScalarTy->isPointerTy())
505 return false;
506 } else if (ScalarTy->isFloatingPointTy()) {
508 return false;
509 } else if (ScalarTy->isIntegerTy()) {
510 if (!isIntegerRecurrenceKind(Kind))
511 return false;
512 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
513 } else {
514 // Pointer min/max may exist, but it is not supported as a reduction op.
515 return false;
516 }
517
518 Worklist.push_back(Start);
519 VisitedInsts.insert(Start);
520
521 // Start with all flags set because we will intersect this with the reduction
522 // flags from all the reduction operations.
524
525 // The first instruction in the use-def chain of the Phi node that requires
526 // exact floating point operations.
527 Instruction *ExactFPMathInst = nullptr;
528
529 // A value in the reduction can be used:
530 // - By the reduction:
531 // - Reduction operation:
532 // - One use of reduction value (safe).
533 // - Multiple use of reduction value (not safe).
534 // - PHI:
535 // - All uses of the PHI must be the reduction (safe).
536 // - Otherwise, not safe.
537 // - By instructions outside of the loop (safe).
538 // * One value may have several outside users, but all outside
539 // uses must be of the same value.
540 // - By store instructions with a loop invariant address (safe with
541 // the following restrictions):
542 // * If there are several stores, all must have the same address.
543 // * Final value should be stored in that loop invariant address.
544 // - By an instruction that is not part of the reduction (not safe).
545 // This is either:
546 // * An instruction type other than PHI or the reduction operation.
547 // * A PHI in the header other than the initial PHI.
548 while (!Worklist.empty()) {
549 Instruction *Cur = Worklist.pop_back_val();
550
551 // Store instructions are allowed iff it is the store of the reduction
552 // value to the same loop invariant memory location.
553 if (auto *SI = dyn_cast<StoreInst>(Cur)) {
554 if (!SE) {
555 LLVM_DEBUG(dbgs() << "Store instructions are not processed without "
556 << "Scalar Evolution Analysis\n");
557 return false;
558 }
559
560 const SCEV *PtrScev = SE->getSCEV(SI->getPointerOperand());
561 // Check it is the same address as previous stores
562 if (IntermediateStore) {
563 const SCEV *OtherScev =
564 SE->getSCEV(IntermediateStore->getPointerOperand());
565
566 if (OtherScev != PtrScev) {
567 LLVM_DEBUG(dbgs() << "Storing reduction value to different addresses "
568 << "inside the loop: " << *SI->getPointerOperand()
569 << " and "
570 << *IntermediateStore->getPointerOperand() << '\n');
571 return false;
572 }
573 }
574
575 // Check the pointer is loop invariant
576 if (!SE->isLoopInvariant(PtrScev, TheLoop)) {
577 LLVM_DEBUG(dbgs() << "Storing reduction value to non-uniform address "
578 << "inside the loop: " << *SI->getPointerOperand()
579 << '\n');
580 return false;
581 }
582
583 // IntermediateStore is always the last store in the loop.
585 continue;
586 }
587
588 // No Users.
589 // If the instruction has no users then this is a broken chain and can't be
590 // a reduction variable.
591 if (Cur->use_empty())
592 return false;
593
594 bool IsAPhi = isa<PHINode>(Cur);
595 if (!IsAPhi)
596 ++NumNonPHIUsers;
597
598 // A header PHI use other than the original PHI.
599 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
600 return false;
601
602 // Reductions of instructions such as Div, and Sub is only possible if the
603 // LHS is the reduction variable.
604 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
605 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
606 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
607 return false;
608
609 // Any reduction instruction must be of one of the allowed kinds. We ignore
610 // the starting value (the Phi or an AND instruction if the Phi has been
611 // type-promoted).
612 if (Cur != Start) {
613 ReduxDesc = isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, SE);
614 ExactFPMathInst = ExactFPMathInst == nullptr
615 ? ReduxDesc.getExactFPMathInst()
616 : ExactFPMathInst;
617 if (!ReduxDesc.isRecurrence())
618 return false;
619 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
620 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi)
621 FMF &= collectMinMaxFMF(ReduxDesc.getPatternInst());
622 // Update this reduction kind if we matched a new instruction.
623 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
624 // state accurate while processing the worklist?
625 if (ReduxDesc.getRecKind() != RecurKind::None)
626 Kind = ReduxDesc.getRecKind();
627 }
628
629 bool IsASelect = isa<SelectInst>(Cur);
630
631 // A conditional reduction operation must only have 2 or less uses in
632 // VisitedInsts.
633 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
634 hasMultipleUsesOf(Cur, VisitedInsts, 2))
635 return false;
636
637 // A reduction operation must only have one use of the reduction value.
638 if (!IsAPhi && !IsASelect && !isAnyOfRecurrenceKind(Kind) &&
639 hasMultipleUsesOf(Cur, VisitedInsts, 1))
640 return false;
641
642 // All inputs to a PHI node must be a reduction value, unless the phi is a
643 // "FindLast-like" phi (described below).
644 if (IsAPhi && Cur != Phi) {
645 if (!areAllUsesIn(Cur, VisitedInsts)) {
646 // A "FindLast-like" phi acts like a conditional select between the
647 // previous reduction value, and an arbitrary value. Note: Multiple
648 // "FindLast-like" phis are not supported see:
649 // IVDescriptorsTest.UnsupportedFindLastPhi.
650 FoundFindLastLikePhi =
651 Kind == RecurKind::FindLast && !FoundFindLastLikePhi &&
652 isFindLastLikePhi(cast<PHINode>(Cur), Phi, VisitedInsts);
653 if (!FoundFindLastLikePhi)
654 return false;
655 }
656 }
657
658 if (isAnyOfRecurrenceKind(Kind) && IsASelect)
659 ++NumCmpSelectPatternInst;
660
661 // Check whether we found a reduction operator.
662 FoundReduxOp |= (!IsAPhi || FoundFindLastLikePhi) && Cur != Start;
663
664 // Process users of current instruction. Push non-PHI nodes after PHI nodes
665 // onto the stack. This way we are going to have seen all inputs to PHI
666 // nodes once we get to them.
669 for (User *U : Cur->users()) {
671
672 // If the user is a call to llvm.fmuladd then the instruction can only be
673 // the final operand.
674 if (isFMulAddIntrinsic(UI))
675 if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
676 return false;
677
678 // Check if we found the exit user.
679 BasicBlock *Parent = UI->getParent();
680 if (!TheLoop->contains(Parent)) {
681 // If we already know this instruction is used externally, move on to
682 // the next user.
683 if (ExitInstruction == Cur)
684 continue;
685
686 // Exit if you find multiple values used outside or if the header phi
687 // node is being used. In this case the user uses the value of the
688 // previous iteration, in which case we would loose "VF-1" iterations of
689 // the reduction operation if we vectorize.
690 if (ExitInstruction != nullptr || Cur == Phi)
691 return false;
692
693 // The instruction used by an outside user must be the last instruction
694 // before we feed back to the reduction phi. Otherwise, we loose VF-1
695 // operations on the value.
696 if (!is_contained(Phi->operands(), Cur))
697 return false;
698
699 ExitInstruction = Cur;
700 continue;
701 }
702
703 // Process instructions only once (termination). Each reduction cycle
704 // value must only be used once, except by phi nodes and conditional
705 // reductions which are represented as a cmp followed by a select.
706 InstDesc IgnoredVal(false, nullptr);
707 if (VisitedInsts.insert(UI).second) {
708 if (isa<PHINode>(UI)) {
709 PHIs.push_back(UI);
710 } else {
712 if (SI && SI->getPointerOperand() == Cur) {
713 // Reduction variable chain can only be stored somewhere but it
714 // can't be used as an address.
715 return false;
716 }
717 NonPHIs.push_back(UI);
718 }
719 } else if (!isa<PHINode>(UI) &&
720 ((!isConditionalRdxPattern(UI).isRecurrence() &&
721 !isAnyOfPattern(TheLoop, Phi, UI, IgnoredVal)
722 .isRecurrence())))
723 return false;
724
725 // Remember that we completed the cycle.
726 if (UI == Phi)
727 FoundStartPHI = true;
728 }
729 Worklist.append(PHIs.begin(), PHIs.end());
730 Worklist.append(NonPHIs.begin(), NonPHIs.end());
731 }
732
733 // We only expect to match a single "find-last-like" phi per find-last
734 // reduction, with no non-phi operations in the reduction use chain.
735 assert((!FoundFindLastLikePhi ||
736 (Kind == RecurKind::FindLast && NumNonPHIUsers == 0)) &&
737 "Unexpectedly matched a 'find-last-like' phi");
738
739 if (isAnyOfRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
740 return false;
741
742 if (IntermediateStore) {
743 // Check that stored value goes to the phi node again. This way we make sure
744 // that the value stored in IntermediateStore is indeed the final reduction
745 // value.
746 if (!is_contained(Phi->operands(), IntermediateStore->getValueOperand())) {
747 LLVM_DEBUG(dbgs() << "Not a final reduction value stored: "
748 << *IntermediateStore << '\n');
749 return false;
750 }
751
752 // If there is an exit instruction it's value should be stored in
753 // IntermediateStore
754 if (ExitInstruction &&
755 IntermediateStore->getValueOperand() != ExitInstruction) {
756 LLVM_DEBUG(dbgs() << "Last store Instruction of reduction value does not "
757 "store last calculated value of the reduction: "
758 << *IntermediateStore << '\n');
759 return false;
760 }
761
762 // If all uses are inside the loop (intermediate stores), then the
763 // reduction value after the loop will be the one used in the last store.
764 if (!ExitInstruction)
765 ExitInstruction = cast<Instruction>(IntermediateStore->getValueOperand());
766 }
767
768 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
769 return false;
770
771 const bool IsOrdered =
772 checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
773
774 if (Start != Phi) {
775 // If the starting value is not the same as the phi node, we speculatively
776 // looked through an 'and' instruction when evaluating a potential
777 // arithmetic reduction to determine if it may have been type-promoted.
778 //
779 // We now compute the minimal bit width that is required to represent the
780 // reduction. If this is the same width that was indicated by the 'and', we
781 // can represent the reduction in the smaller type. The 'and' instruction
782 // will be eliminated since it will essentially be a cast instruction that
783 // can be ignore in the cost model. If we compute a different type than we
784 // did when evaluating the 'and', the 'and' will not be eliminated, and we
785 // will end up with different kinds of operations in the recurrence
786 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
787 // the case.
788 //
789 // The vectorizer relies on InstCombine to perform the actual
790 // type-shrinking. It does this by inserting instructions to truncate the
791 // exit value of the reduction to the width indicated by RecurrenceType and
792 // then extend this value back to the original width. If IsSigned is false,
793 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
794 // used.
795 //
796 // TODO: We should not rely on InstCombine to rewrite the reduction in the
797 // smaller type. We should just generate a correctly typed expression
798 // to begin with.
799 Type *ComputedType;
800 std::tie(ComputedType, IsSigned) =
801 computeRecurrenceType(ExitInstruction, DB, AC, DT);
802 if (ComputedType != RecurrenceType)
803 return false;
804 }
805
806 // Collect cast instructions and the minimum width used by the recurrence.
807 // If the starting value is not the same as the phi node and the computed
808 // recurrence type is equal to the recurrence type, the recurrence expression
809 // will be represented in a narrower or wider type. If there are any cast
810 // instructions that will be unnecessary, collect them in CastsFromRecurTy.
811 // Note that the 'and' instruction was already included in this list.
812 //
813 // TODO: A better way to represent this may be to tag in some way all the
814 // instructions that are a part of the reduction. The vectorizer cost
815 // model could then apply the recurrence type to these instructions,
816 // without needing a white list of instructions to ignore.
817 // This may also be useful for the inloop reductions, if it can be
818 // kept simple enough.
819 collectCastInstrs(TheLoop, ExitInstruction, RecurrenceType, CastInsts,
820 MinWidthCastToRecurrenceType);
821
822 // We found a reduction var if we have reached the original phi node and we
823 // only have a single instruction with out-of-loop users.
824
825 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
826 // is saved as part of the RecurrenceDescriptor.
827
828 // Save the description of this reduction variable.
829 RedDes =
830 RecurrenceDescriptor(RdxStart, ExitInstruction, IntermediateStore, Kind,
831 FMF, ExactFPMathInst, RecurrenceType, IsSigned,
832 IsOrdered, CastInsts, MinWidthCastToRecurrenceType);
833 return true;
834}
835
836// We are looking for loops that do something like this:
837// int r = 0;
838// for (int i = 0; i < n; i++) {
839// if (src[i] > 3)
840// r = 3;
841// }
842// where the reduction value (r) only has two states, in this example 0 or 3.
843// The generated LLVM IR for this type of loop will be like this:
844// for.body:
845// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
846// ...
847// %cmp = icmp sgt i32 %5, 3
848// %spec.select = select i1 %cmp, i32 3, i32 %r
849// ...
850// In general we can support vectorization of loops where 'r' flips between
851// any two non-constants, provided they are loop invariant. The only thing
852// we actually care about at the end of the loop is whether or not any lane
853// in the selected vector is different from the start value. The final
854// across-vector reduction after the loop simply involves choosing the start
855// value if nothing changed (0 in the example above) or the other selected
856// value (3 in the example above).
859 Instruction *I, InstDesc &Prev) {
860 // We must handle the select(cmp(),x,y) as a single instruction. Advance to
861 // the select.
862 if (match(I, m_OneUse(m_Cmp()))) {
863 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
864 return InstDesc(Select, Prev.getRecKind());
865 }
866
867 if (!match(I, m_Select(m_Cmp(), m_Value(), m_Value())))
868 return InstDesc(false, I);
869
871 Value *NonPhi = nullptr;
872
873 if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
874 NonPhi = SI->getFalseValue();
875 else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
876 NonPhi = SI->getTrueValue();
877 else
878 return InstDesc(false, I);
879
880 // We are looking for selects of the form:
881 // select(cmp(), phi, loop_invariant) or
882 // select(cmp(), loop_invariant, phi)
883 if (!Loop->isLoopInvariant(NonPhi))
884 return InstDesc(false, I);
885
886 return InstDesc(I, RecurKind::AnyOf);
887}
888
889// We are looking for loops that do something like this:
890// int r = 0;
891// for (int i = 0; i < n; i++) {
892// if (src[i] > 3)
893// r = i;
894// }
895// or like this:
896// int r = 0;
897// for (int i = 0; i < n; i++) {
898// if (src[i] > 3)
899// r = <loop-varying value>;
900// }
901// The reduction value (r) is derived from either the values of an induction
902// variable (i) sequence, an arbitrary loop-varying value, or from the start
903// value (0). The LLVM IR generated for such loops would be as follows:
904// for.body:
905// %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
906// %i = phi i32 [ %inc, %for.body ], [ 0, %entry ]
907// ...
908// %cmp = icmp sgt i32 %5, 3
909// %spec.select = select i1 %cmp, i32 %i, i32 %r
910// %inc = add nsw i32 %i, 1
911// ...
912//
913// When searching for an arbitrary loop-varying value, the reduction value will
914// either be the initial value (0) if the condition was never met, or the value
915// of the loop-varying value in the most recent loop iteration where the
916// condition was met.
// NOTE(review): This block is doxygen-extraction residue — the leading digits
// on every line are source line numbers fused into the text, and original
// lines 917-919 (the isFindPattern function signature, taking the loop, the
// reduction phi, the candidate instruction and ScalarEvolution per the call
// site below in isRecurrenceInstr) were dropped by the extraction. Recover
// the real text from upstream llvm/lib/Analysis/IVDescriptors.cpp before
// attempting to compile.
920 // TODO: Support the vectorization of FindLastIV when the reduction phi is
921 // used by more than one select instruction. This vectorization is only
922 // performed when the SCEV of each increasing induction variable used by the
923 // select instructions is identical.
924 if (!OrigPhi->hasOneUse())
925 return InstDesc(false, I);
926
927 // We are looking for selects of the form:
928 // select(cmp(), phi, value) or
929 // select(cmp(), value, phi)
930 // TODO: Match selects with multi-use cmp conditions.
931 Value *NonRdxPhi = nullptr;
932 if (!match(I, m_CombineOr(m_Select(m_OneUse(m_Cmp()), m_Value(NonRdxPhi),
933 m_Specific(OrigPhi)),
934 m_Select(m_OneUse(m_Cmp()), m_Specific(OrigPhi),
935 m_Value(NonRdxPhi)))))
936 return InstDesc(false, I);
937
// NOTE(review): original line 938 — the successful-match return that builds
// the Find-recurrence InstDesc from NonRdxPhi — was also dropped by the
// extraction; restore it from upstream, do not guess it.
939}
940
941/// Returns true if the select instruction has users in the compare-and-add
942/// reduction pattern below. The select instruction argument is the last one
943/// in the sequence.
944///
945/// %sum.1 = phi ...
946/// ...
947/// %cmp = fcmp pred %0, %CFP
948/// %add = fadd %0, %sum.1
949/// %sum.2 = select %cmp, %add, %sum.1
952 Value *TrueVal, *FalseVal;
953 // Only handle single use cases for now.
954 if (!match(I,
955 m_Select(m_OneUse(m_Cmp()), m_Value(TrueVal), m_Value(FalseVal))))
956 return InstDesc(false, I);
957
958 // Handle only when either of operands of select instruction is a PHI
959 // node for now.
960 if ((isa<PHINode>(TrueVal) && isa<PHINode>(FalseVal)) ||
961 (!isa<PHINode>(TrueVal) && !isa<PHINode>(FalseVal)))
962 return InstDesc(false, I);
963
964 Instruction *I1 = isa<PHINode>(TrueVal) ? dyn_cast<Instruction>(FalseVal)
965 : dyn_cast<Instruction>(TrueVal);
966 if (!I1 || !I1->isBinaryOp())
967 return InstDesc(false, I);
968
969 Value *Op1, *Op2;
970 if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
971 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
972 I1->isFast()) ||
973 (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
974 ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
975 m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
976 (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
977 return InstDesc(false, I);
978
981 if (!IPhi || IPhi != FalseVal)
982 return InstDesc(false, I);
983
984 return InstDesc(true, I);
985}
986
989 Instruction *I, RecurKind Kind,
990 InstDesc &Prev, ScalarEvolution *SE) {
991 assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
992 switch (I->getOpcode()) {
993 default:
994 return InstDesc(false, I);
995 case Instruction::PHI:
996 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
997 case Instruction::Sub:
998 return InstDesc(
999 Kind == RecurKind::Sub || Kind == RecurKind::AddChainWithSubs, I);
1000 case Instruction::Add:
1001 return InstDesc(
1002 Kind == RecurKind::Add || Kind == RecurKind::AddChainWithSubs, I);
1003 case Instruction::Mul:
1004 return InstDesc(Kind == RecurKind::Mul, I);
1005 case Instruction::And:
1006 return InstDesc(Kind == RecurKind::And, I);
1007 case Instruction::Or:
1008 return InstDesc(Kind == RecurKind::Or, I);
1009 case Instruction::Xor:
1010 return InstDesc(Kind == RecurKind::Xor, I);
1011 case Instruction::FDiv:
1012 case Instruction::FMul:
1013 return InstDesc(Kind == RecurKind::FMul, I,
1014 I->hasAllowReassoc() ? nullptr : I);
1015 case Instruction::FSub:
1016 case Instruction::FAdd:
1017 return InstDesc(Kind == RecurKind::FAdd, I,
1018 I->hasAllowReassoc() ? nullptr : I);
1019 case Instruction::Select:
1020 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
1021 Kind == RecurKind::Add || Kind == RecurKind::Mul ||
1023 return isConditionalRdxPattern(I);
1024 if (isFindRecurrenceKind(Kind) && SE)
1025 return isFindPattern(L, OrigPhi, I, *SE);
1026 [[fallthrough]];
1027 case Instruction::FCmp:
1028 case Instruction::ICmp:
1029 case Instruction::Call:
1030 if (isAnyOfRecurrenceKind(Kind))
1031 return isAnyOfPattern(L, OrigPhi, I, Prev);
1032 if (isFMulAddIntrinsic(I))
1033 return InstDesc(Kind == RecurKind::FMulAdd, I,
1034 I->hasAllowReassoc() ? nullptr : I);
1035 return InstDesc(false, I);
1036 }
1037}
1038
1041 unsigned MaxNumUses) {
1042 unsigned NumUses = 0;
1043 for (const Use &U : I->operands()) {
1044 if (Insts.count(dyn_cast<Instruction>(U)))
1045 ++NumUses;
1046 if (NumUses > MaxNumUses)
1047 return true;
1048 }
1049
1050 return false;
1051}
1052
1054 RecurrenceDescriptor &RedDes,
1056 DominatorTree *DT,
1057 ScalarEvolution *SE) {
1058 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, RedDes, DB, AC, DT, SE)) {
1059 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
1060 return true;
1061 }
1062 if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, RedDes, DB, AC, DT, SE)) {
1063 LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
1064 return true;
1065 }
1066 if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, RedDes, DB, AC,
1067 DT, SE)) {
1068 LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
1069 << "\n");
1070 return true;
1071 }
1072 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, RedDes, DB, AC, DT, SE)) {
1073 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
1074 return true;
1075 }
1076 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, RedDes, DB, AC, DT, SE)) {
1077 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
1078 return true;
1079 }
1080 if (AddReductionVar(Phi, RecurKind::And, TheLoop, RedDes, DB, AC, DT, SE)) {
1081 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
1082 return true;
1083 }
1084 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, RedDes, DB, AC, DT, SE)) {
1085 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
1086 return true;
1087 }
1088 auto RD = getMinMaxRecurrence(Phi, TheLoop, SE);
1089 if (RD.getRecurrenceKind() != RecurKind::None) {
1090 assert(
1091 RecurrenceDescriptor::isMinMaxRecurrenceKind(RD.getRecurrenceKind()) &&
1092 "Expected a min/max recurrence kind");
1093 LLVM_DEBUG(dbgs() << "Found a min/max reduction PHI." << *Phi << "\n");
1094 RedDes = std::move(RD);
1095 return true;
1096 }
1097 if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, RedDes, DB, AC, DT, SE)) {
1098 LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
1099 << "\n");
1100 return true;
1101 }
1102 if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, RedDes, DB, AC, DT,
1103 SE)) {
1104 LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
1105 return true;
1106 }
1107 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, RedDes, DB, AC, DT, SE)) {
1108 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
1109 return true;
1110 }
1111 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, RedDes, DB, AC, DT, SE)) {
1112 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
1113 return true;
1114 }
1115 if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, RedDes, DB, AC, DT,
1116 SE)) {
1117 LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
1118 return true;
1119 }
1120
1121 // Not a reduction of known type.
1122 return false;
1123}
1124
1126 DominatorTree *DT) {
1127
1128 // Ensure the phi node is in the loop header and has two incoming values.
1129 if (Phi->getParent() != TheLoop->getHeader() ||
1130 Phi->getNumIncomingValues() != 2)
1131 return false;
1132
1133 // Ensure the loop has a preheader and a single latch block. The loop
1134 // vectorizer will need the latch to set up the next iteration of the loop.
1135 auto *Preheader = TheLoop->getLoopPreheader();
1136 auto *Latch = TheLoop->getLoopLatch();
1137 if (!Preheader || !Latch)
1138 return false;
1139
1140 // Ensure the phi node's incoming blocks are the loop preheader and latch.
1141 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
1142 Phi->getBasicBlockIndex(Latch) < 0)
1143 return false;
1144
1145 // Get the previous value. The previous value comes from the latch edge while
1146 // the initial value comes from the preheader edge.
1147 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
1148
1149 // If Previous is a phi in the header, go through incoming values from the
1150 // latch until we find a non-phi value. Use this as the new Previous, all uses
1151 // in the header will be dominated by the original phi, but need to be moved
1152 // after the non-phi previous value.
1154 while (auto *PrevPhi = dyn_cast_or_null<PHINode>(Previous)) {
1155 if (PrevPhi->getParent() != Phi->getParent())
1156 return false;
1157 if (!SeenPhis.insert(PrevPhi).second)
1158 return false;
1159 Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
1160 }
1161
1162 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
1163 return false;
1164
1165 // Ensure every user of the phi node (recursively) is dominated by the
1166 // previous value. The dominance requirement ensures the loop vectorizer will
1167 // not need to vectorize the initial value prior to the first iteration of the
1168 // loop.
1169 // TODO: Consider extending this sinking to handle memory instructions.
1170
1172 BasicBlock *PhiBB = Phi->getParent();
1174 auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
1175 // Cyclic dependence.
1176 if (Previous == SinkCandidate)
1177 return false;
1178
1179 if (!Seen.insert(SinkCandidate).second)
1180 return true;
1181 if (DT->dominates(Previous,
1182 SinkCandidate)) // We already are good w/o sinking.
1183 return true;
1184
1185 if (SinkCandidate->getParent() != PhiBB ||
1186 SinkCandidate->mayHaveSideEffects() ||
1187 SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
1188 return false;
1189
1190 // If we reach a PHI node that is not dominated by Previous, we reached a
1191 // header PHI. No need for sinking.
1192 if (isa<PHINode>(SinkCandidate))
1193 return true;
1194
1195 // Sink User tentatively and check its users
1196 WorkList.push_back(SinkCandidate);
1197 return true;
1198 };
1199
1200 WorkList.push_back(Phi);
1201 // Try to recursively sink instructions and their users after Previous.
1202 while (!WorkList.empty()) {
1203 Instruction *Current = WorkList.pop_back_val();
1204 for (User *User : Current->users()) {
1205 if (!TryToPushSinkCandidate(cast<Instruction>(User)))
1206 return false;
1207 }
1208 }
1209
1210 return true;
1211}
1212
1214 switch (Kind) {
1215 case RecurKind::Sub:
1216 return Instruction::Sub;
1218 case RecurKind::Add:
1219 return Instruction::Add;
1220 case RecurKind::Mul:
1221 return Instruction::Mul;
1222 case RecurKind::Or:
1223 return Instruction::Or;
1224 case RecurKind::And:
1225 return Instruction::And;
1226 case RecurKind::Xor:
1227 return Instruction::Xor;
1228 case RecurKind::FMul:
1229 return Instruction::FMul;
1230 case RecurKind::FMulAdd:
1231 case RecurKind::FAdd:
1232 return Instruction::FAdd;
1233 case RecurKind::SMax:
1234 case RecurKind::SMin:
1235 case RecurKind::UMax:
1236 case RecurKind::UMin:
1237 return Instruction::ICmp;
1238 case RecurKind::FMax:
1239 case RecurKind::FMin:
1244 return Instruction::FCmp;
1246 case RecurKind::AnyOf:
1247 case RecurKind::FindIV:
1248 // TODO: Set AnyOf and FindIV to Instruction::Select once in-loop reductions
1249 // are supported.
1250 default:
1251 llvm_unreachable("Unknown recurrence operation");
1252 }
1253}
1254
1257 SmallVector<Instruction *, 4> ReductionOperations;
1258 const bool IsMinMax = isMinMaxRecurrenceKind(Kind);
1259
1260 // Search down from the Phi to the LoopExitInstr, looking for instructions
1261 // with a single user of the correct type for the reduction.
1262
1263 // Note that we check that the type of the operand is correct for each item in
1264 // the chain, including the last (the loop exit value). This can come up from
1265 // sub, which would otherwise be treated as an add reduction. MinMax also need
1266 // to check for a pair of icmp/select, for which we use getNextInstruction and
1267 // isCorrectOpcode functions to step the right number of instruction, and
1268 // check the icmp/select pair.
1269 // FIXME: We also do not attempt to look through Select's yet, which might
1270 // be part of the reduction chain, or attempt to looks through And's to find a
1271 // smaller bitwidth. Subs are also currently not allowed (which are usually
1272 // treated as part of a add reduction) as they are expected to generally be
1273 // more expensive than out-of-loop reductions, and need to be costed more
1274 // carefully.
1275 unsigned ExpectedUses = 1;
1276 if (IsMinMax)
1277 ExpectedUses = 2;
1278
1279 auto getNextInstruction = [&](Instruction *Cur) -> Instruction * {
1280 for (auto *User : Cur->users()) {
1282 if (isa<PHINode>(UI))
1283 continue;
1284 if (IsMinMax) {
1285 // We are expecting a icmp/select pair, which we go to the next select
1286 // instruction if we can. We already know that Cur has 2 uses.
1287 if (isa<SelectInst>(UI))
1288 return UI;
1289 continue;
1290 }
1291 return UI;
1292 }
1293 return nullptr;
1294 };
1295 auto isCorrectOpcode = [&](Instruction *Cur) {
1296 if (IsMinMax) {
1297 Value *LHS, *RHS;
1299 matchSelectPattern(Cur, LHS, RHS).Flavor);
1300 }
1301 // Recognize a call to the llvm.fmuladd intrinsic.
1302 if (isFMulAddIntrinsic(Cur))
1303 return true;
1304
1305 if (Cur->getOpcode() == Instruction::Sub &&
1307 return true;
1308
1309 return Cur->getOpcode() == getOpcode();
1310 };
1311
1312 // Attempt to look through Phis which are part of the reduction chain
1313 unsigned ExtraPhiUses = 0;
1314 Instruction *RdxInstr = LoopExitInstr;
1315 if (auto ExitPhi = dyn_cast<PHINode>(LoopExitInstr)) {
1316 if (ExitPhi->getNumIncomingValues() != 2)
1317 return {};
1318
1319 Instruction *Inc0 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(0));
1320 Instruction *Inc1 = dyn_cast<Instruction>(ExitPhi->getIncomingValue(1));
1321
1322 Instruction *Chain = nullptr;
1323 if (Inc0 == Phi)
1324 Chain = Inc1;
1325 else if (Inc1 == Phi)
1326 Chain = Inc0;
1327 else
1328 return {};
1329
1330 RdxInstr = Chain;
1331 ExtraPhiUses = 1;
1332 }
1333
1334 // The loop exit instruction we check first (as a quick test) but add last. We
1335 // check the opcode is correct (and dont allow them to be Subs) and that they
1336 // have expected to have the expected number of uses. They will have one use
1337 // from the phi and one from a LCSSA value, no matter the type.
1338 if (!isCorrectOpcode(RdxInstr) || !LoopExitInstr->hasNUses(2))
1339 return {};
1340
1341 // Check that the Phi has one (or two for min/max) uses, plus an extra use
1342 // for conditional reductions.
1343 if (!Phi->hasNUses(ExpectedUses + ExtraPhiUses))
1344 return {};
1345
1346 Instruction *Cur = getNextInstruction(Phi);
1347
1348 // Each other instruction in the chain should have the expected number of uses
1349 // and be the correct opcode.
1350 while (Cur != RdxInstr) {
1351 if (!Cur || !isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1352 return {};
1353
1354 ReductionOperations.push_back(Cur);
1355 Cur = getNextInstruction(Cur);
1356 }
1357
1358 ReductionOperations.push_back(Cur);
1359 return ReductionOperations;
1360}
1361
1362InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1363 const SCEV *Step, BinaryOperator *BOp,
1365 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
1366 assert(IK != IK_NoInduction && "Not an induction");
1367
1368 // Start value type should match the induction kind and the value
1369 // itself should not be null.
1370 assert(StartValue && "StartValue is null");
1371 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1372 "StartValue is not a pointer for pointer induction");
1373 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1374 "StartValue is not an integer for integer induction");
1375
1376 // Check the Step Value. It should be non-zero integer value.
1377 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1378 "Step value is zero");
1379
1380 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1381 "StepValue is not an integer");
1382
1383 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1384 "StepValue is not FP for FpInduction");
1385 assert((IK != IK_FpInduction ||
1386 (InductionBinOp &&
1387 (InductionBinOp->getOpcode() == Instruction::FAdd ||
1388 InductionBinOp->getOpcode() == Instruction::FSub))) &&
1389 "Binary opcode should be specified for FP induction");
1390
1391 if (Casts)
1392 llvm::append_range(RedundantCasts, *Casts);
1393}
1394
1396 if (isa<SCEVConstant>(Step))
1397 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1398 return nullptr;
1399}
1400
1402 ScalarEvolution *SE,
1404
1405 // Here we only handle FP induction variables.
1406 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1407
1408 if (TheLoop->getHeader() != Phi->getParent())
1409 return false;
1410
1411 // The loop may have multiple entrances or multiple exits; we can analyze
1412 // this phi if it has a unique entry value and a unique backedge value.
1413 if (Phi->getNumIncomingValues() != 2)
1414 return false;
1415 Value *BEValue = nullptr, *StartValue = nullptr;
1416 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1417 BEValue = Phi->getIncomingValue(0);
1418 StartValue = Phi->getIncomingValue(1);
1419 } else {
1420 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1421 "Unexpected Phi node in the loop");
1422 BEValue = Phi->getIncomingValue(1);
1423 StartValue = Phi->getIncomingValue(0);
1424 }
1425
1427 if (!BOp)
1428 return false;
1429
1430 Value *Addend = nullptr;
1431 if (BOp->getOpcode() == Instruction::FAdd) {
1432 if (BOp->getOperand(0) == Phi)
1433 Addend = BOp->getOperand(1);
1434 else if (BOp->getOperand(1) == Phi)
1435 Addend = BOp->getOperand(0);
1436 } else if (BOp->getOpcode() == Instruction::FSub)
1437 if (BOp->getOperand(0) == Phi)
1438 Addend = BOp->getOperand(1);
1439
1440 if (!Addend)
1441 return false;
1442
1443 // The addend should be loop invariant
1444 if (auto *I = dyn_cast<Instruction>(Addend))
1445 if (TheLoop->contains(I))
1446 return false;
1447
1448 // FP Step has unknown SCEV
1449 const SCEV *Step = SE->getUnknown(Addend);
1450 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1451 return true;
1452}
1453
1454/// This function is called when we suspect that the update-chain of a phi node
1455/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1456/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1457/// predicate P under which the SCEV expression for the phi can be the
1458/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1459/// cast instructions that are involved in the update-chain of this induction.
1460/// A caller that adds the required runtime predicate can be free to drop these
1461/// cast instructions, and compute the phi using \p AR (instead of some scev
1462/// expression with casts).
1463///
1464/// For example, without a predicate the scev expression can take the following
1465/// form:
1466/// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1467///
1468/// It corresponds to the following IR sequence:
1469/// %for.body:
1470/// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1471/// %casted_phi = "ExtTrunc i64 %x"
1472/// %add = add i64 %casted_phi, %step
1473///
1474/// where %x is given in \p PN,
1475/// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1476/// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1477/// several forms, for example, such as:
1478/// ExtTrunc1: %casted_phi = and %x, 2^n-1
1479/// or:
1480/// ExtTrunc2: %t = shl %x, m
1481/// %casted_phi = ashr %t, m
1482///
1483/// If we are able to find such sequence, we return the instructions
1484/// we found, namely %casted_phi and the instructions on its use-def chain up
1485/// to the phi (not including the phi).
1487 const SCEVUnknown *PhiScev,
1488 const SCEVAddRecExpr *AR,
1489 SmallVectorImpl<Instruction *> &CastInsts) {
1490
1491 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1492 auto *PN = cast<PHINode>(PhiScev->getValue());
1493 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1494 const Loop *L = AR->getLoop();
1495
1496 // Find any cast instructions that participate in the def-use chain of
1497 // PhiScev in the loop.
1498 // FORNOW/TODO: We currently expect the def-use chain to include only
1499 // two-operand instructions, where one of the operands is an invariant.
1500 // createAddRecFromPHIWithCasts() currently does not support anything more
1501 // involved than that, so we keep the search simple. This can be
1502 // extended/generalized as needed.
1503
1504 auto getDef = [&](const Value *Val) -> Value * {
1505 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1506 if (!BinOp)
1507 return nullptr;
1508 Value *Op0 = BinOp->getOperand(0);
1509 Value *Op1 = BinOp->getOperand(1);
1510 Value *Def = nullptr;
1511 if (L->isLoopInvariant(Op0))
1512 Def = Op1;
1513 else if (L->isLoopInvariant(Op1))
1514 Def = Op0;
1515 return Def;
1516 };
1517
1518 // Look for the instruction that defines the induction via the
1519 // loop backedge.
1520 BasicBlock *Latch = L->getLoopLatch();
1521 if (!Latch)
1522 return false;
1523 Value *Val = PN->getIncomingValueForBlock(Latch);
1524 if (!Val)
1525 return false;
1526
1527 // Follow the def-use chain until the induction phi is reached.
1528 // If on the way we encounter a Value that has the same SCEV Expr as the
1529 // phi node, we can consider the instructions we visit from that point
1530 // as part of the cast-sequence that can be ignored.
1531 bool InCastSequence = false;
1532 auto *Inst = dyn_cast<Instruction>(Val);
1533 while (Val != PN) {
1534 // If we encountered a phi node other than PN, or if we left the loop,
1535 // we bail out.
1536 if (!Inst || !L->contains(Inst)) {
1537 return false;
1538 }
1539 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1540 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1541 InCastSequence = true;
1542 if (InCastSequence) {
1543 // Only the last instruction in the cast sequence is expected to have
1544 // uses outside the induction def-use chain.
1545 if (!CastInsts.empty())
1546 if (!Inst->hasOneUse())
1547 return false;
1548 CastInsts.push_back(Inst);
1549 }
1550 Val = getDef(Val);
1551 if (!Val)
1552 return false;
1553 Inst = dyn_cast<Instruction>(Val);
1554 }
1555
1556 return InCastSequence;
1557}
1558
1561 InductionDescriptor &D, bool Assume) {
1562 Type *PhiTy = Phi->getType();
1563
1564 // Handle integer and pointer inductions variables.
1565 // Now we handle also FP induction but not trying to make a
1566 // recurrent expression from the PHI node in-place.
1567
1568 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1569 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1570 return false;
1571
1572 if (PhiTy->isFloatingPointTy())
1573 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1574
1575 const SCEV *PhiScev = PSE.getSCEV(Phi);
1576 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1577
1578 // We need this expression to be an AddRecExpr.
1579 if (Assume && !AR)
1580 AR = PSE.getAsAddRec(Phi);
1581
1582 if (!AR) {
1583 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1584 return false;
1585 }
1586
1587 // Record any Cast instructions that participate in the induction update
1588 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1589 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1590 // only after enabling Assume with PSCEV, this means we may have encountered
1591 // cast instructions that required adding a runtime check in order to
1592 // guarantee the correctness of the AddRecurrence respresentation of the
1593 // induction.
1594 if (PhiScev != AR && SymbolicPhi) {
1596 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1597 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1598 }
1599
1600 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1601}
1602
1604 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1605 InductionDescriptor &D, const SCEV *Expr,
1606 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1607 Type *PhiTy = Phi->getType();
1608 // isSCEVable returns true for integer and pointer types.
1609 if (!SE->isSCEVable(PhiTy))
1610 return false;
1611
1612 // Check that the PHI is consecutive.
1613 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1614 const SCEV *Step;
1615
1616 // FIXME: We are currently matching the specific loop TheLoop; if it doesn't
1617 // match, we should treat it as a uniform. Unfortunately, we don't currently
1618 // know how to handled uniform PHIs.
1619 if (!match(PhiScev, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step),
1620 m_SpecificLoop(TheLoop)))) {
1621 LLVM_DEBUG(
1622 dbgs() << "LV: PHI is not a poly recurrence for requested loop.\n");
1623 return false;
1624 }
1625
1626 // This function assumes that InductionPhi is called only on Phi nodes
1627 // present inside loop headers. Check for the same, and throw an assert if
1628 // the current Phi is not present inside the loop header.
1629 assert(Phi->getParent() == TheLoop->getHeader() &&
1630 "Invalid Phi node, not present in loop header");
1631
1632 if (!TheLoop->getLoopPreheader())
1633 return false;
1634
1635 Value *StartValue =
1636 Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
1637
1638 BasicBlock *Latch = TheLoop->getLoopLatch();
1639 if (!Latch)
1640 return false;
1641
1642 if (PhiTy->isIntegerTy()) {
1643 BinaryOperator *BOp =
1644 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1645 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1646 CastsToIgnore);
1647 return true;
1648 }
1649
1650 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1651
1652 // This allows induction variables w/non-constant steps.
1653 D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
1654 return true;
1655}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE, const SCEVUnknown *PhiScev, const SCEVAddRecExpr *AR, SmallVectorImpl< Instruction * > &CastInsts)
This function is called when we suspect that the update-chain of a phi node (whose symbolic SCEV expr...
static std::optional< FastMathFlags > hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK)
static void collectCastInstrs(Loop *TheLoop, Instruction *Exit, Type *RecurrenceType, SmallPtrSetImpl< Instruction * > &Casts, unsigned &MinWidthCastToRecurTy)
Collect cast instructions that can be ignored in the vectorizer's cost model, given a reduction exit ...
static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst, Instruction *Exit, PHINode *Phi)
static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi, SmallPtrSetImpl< Instruction * > &ReductionInstrs)
static Instruction * lookThroughAnd(PHINode *Phi, Type *&RT, SmallPtrSetImpl< Instruction * > &Visited, SmallPtrSetImpl< Instruction * > &CI)
Determines if Phi may have been type-promoted.
static FastMathFlags collectMinMaxFMF(Value *V)
static RecurrenceDescriptor getMinMaxRecurrence(PHINode *Phi, Loop *TheLoop, ScalarEvolution *SE)
static std::pair< Type *, bool > computeRecurrenceType(Instruction *Exit, DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT)
Compute the minimal bit width needed to represent a reduction whose exit instruction is given by Exit...
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define I(x, y, z)
Definition MD5.cpp:57
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Class for arbitrary precision integers.
Definition APInt.h:78
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
BinaryOps getOpcode() const
Definition InstrTypes.h:374
This is the shared class of boolean and integer constants.
Definition Constants.h:87
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:200
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:302
bool hasNoSignedZeros() const
Test if this operation can ignore the sign of zero.
Definition Operator.h:312
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
static FastMathFlags getFast()
Definition FMF.h:53
@ IK_FpInduction
Floating point induction variable.
@ IK_PtrInduction
Pointer induction var. Step = C.
@ IK_IntInduction
Integer induction variable. Step = C.
static LLVM_ABI bool isInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D, const SCEV *Expr=nullptr, SmallVectorImpl< Instruction * > *CastsToIgnore=nullptr)
Returns true if Phi is an induction in the loop L.
static LLVM_ABI bool isFPInductionPHI(PHINode *Phi, const Loop *L, ScalarEvolution *SE, InductionDescriptor &D)
Returns true if Phi is a floating point induction in the loop L.
InductionDescriptor()=default
Default constructor - creates an invalid induction.
LLVM_ABI ConstantInt * getConstIntStepValue() const
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
BlockT * getHeader() const
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition LoopInfo.cpp:66
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
LLVM_ABI bool areAddRecsEqualWithPreds(const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const
Check if AR1 and AR2 are equal, while taking into account Equal predicates in Preds.
LLVM_ABI const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
LLVM_ABI const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
This POD struct holds information about a potential recurrence operation.
Instruction * getExactFPMathInst() const
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFMulAddIntrinsic(Instruction *I)
Returns true if the instruction is a call to the llvm.fmuladd intrinsic.
static LLVM_ABI bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop, DominatorTree *DT)
Returns true if Phi is a fixed-order recurrence.
static LLVM_ABI InstDesc isConditionalRdxPattern(Instruction *I)
Returns a struct describing if the instruction is a Select(FCmp(X, Y), (Z = X op PHINode),...
static LLVM_ABI bool hasMultipleUsesOf(Instruction *I, SmallPtrSetImpl< Instruction * > &Insts, unsigned MaxNumUses)
Returns true if instruction I has multiple uses in Insts.
static LLVM_ABI bool isReductionPHI(PHINode *Phi, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction in TheLoop.
static LLVM_ABI bool areAllUsesIn(Instruction *I, SmallPtrSetImpl< Instruction * > &Set)
Returns true if all uses of the instruction I are within the Set.
LLVM_ABI SmallVector< Instruction *, 4 > getReductionOpChain(PHINode *Phi, Loop *L) const
Attempts to find a chain of operations from Phi to LoopExitInst that can be treated as a set of reduc...
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,...
static LLVM_ABI InstDesc isAnyOfPattern(Loop *Loop, PHINode *OrigPhi, Instruction *I, InstDesc &Prev)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
StoreInst * IntermediateStore
Reductions may store temporary or final result to an invariant address.
static bool isFindRecurrenceKind(RecurKind Kind)
static LLVM_ABI bool isFloatingPointRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is a floating point kind.
static LLVM_ABI InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, ScalarEvolution *SE)
Returns a struct describing if the instruction 'I' can be a recurrence variable of type 'Kind' for a ...
static LLVM_ABI bool AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes, DemandedBits *DB=nullptr, AssumptionCache *AC=nullptr, DominatorTree *DT=nullptr, ScalarEvolution *SE=nullptr)
Returns true if Phi is a reduction of type Kind and adds it to the RecurrenceDescriptor.
static LLVM_ABI InstDesc isFindPattern(Loop *TheLoop, PHINode *OrigPhi, Instruction *I, ScalarEvolution &SE)
Returns a struct describing whether the instruction is either a Select(ICmp(A, B),...
static LLVM_ABI bool isIntegerRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer kind.
static bool isIntMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is an integer min/max kind.
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
This node represents a polynomial recurrence on the trip count of the specified loop.
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
This class represents an analyzed expression in the program.
LLVM_ABI Type * getType() const
Return the LLVM type of this SCEV expression.
The main scalar evolution driver.
LLVM_ABI const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
LLVM_ABI bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
LLVM_ABI bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
LLVM_ABI const SCEV * getUnknown(Value *V)
This class represents the LLVM 'select' instruction.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:267
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:142
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition Value.cpp:150
bool use_empty() const
Definition Value.h:347
const ParentTy * getParent() const
Definition ilist_node.h:34
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMinimum(const Opnd0 &Op0, const Opnd1 &Op1)
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > > m_OrdOrUnordFMin(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_FMaximum(const Opnd0 &Op0, const Opnd1 &Op1)
BinaryOp_match< LHS, RHS, Instruction::FAdd > m_FAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
match_combine_or< MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty >, MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > > m_OrdOrUnordFMax(const LHS &L, const RHS &R)
Match an 'ordered' or 'unordered' floating point maximum function.
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
specificloop_ty m_SpecificLoop(const Loop *L)
SCEVAffineAddRec_match< Op0_t, Op1_t, class_match< const Loop > > m_scev_AffineAddRec(const Op0_t &Op0, const Op1_t &Op1)
class_match< const SCEV > m_SCEV()
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
MachineInstr * getDef(const MachineOperand &MO, const MachineRegisterInfo *MRI)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
RecurKind
These are the kinds of recurrences that we support.
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
@ FMinimumNum
FP min with llvm.minimumnum semantics.
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
@ Or
Bitwise or logical OR of integers.
@ FMinimum
FP min with llvm.minimum semantics.
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
@ Mul
Product of integers.
@ None
Not a recurrence.
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
@ Xor
Bitwise or logical XOR of integers.
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
@ FMax
FP max implemented in terms of select(cmp()).
@ FMaximum
FP max with llvm.maximum semantics.
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
@ FMul
Product of floats.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
@ FMin
FP min implemented in terms of select(cmp()).
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
@ AddChainWithSubs
A chain of adds and subs.
@ FAdd
Sum of floats.
@ FMaximumNum
FP max with llvm.maximumnum semantics.
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
DWARFExpression::Operation Op
LLVM_ABI unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Return the number of times the sign bit of the register is replicated into the other bits.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static bool isMinOrMax(SelectPatternFlavor SPF)
When implementing this min/max pattern as fcmp; select, does the fcmp have to be ordered?