LLVM 23.0.0git
VectorUtils.cpp
Go to the documentation of this file.
1//===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines vectorizer utilities.
10//
11//===----------------------------------------------------------------------===//
12
23#include "llvm/IR/Constants.h"
25#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Value.h"
30
31#define DEBUG_TYPE "vectorutils"
32
33using namespace llvm;
34using namespace llvm::PatternMatch;
35
36/// Maximum factor for an interleaved memory access.
// Hidden command-line knob capping the factor of an interleaved access group.
// NOTE(review): the declaration opener (original line 37) was dropped by the
// listing extraction; presumably
// 'static cl::opt<unsigned> MaxInterleaveGroupFactor(' -- confirm upstream.
38 "max-interleave-group-factor", cl::Hidden,
39 cl::desc("Maximum factor for an interleaved access group (default = 8)"),
40 cl::init(8));
41
42/// Return true if all of the intrinsic's arguments and return type are scalars
43/// for the scalar form of the intrinsic, and vectors for the vector form of the
44/// intrinsic (except operands that are marked as always being scalar by
45/// isVectorIntrinsicWithScalarOpAtArg).
// NOTE(review): the function opener (original line 46, presumably
// 'bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {') was dropped by the
// listing extraction -- confirm against the upstream file.
47 switch (ID) {
48 case Intrinsic::abs: // Begin integer bit-manipulation.
49 case Intrinsic::bswap:
50 case Intrinsic::bitreverse:
51 case Intrinsic::ctpop:
52 case Intrinsic::ctlz:
53 case Intrinsic::cttz:
54 case Intrinsic::fshl:
55 case Intrinsic::fshr:
56 case Intrinsic::smax:
57 case Intrinsic::smin:
58 case Intrinsic::umax:
59 case Intrinsic::umin:
60 case Intrinsic::sadd_sat:
61 case Intrinsic::ssub_sat:
62 case Intrinsic::uadd_sat:
63 case Intrinsic::usub_sat:
64 case Intrinsic::smul_fix:
65 case Intrinsic::smul_fix_sat:
66 case Intrinsic::umul_fix:
67 case Intrinsic::umul_fix_sat:
68 case Intrinsic::uadd_with_overflow:
69 case Intrinsic::sadd_with_overflow:
70 case Intrinsic::usub_with_overflow:
71 case Intrinsic::ssub_with_overflow:
72 case Intrinsic::umul_with_overflow:
73 case Intrinsic::smul_with_overflow:
74 case Intrinsic::sqrt: // Begin floating-point.
75 case Intrinsic::asin:
76 case Intrinsic::acos:
77 case Intrinsic::atan:
78 case Intrinsic::atan2:
79 case Intrinsic::sin:
80 case Intrinsic::cos:
81 case Intrinsic::sincos:
82 case Intrinsic::sincospi:
83 case Intrinsic::tan:
84 case Intrinsic::sinh:
85 case Intrinsic::cosh:
86 case Intrinsic::tanh:
87 case Intrinsic::exp:
88 case Intrinsic::exp10:
89 case Intrinsic::exp2:
90 case Intrinsic::frexp:
91 case Intrinsic::ldexp:
92 case Intrinsic::log:
93 case Intrinsic::log10:
94 case Intrinsic::log2:
95 case Intrinsic::fabs:
96 case Intrinsic::minnum:
97 case Intrinsic::maxnum:
98 case Intrinsic::minimum:
99 case Intrinsic::maximum:
100 case Intrinsic::minimumnum:
101 case Intrinsic::maximumnum:
102 case Intrinsic::modf:
103 case Intrinsic::copysign:
104 case Intrinsic::floor:
105 case Intrinsic::ceil:
106 case Intrinsic::trunc:
107 case Intrinsic::rint:
108 case Intrinsic::nearbyint:
109 case Intrinsic::round:
110 case Intrinsic::roundeven:
111 case Intrinsic::pow:
112 case Intrinsic::fma:
113 case Intrinsic::fmuladd:
114 case Intrinsic::is_fpclass:
115 case Intrinsic::powi:
116 case Intrinsic::canonicalize:
117 case Intrinsic::fptosi_sat:
118 case Intrinsic::fptoui_sat:
119 case Intrinsic::lround:
120 case Intrinsic::llround:
121 case Intrinsic::lrint:
122 case Intrinsic::llrint:
123 case Intrinsic::ucmp:
124 case Intrinsic::scmp:
125 case Intrinsic::clmul:
126 return true;
// Any intrinsic not explicitly listed above is not trivially vectorizable.
127 default:
128 return false;
129 }
130}
131
// Decides whether an intrinsic can be scalarized: either it is trivially
// vectorizable (and thus trivially scalarizable), or the target says so.
// NOTE(review): the listing extraction dropped the signature opener (original
// 132) and the 'if' guards for both early returns below (original 134,
// presumably 'if (isTriviallyVectorizable(ID))', and original 137, presumably
// 'if (TTI)') -- confirm the control flow against the upstream file.
133 const TargetTransformInfo *TTI) {
135 return true;
136
138 return TTI->isTargetIntrinsicTriviallyScalarizable(ID);
139
140 return false;
141}
142
143/// Identifies if the vector form of the intrinsic has a scalar operand.
// NOTE(review): the listing dropped the signature opener (original 144,
// presumably 'bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,')
// and the 'if (TTI)' guard (original 148) in front of the target-hook call
// below -- confirm against the upstream file.
145 unsigned ScalarOpdIdx,
146 const TargetTransformInfo *TTI) {
147
149 return TTI->isTargetIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx);
150
151 // Vector predication intrinsics have the EVL as the last operand.
152 if (VPIntrinsic::getVectorLengthParamPos(ID) == ScalarOpdIdx)
153 return true;
154
155 switch (ID) {
156 case Intrinsic::abs:
157 case Intrinsic::vp_abs:
158 case Intrinsic::ctlz:
159 case Intrinsic::vp_ctlz:
160 case Intrinsic::cttz:
161 case Intrinsic::vp_cttz:
162 case Intrinsic::is_fpclass:
163 case Intrinsic::vp_is_fpclass:
164 case Intrinsic::powi:
165 case Intrinsic::vector_extract:
// For these, operand 1 (e.g. powi exponent, ctlz/cttz is-zero-poison flag,
// vector.extract index) stays scalar in the vector form.
166 return (ScalarOpdIdx == 1);
167 case Intrinsic::smul_fix:
168 case Intrinsic::smul_fix_sat:
169 case Intrinsic::umul_fix:
170 case Intrinsic::umul_fix_sat:
// Fixed-point multiplies keep their scale parameter (operand 2) scalar.
171 return (ScalarOpdIdx == 2);
172 case Intrinsic::experimental_vp_splice:
173 return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
174 default:
175 return false;
176 }
177}
178
// Reports whether operand OpdIdx (or the return value, for OpdIdx == -1)
// participates in the intrinsic's overloaded-type signature.
// NOTE(review): the listing dropped the signature opener (original 179,
// presumably 'bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(') plus the
// guards for the two early returns below (original 183, presumably
// 'if (TTI)', and original 186, presumably a VP-cast check) -- confirm the
// control flow against the upstream file.
180 Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI) {
181 assert(ID != Intrinsic::not_intrinsic && "Not an intrinsic!");
182
184 return TTI->isTargetIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);
185
187 return OpdIdx == -1 || OpdIdx == 0;
188
189 switch (ID) {
190 case Intrinsic::fptosi_sat:
191 case Intrinsic::fptoui_sat:
192 case Intrinsic::lround:
193 case Intrinsic::llround:
194 case Intrinsic::lrint:
195 case Intrinsic::llrint:
196 case Intrinsic::vp_lrint:
197 case Intrinsic::vp_llrint:
198 case Intrinsic::ucmp:
199 case Intrinsic::scmp:
200 case Intrinsic::vector_extract:
// Overloaded on both the return type and the first operand type.
201 return OpdIdx == -1 || OpdIdx == 0;
202 case Intrinsic::modf:
203 case Intrinsic::sincos:
204 case Intrinsic::sincospi:
205 case Intrinsic::is_fpclass:
206 case Intrinsic::vp_is_fpclass:
207 return OpdIdx == 0;
208 case Intrinsic::powi:
209 case Intrinsic::ldexp:
// Overloaded on the return type and the second (integer) operand.
210 return OpdIdx == -1 || OpdIdx == 1;
211 default:
// By default only the return type is overloaded.
212 return OpdIdx == -1;
213 }
214}
215
// Reports whether struct-return field RetIdx participates in the intrinsic's
// overloaded-type signature.
// NOTE(review): the listing dropped the signature opener (original 216) and
// the 'if (TTI)' guard (original 219) before the target-hook call below --
// confirm against the upstream file.
217 Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {
218
220 return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);
221
222 switch (ID) {
// frexp returns a two-field struct; both fields are part of the overload.
223 case Intrinsic::frexp:
224 return RetIdx == 0 || RetIdx == 1;
225 default:
226 return RetIdx == 0;
227 }
228}
229
230/// Returns intrinsic ID for call.
231/// For the input call instruction it finds the mapping intrinsic and returns
232/// its ID; if no mapping intrinsic is found, not_intrinsic is returned.
// NOTE(review): the listing dropped the signature opener (original 233), the
// intrinsic lookup / early exit (original 235-237, presumably computing
// 'Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);'), and the final
// fallback (original 244, presumably 'return Intrinsic::not_intrinsic;') --
// confirm against the upstream file.
234 const TargetLibraryInfo *TLI) {
238
// Vectorizable intrinsics, plus a fixed set of side-effect-free "ignorable"
// intrinsics (lifetime markers, assumes, probes, ...), keep their ID.
239 if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
240 ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
241 ID == Intrinsic::experimental_noalias_scope_decl ||
242 ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
243 return ID;
245}
246
// Maps an llvm.vector.interleaveN intrinsic ID to its factor N (2..8);
// returns 0 for anything else.
// NOTE(review): the function opener (original line 247, presumably
// 'unsigned llvm::getInterleaveIntrinsicFactor(Intrinsic::ID ID) {') was
// dropped by the listing extraction.
248 switch (ID) {
249 case Intrinsic::vector_interleave2:
250 return 2;
251 case Intrinsic::vector_interleave3:
252 return 3;
253 case Intrinsic::vector_interleave4:
254 return 4;
255 case Intrinsic::vector_interleave5:
256 return 5;
257 case Intrinsic::vector_interleave6:
258 return 6;
259 case Intrinsic::vector_interleave7:
260 return 7;
261 case Intrinsic::vector_interleave8:
262 return 8;
263 default:
// Zero signals "not an interleave intrinsic".
264 return 0;
265 }
266}
267
// Maps an llvm.vector.deinterleaveN intrinsic ID to its factor N (2..8);
// returns 0 for anything else. Mirrors getInterleaveIntrinsicFactor above.
// NOTE(review): the function opener (original line 268, presumably
// 'unsigned llvm::getDeinterleaveIntrinsicFactor(Intrinsic::ID ID) {') was
// dropped by the listing extraction.
269 switch (ID) {
270 case Intrinsic::vector_deinterleave2:
271 return 2;
272 case Intrinsic::vector_deinterleave3:
273 return 3;
274 case Intrinsic::vector_deinterleave4:
275 return 4;
276 case Intrinsic::vector_deinterleave5:
277 return 5;
278 case Intrinsic::vector_deinterleave6:
279 return 6;
280 case Intrinsic::vector_deinterleave7:
281 return 7;
282 case Intrinsic::vector_deinterleave8:
283 return 8;
284 default:
// Zero signals "not a deinterleave intrinsic".
285 return 0;
286 }
287}
288
// Returns the (common) vector type of a deinterleave intrinsic's results.
// NOTE(review): the listing dropped the signature opener (original 289) and
// the initializer of 'Factor' (original 291, presumably
// 'getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());') -- confirm
// against the upstream file.
290 [[maybe_unused]] unsigned Factor =
292 ArrayRef<Type *> DISubtypes = DI->getType()->subtypes();
293 assert(Factor && Factor == DISubtypes.size() &&
294 "unexpected deinterleave factor or result type");
// All struct fields of the result share one vector type; return the first.
295 return cast<VectorType>(DISubtypes[0]);
296}
297
298/// Given a vector and an element number, see if the scalar value is
299/// already around as a register, for example if it were inserted then extracted
300/// from the vector.
301Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
302 assert(V->getType()->isVectorTy() && "Not looking at a vector?");
303 VectorType *VTy = cast<VectorType>(V->getType());
304 // For fixed-length vector, return poison for out of range access.
305 if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
306 unsigned Width = FVTy->getNumElements();
307 if (EltNo >= Width)
308 return PoisonValue::get(FVTy->getElementType());
309 }
310
// Constants can report their elements directly.
311 if (Constant *C = dyn_cast<Constant>(V))
312 return C->getAggregateElement(EltNo);
313
// NOTE(review): a dropped line here (original 314) presumably opened
// 'if (auto *III = dyn_cast<InsertElementInst>(V)) {' -- the 'III' used
// below is otherwise undeclared in this listing.
315 // If this is an insert to a variable element, we don't know what it is.
316 uint64_t IIElt;
317 if (!match(III->getOperand(2), m_ConstantInt(IIElt)))
318 return nullptr;
319
320 // If this is an insert to the element we are looking for, return the
321 // inserted value.
322 if (EltNo == IIElt)
323 return III->getOperand(1);
324
325 // Guard against infinite loop on malformed, unreachable IR.
326 if (III == III->getOperand(0))
327 return nullptr;
328
329 // Otherwise, the insertelement doesn't modify the value, recurse on its
330 // vector input.
331 return findScalarElement(III->getOperand(0), EltNo);
332 }
333
// NOTE(review): a dropped line here (original 334) presumably declared
// 'auto *SVI = dyn_cast<ShuffleVectorInst>(V);' used just below.
335 // Restrict the following transformation to fixed-length vector.
336 if (SVI && isa<FixedVectorType>(SVI->getType())) {
337 unsigned LHSWidth =
338 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
339 int InEl = SVI->getMaskValue(EltNo);
340 if (InEl < 0)
341 return PoisonValue::get(VTy->getElementType());
// Mask indices below LHSWidth select from operand 0, the rest from operand 1.
342 if (InEl < (int)LHSWidth)
343 return findScalarElement(SVI->getOperand(0), InEl);
344 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
345 }
346
347 // Extract a value from a vector add operation with a constant zero.
348 // TODO: Use getBinOpIdentity() to generalize this.
349 Value *Val; Constant *C;
350 if (match(V, m_Add(m_Value(Val), m_Constant(C))))
351 if (Constant *Elt = C->getAggregateElement(EltNo))
352 if (Elt->isNullValue())
353 return findScalarElement(Val, EltNo);
354
355 // If the vector is a splat then we can trivially find the scalar element.
// NOTE(review): original line 356 was dropped by the listing extraction here.
357 if (Value *Splat = getSplatValue(V))
358 if (EltNo < VTy->getElementCount().getKnownMinValue())
359 return Splat;
360
361 // Otherwise, we don't know.
362 return nullptr;
363}
364
// Returns the single source-lane index a splat shuffle mask reads from, or -1
// if the mask is not a splat (or is entirely undef).
// NOTE(review): the function opener (original line 365, presumably
// 'int llvm::getSplatIndex(ArrayRef<int> Mask) {') was dropped by the
// listing extraction.
366 int SplatIndex = -1;
367 for (int M : Mask) {
368 // Ignore invalid (undefined) mask elements.
369 if (M < 0)
370 continue;
371
372 // There can be only 1 non-negative mask element value if this is a splat.
373 if (SplatIndex != -1 && SplatIndex != M)
374 return -1;
375
376 // Initialize the splat index to the 1st non-negative mask element.
377 SplatIndex = M;
378 }
379 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
380 return SplatIndex;
381}
382
383/// Get splat value if the input is a splat vector or return nullptr.
384/// This function is not fully general. It checks only 2 cases:
385/// the input value is (1) a splat constant vector or (2) a sequence
386/// of instructions that broadcasts a scalar at element 0.
// NOTE(review): the function opener (original line 387, presumably
// 'Value *llvm::getSplatValue(const Value *V) {') was dropped by the
// listing extraction.
388 if (isa<VectorType>(V->getType()))
389 if (auto *C = dyn_cast<Constant>(V))
390 return C->getSplatValue();
391
392 // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
393 Value *Splat;
// NOTE(review): the first line of this match pattern (original 395,
// presumably 'm_Shuffle(m_InsertElt(m_Value(), m_Value(Splat),
// m_ZeroInt()),') was dropped by the listing extraction.
394 if (match(V,
396 m_Value(), m_ZeroMask())))
397 return Splat;
398
399 return nullptr;
400}
401
402bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
403 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
404
405 if (isa<VectorType>(V->getType())) {
406 if (isa<UndefValue>(V))
407 return true;
408 // FIXME: We can allow undefs, but if Index was specified, we may want to
409 // check that the constant is defined at that index.
410 if (auto *C = dyn_cast<Constant>(V))
411 return C->getSplatValue() != nullptr;
412 }
413
414 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
415 // FIXME: We can safely allow undefs here. If Index was specified, we will
416 // check that the mask elt is defined at the required index.
417 if (!all_equal(Shuf->getShuffleMask()))
418 return false;
419
420 // Match any index.
421 if (Index == -1)
422 return true;
423
424 // Match a specific element. The mask should be defined at and match the
425 // specified index.
426 return Shuf->getMaskValue(Index) == Index;
427 }
428
429 // The remaining tests are all recursive, so bail out if we hit the limit.
// NOTE(review): the recursion-depth guard (original line 430, presumably
// 'if (Depth++ == MaxAnalysisRecursionDepth)') was dropped by the listing
// extraction; without it this 'return false' reads as unconditional.
431 return false;
432
433 // If both operands of a binop are splats, the result is a splat.
434 Value *X, *Y, *Z;
435 if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
436 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
437
438 // If all operands of a select are splats, the result is a splat.
439 if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
440 return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
441 isSplatValue(Z, Index, Depth);
442
443 // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
444
445 return false;
446}
447
// Splits a shuffle's demanded result elements into demanded elements of its
// two source vectors. Returns false if an undef mask element blocks the
// analysis (unless AllowUndefElts).
// NOTE(review): the signature opener (original line 448, presumably
// 'bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,') was
// dropped by the listing extraction.
449 const APInt &DemandedElts, APInt &DemandedLHS,
450 APInt &DemandedRHS, bool AllowUndefElts) {
451 DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
452
453 // Early out if we don't demand any elements.
454 if (DemandedElts.isZero())
455 return true;
456
457 // Simple case of a shuffle with zeroinitializer.
458 if (all_of(Mask, equal_to(0))) {
459 DemandedLHS.setBit(0);
460 return true;
461 }
462
463 for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
464 int M = Mask[I];
465 assert((-1 <= M) && (M < (SrcWidth * 2)) &&
466 "Invalid shuffle mask constant");
467
468 if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
469 continue;
470
471 // For undef elements, we don't know anything about the common state of
472 // the shuffle result.
473 if (M < 0)
474 return false;
475
// Indices below SrcWidth read the LHS; the rest read the RHS.
476 if (M < SrcWidth)
477 DemandedLHS.setBit(M);
478 else
479 DemandedRHS.setBit(M - SrcWidth);
480 }
481
482 return true;
483}
484
// Checks whether the mask is a (possibly masked) combination of at most two
// "slides": for each defined element, (source vector, offset) must match one
// of two candidate pairs recorded in SrcInfo.
// NOTE(review): the signature opener (original line 485) was dropped by the
// listing extraction -- confirm the full signature upstream.
486 std::array<std::pair<int, int>, 2> &SrcInfo) {
// SignalValue is an impossible offset used to mark an unassigned slot.
487 const int SignalValue = NumElts * 2;
488 SrcInfo[0] = {-1, SignalValue};
489 SrcInfo[1] = {-1, SignalValue};
490 for (auto [i, M] : enumerate(Mask)) {
491 if (M < 0)
492 continue;
// Src: 0 = first source vector, 1 = second; Diff: destination minus source
// lane, i.e. the slide amount for this element.
493 int Src = M >= NumElts;
494 int Diff = (int)i - (M % NumElts);
495 bool Match = false;
496 for (int j = 0; j < 2; j++) {
497 auto &[SrcE, DiffE] = SrcInfo[j];
// Claim an empty slot on first use, then require an exact match.
498 if (SrcE == -1) {
499 assert(DiffE == SignalValue);
500 SrcE = Src;
501 DiffE = Diff;
502 }
503 if (SrcE == Src && DiffE == Diff) {
504 Match = true;
505 break;
506 }
507 }
508 if (!Match)
509 return false;
510 }
511 // Avoid all undef masks
512 return SrcInfo[0].first != -1;
513}
514
// Replicates each mask element Scale times, rescaling indices, so the mask
// applies to a vector with Scale-times-narrower elements.
// NOTE(review): the signature opener (original line 515, presumably
// 'void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,') was
// dropped by the listing extraction.
516 SmallVectorImpl<int> &ScaledMask) {
517 assert(Scale > 0 && "Unexpected scaling factor");
518
519 // Fast-path: if no scaling, then it is just a copy.
520 if (Scale == 1) {
521 ScaledMask.assign(Mask.begin(), Mask.end());
522 return;
523 }
524
525 ScaledMask.clear();
526 for (int MaskElt : Mask) {
527 if (MaskElt >= 0) {
528 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
529 "Overflowed 32-bits");
530 }
// Sentinel (negative) elements are replicated unchanged; defined elements
// expand to Scale consecutive narrow indices.
531 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
532 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
533 }
534}
535
// Attempts the inverse of narrowShuffleMaskElts: merge each group of Scale
// consecutive mask elements into one wide element. Returns false when the
// mask cannot be expressed with wider elements.
// NOTE(review): the signature opener (original line 536, presumably
// 'bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,') was
// dropped by the listing extraction.
537 SmallVectorImpl<int> &ScaledMask) {
538 assert(Scale > 0 && "Unexpected scaling factor");
539
540 // Fast-path: if no scaling, then it is just a copy.
541 if (Scale == 1) {
542 ScaledMask.assign(Mask.begin(), Mask.end());
543 return true;
544 }
545
546 // We must map the original elements down evenly to a type with less elements.
547 int NumElts = Mask.size();
548 if (NumElts % Scale != 0)
549 return false;
550
551 ScaledMask.clear();
552 ScaledMask.reserve(NumElts / Scale);
553
554 // Step through the input mask by splitting into Scale-sized slices.
555 do {
556 ArrayRef<int> MaskSlice = Mask.take_front(Scale);
557 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
558
559 // The first element of the slice determines how we evaluate this slice.
560 int SliceFront = MaskSlice.front();
561 if (SliceFront < 0) {
562 // Negative values (undef or other "sentinel" values) must be equal across
563 // the entire slice.
564 if (!all_equal(MaskSlice))
565 return false;
566 ScaledMask.push_back(SliceFront);
567 } else {
568 // A positive mask element must be cleanly divisible.
569 if (SliceFront % Scale != 0)
570 return false;
571 // Elements of the slice must be consecutive.
572 for (int i = 1; i < Scale; ++i)
573 if (MaskSlice[i] != SliceFront + i)
574 return false;
575 ScaledMask.push_back(SliceFront / Scale);
576 }
577 Mask = Mask.drop_front(Scale);
578 } while (!Mask.empty());
579
580 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
581
582 // All elements of the original mask can be scaled down to map to the elements
583 // of a mask with wider elements.
584 return true;
585}
586
// Pairwise variant: folds elements of M two at a time, tolerating undefs in
// either half of a pair, producing a mask of half the length in NewMask.
// Returns false (and clears NewMask) when a pair cannot be merged.
// NOTE(review): the signature opener (original line 587, presumably
// 'bool llvm::widenShuffleMaskElts(ArrayRef<int> M,') was dropped by the
// listing extraction.
588 SmallVectorImpl<int> &NewMask) {
589 unsigned NumElts = M.size();
590 if (NumElts % 2 != 0)
591 return false;
592
593 NewMask.clear();
594 for (unsigned i = 0; i < NumElts; i += 2) {
595 int M0 = M[i];
596 int M1 = M[i + 1];
597
598 // If both elements are undef, new mask is undef too.
599 if (M0 == -1 && M1 == -1) {
600 NewMask.push_back(-1);
601 continue;
602 }
603
// Only the second half is defined: it must be the odd half of a wide lane.
604 if (M0 == -1 && M1 != -1 && (M1 % 2) == 1) {
605 NewMask.push_back(M1 / 2);
606 continue;
607 }
608
// First half defined and even: second half must be its successor or undef.
609 if (M0 != -1 && (M0 % 2) == 0 && ((M0 + 1) == M1 || M1 == -1)) {
610 NewMask.push_back(M0 / 2);
611 continue;
612 }
613
614 NewMask.clear();
615 return false;
616 }
617
618 assert(NewMask.size() == NumElts / 2 && "Incorrect size for mask!");
619 return true;
620}
621
622bool llvm::scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,
623 SmallVectorImpl<int> &ScaledMask) {
624 unsigned NumSrcElts = Mask.size();
625 assert(NumSrcElts > 0 && NumDstElts > 0 && "Unexpected scaling factor");
626
627 // Fast-path: if no scaling, then it is just a copy.
628 if (NumSrcElts == NumDstElts) {
629 ScaledMask.assign(Mask.begin(), Mask.end());
630 return true;
631 }
632
633 // Ensure we can find a whole scale factor.
634 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
635 "Unexpected scaling factor");
636
637 if (NumSrcElts > NumDstElts) {
638 int Scale = NumSrcElts / NumDstElts;
639 return widenShuffleMaskElts(Scale, Mask, ScaledMask);
640 }
641
642 int Scale = NumDstElts / NumSrcElts;
643 narrowShuffleMaskElts(Scale, Mask, ScaledMask);
644 return true;
645}
646
// Repeatedly widens the mask by every feasible factor until no further
// widening succeeds, yielding the mask with the widest possible elements.
// NOTE(review): the signature opener (original line 647, presumably
// 'void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,') was dropped
// by the listing extraction.
648 SmallVectorImpl<int> &ScaledMask) {
// Two scratch buffers are ping-ponged so each successful widening becomes
// the next iteration's input without copying.
649 std::array<SmallVector<int, 16>, 2> TmpMasks;
650 SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
651 ArrayRef<int> InputMask = Mask;
652 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
653 while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
654 InputMask = *Output;
655 std::swap(Output, Tmp);
656 }
657 }
658 ScaledMask.assign(InputMask.begin(), InputMask.end());
659}
660
// Splits a shuffle mask over physical registers and invokes the supplied
// callbacks per destination register: NoInputAction when no source register
// is read, SingleInputAction for a single-source permute, ManyInputsAction
// for two-source shuffles (iterated until all sources are merged).
// NOTE(review): the signature opener (original line 661, presumably
// 'void llvm::processShuffleMasks(') was dropped by the listing extraction.
662 ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
663 unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
664 function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
665 function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>
666 ManyInputsAction) {
// Res[dest][src] holds the per-destination-register mask reading from the
// given source register (2 * NumOfSrcRegs: both shuffle operands).
667 SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
668 // Try to perform better estimation of the permutation.
669 // 1. Split the source/destination vectors into real registers.
670 // 2. Do the mask analysis to identify which real registers are
671 // permuted.
672 int Sz = Mask.size();
673 unsigned SzDest = Sz / NumOfDestRegs;
674 unsigned SzSrc = Sz / NumOfSrcRegs;
675 for (unsigned I = 0; I < NumOfDestRegs; ++I) {
676 auto &RegMasks = Res[I];
677 RegMasks.assign(2 * NumOfSrcRegs, {});
678 // Check that the values in dest registers are in the one src
679 // register.
680 for (unsigned K = 0; K < SzDest; ++K) {
681 int Idx = I * SzDest + K;
682 if (Idx == Sz)
683 break;
684 if (Mask[Idx] >= 2 * Sz || Mask[Idx] == PoisonMaskElem)
685 continue;
686 int MaskIdx = Mask[Idx] % Sz;
687 int SrcRegIdx = MaskIdx / SzSrc + (Mask[Idx] >= Sz ? NumOfSrcRegs : 0);
688 // Add a cost of PermuteTwoSrc for each new source register permute,
689 // if we have more than one source registers.
690 if (RegMasks[SrcRegIdx].empty())
691 RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
692 RegMasks[SrcRegIdx][K] = MaskIdx % SzSrc;
693 }
694 }
695 // Process split mask.
696 for (unsigned I : seq<unsigned>(NumOfUsedRegs)) {
697 auto &Dest = Res[I];
698 int NumSrcRegs =
699 count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
700 switch (NumSrcRegs) {
701 case 0:
702 // No input vectors were used!
703 NoInputAction();
704 break;
705 case 1: {
706 // Find the only mask with at least single undef mask elem.
707 auto *It =
708 find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
709 unsigned SrcReg = std::distance(Dest.begin(), It);
710 SingleInputAction(*It, SrcReg, I);
711 break;
712 }
713 default: {
714 // The first mask is a permutation of a single register. Since we have >2
715 // input registers to shuffle, we merge the masks for 2 first registers
716 // and generate a shuffle of 2 registers rather than the reordering of the
717 // first register and then shuffle with the second register. Next,
718 // generate the shuffles of the resulting register + the remaining
719 // registers from the list.
720 auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
721 ArrayRef<int> SecondMask) {
722 for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
723 if (SecondMask[Idx] != PoisonMaskElem) {
724 assert(FirstMask[Idx] == PoisonMaskElem &&
725 "Expected undefined mask element.");
726 FirstMask[Idx] = SecondMask[Idx] + VF;
727 }
728 }
729 };
730 auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
731 for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
732 if (Mask[Idx] != PoisonMaskElem)
733 Mask[Idx] = Idx;
734 }
735 };
736 int SecondIdx;
737 bool NewReg = true;
738 do {
739 int FirstIdx = -1;
740 SecondIdx = -1;
741 MutableArrayRef<int> FirstMask, SecondMask;
742 for (unsigned I : seq<unsigned>(2 * NumOfSrcRegs)) {
743 SmallVectorImpl<int> &RegMask = Dest[I];
744 if (RegMask.empty())
745 continue;
746
747 if (FirstIdx == SecondIdx) {
748 FirstIdx = I;
749 FirstMask = RegMask;
750 continue;
751 }
752 SecondIdx = I;
753 SecondMask = RegMask;
754 CombineMasks(FirstMask, SecondMask);
755 ManyInputsAction(FirstMask, FirstIdx, SecondIdx, NewReg);
756 NewReg = false;
757 NormalizeMask(FirstMask);
758 RegMask.clear();
759 SecondMask = FirstMask;
760 SecondIdx = FirstIdx;
761 }
762 if (FirstIdx != SecondIdx && SecondIdx >= 0) {
763 CombineMasks(SecondMask, FirstMask);
764 ManyInputsAction(SecondMask, SecondIdx, FirstIdx, NewReg);
765 NewReg = false;
766 Dest[FirstIdx].clear();
767 NormalizeMask(SecondMask);
768 }
769 } while (SecondIdx >= 0);
770 break;
771 }
772 }
773 }
774}
775
776void llvm::getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,
777 const APInt &DemandedElts,
778 APInt &DemandedLHS,
779 APInt &DemandedRHS) {
780 assert(VectorBitWidth >= 128 && "Vectors smaller than 128 bit not supported");
781 int NumLanes = VectorBitWidth / 128;
782 int NumElts = DemandedElts.getBitWidth();
783 int NumEltsPerLane = NumElts / NumLanes;
784 int HalfEltsPerLane = NumEltsPerLane / 2;
785
786 DemandedLHS = APInt::getZero(NumElts);
787 DemandedRHS = APInt::getZero(NumElts);
788
789 // Map DemandedElts to the horizontal operands.
790 for (int Idx = 0; Idx != NumElts; ++Idx) {
791 if (!DemandedElts[Idx])
792 continue;
793 int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
794 int LocalIdx = Idx % NumEltsPerLane;
795 if (LocalIdx < HalfEltsPerLane) {
796 DemandedLHS.setBit(LaneIdx + 2 * LocalIdx);
797 } else {
798 LocalIdx -= HalfEltsPerLane;
799 DemandedRHS.setBit(LaneIdx + 2 * LocalIdx);
800 }
801 }
802}
803
// Computes, per instruction, the minimum integer bit width it could be
// performed in, by growing equivalence classes of connected values from
// trunc/icmp roots and OR-ing their demanded bits.
// NOTE(review): the listing extraction dropped the signature opener (original
// 804-805, presumably 'MapVector<Instruction *, uint64_t>
// llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits
// &DB,') and the local declarations (original 811-815 and 817: presumably
// MinBWs, DBits, ECs, Worklist, Roots and Visited) -- confirm upstream.
806 const TargetTransformInfo *TTI) {
807
808 // DemandedBits will give us every value's live-out bits. But we want
809 // to ensure no extra casts would need to be inserted, so every DAG
810 // of connected values must have the same minimum bitwidth.
816 SmallPtrSet<Instruction *, 4> InstructionSet;
818
819 // Determine the roots. We work bottom-up, from truncs or icmps.
820 bool SeenExtFromIllegalType = false;
821 for (auto *BB : Blocks)
822 for (auto &I : *BB) {
823 InstructionSet.insert(&I);
824
825 if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
826 !TTI->isTypeLegal(I.getOperand(0)->getType()))
827 SeenExtFromIllegalType = true;
828
829 // Only deal with non-vector integers up to 64-bits wide.
830 if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
831 !I.getType()->isVectorTy() &&
832 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
833 // Don't make work for ourselves. If we know the loaded type is legal,
834 // don't add it to the worklist.
835 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
836 continue;
837
838 Worklist.push_back(&I);
839 Roots.insert(&I);
840 }
841 }
842 // Early exit.
843 if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
844 return MinBWs;
845
846 // Now proceed breadth-first, unioning values together.
847 while (!Worklist.empty()) {
848 Instruction *I = Worklist.pop_back_val();
849 Value *Leader = ECs.getOrInsertLeaderValue(I);
850
851 if (!Visited.insert(I).second)
852 continue;
853
854 // If we encounter a type that is larger than 64 bits, we can't represent
855 // it so bail out.
856 if (DB.getDemandedBits(I).getBitWidth() > 64)
// NOTE(review): the bail-out body (original 857, presumably returning an
// empty map) was dropped by the listing extraction.
858
859 uint64_t V = DB.getDemandedBits(I).getZExtValue();
860 DBits[Leader] |= V;
861 DBits[I] = V;
862
863 // Casts, loads and instructions outside of our range terminate a chain
864 // successfully.
// NOTE(review): the condition opener (original 865, presumably
// 'if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||') was
// dropped by the listing extraction.
866 !InstructionSet.count(I))
867 continue;
868
869 // Unsafe casts terminate a chain unsuccessfully. We can't do anything
870 // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
871 // transform anything that relies on them.
// NOTE(review): the condition opener (original 872, presumably
// 'if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I)
// ||') was dropped by the listing extraction.
873 !I->getType()->isIntegerTy()) {
874 DBits[Leader] |= ~0ULL;
875 continue;
876 }
877
878 // We don't modify the types of PHIs. Reductions will already have been
879 // truncated if possible, and inductions' sizes will have been chosen by
880 // indvars.
881 if (isa<PHINode>(I))
882 continue;
883
884 // Don't modify the types of operands of a call, as doing that would cause a
885 // signature mismatch.
886 if (isa<CallBase>(I))
887 continue;
888
889 if (DBits[Leader] == ~0ULL)
890 // All bits demanded, no point continuing.
891 continue;
892
893 for (Value *O : I->operands()) {
894 ECs.unionSets(Leader, O);
895 if (auto *OI = dyn_cast<Instruction>(O))
896 Worklist.push_back(OI);
897 }
898 }
899
900 // Now we've discovered all values, walk them to see if there are
901 // any users we didn't see. If there are, we can't optimize that
902 // chain.
903 for (auto &I : DBits)
904 for (auto *U : I.first->users())
905 if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
906 DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
907
908 for (const auto &E : ECs) {
909 if (!E->isLeader())
910 continue;
911 uint64_t LeaderDemandedBits = 0;
912 for (Value *M : ECs.members(*E))
913 LeaderDemandedBits |= DBits[M];
914
915 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
916 // Round up to a power of 2
917 MinBW = llvm::bit_ceil(MinBW);
918
919 // We don't modify the types of PHIs. Reductions will already have been
920 // truncated if possible, and inductions' sizes will have been chosen by
921 // indvars.
922 // If we are required to shrink a PHI, abandon this entire equivalence class.
923 bool Abort = false;
924 for (Value *M : ECs.members(*E))
925 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
926 Abort = true;
927 break;
928 }
929 if (Abort)
930 continue;
931
932 for (Value *M : ECs.members(*E)) {
933 auto *MI = dyn_cast<Instruction>(M);
934 if (!MI)
935 continue;
936 Type *Ty = M->getType();
937 if (Roots.count(MI))
938 Ty = MI->getOperand(0)->getType();
939
940 if (MinBW >= Ty->getScalarSizeInBits())
941 continue;
942
943 // If any of M's operands demand more bits than MinBW then M cannot be
944 // performed safely in MinBW.
945 auto *Call = dyn_cast<CallBase>(MI);
946 auto Ops = Call ? Call->args() : MI->operands();
947 if (any_of(Ops, [&DB, MinBW](Use &U) {
948 auto *CI = dyn_cast<ConstantInt>(U);
949 // For constants shift amounts, check if the shift would result in
950 // poison.
951 if (CI &&
// NOTE(review): a dropped line here (original 952) presumably tested that
// U's user is a shift operator before checking the shift amount.
953 U.getOperandNo() == 1)
954 return CI->uge(MinBW);
955 uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
956 return bit_ceil(BW) > MinBW;
957 }))
958 continue;
959
960 MinBWs[MI] = MinBW;
961 }
962 }
963
964 return MinBWs;
965}
966
967/// Add all access groups in @p AccGroups to @p List.
968template <typename ListT>
969static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
970 // Interpret an access group as a list containing itself.
971 if (AccGroups->getNumOperands() == 0) {
972 assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
973 List.insert(AccGroups);
974 return;
975 }
976
977 for (const auto &AccGroupListOp : AccGroups->operands()) {
978 auto *Item = cast<MDNode>(AccGroupListOp.get());
979 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
980 List.insert(Item);
981 }
982}
983
// Returns the union of two access-group metadata nodes (nullptr, a single
// group, or a list node), reusing an input node where possible.
// NOTE(review): a dropped line (original 992) presumably declared the 'Union'
// set used below, e.g. 'SmallSetVector<Metadata *, 4> Union;' -- confirm
// against the upstream file.
984MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
// Trivial cases: one side missing or both identical.
985 if (!AccGroups1)
986 return AccGroups2;
987 if (!AccGroups2)
988 return AccGroups1;
989 if (AccGroups1 == AccGroups2)
990 return AccGroups1;
991
993 addToAccessGroupList(Union, AccGroups1);
994 addToAccessGroupList(Union, AccGroups2);
995
996 if (Union.size() == 0)
997 return nullptr;
// A single access group is returned directly rather than wrapped in a list.
998 if (Union.size() == 1)
999 return cast<MDNode>(Union.front());
1000
1001 LLVMContext &Ctx = AccGroups1->getContext();
1002 return MDNode::get(Ctx, Union.getArrayRef());
1003}
1004
// Returns the access groups valid for both instructions: the full set of the
// non-memory-accessing side, or the intersection of both sets otherwise.
// NOTE(review): the signature opener (original line 1005, presumably
// 'MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,') was
// dropped by the listing extraction.
1006 const Instruction *Inst2) {
1007 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
1008 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
1009
// An instruction that never touches memory imposes no constraint, so the
// other instruction's groups carry over unchanged.
1010 if (!MayAccessMem1 && !MayAccessMem2)
1011 return nullptr;
1012 if (!MayAccessMem1)
1013 return Inst2->getMetadata(LLVMContext::MD_access_group);
1014 if (!MayAccessMem2)
1015 return Inst1->getMetadata(LLVMContext::MD_access_group);
1016
1017 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
1018 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
1019 if (!MD1 || !MD2)
1020 return nullptr;
1021 if (MD1 == MD2)
1022 return MD1;
1023
1024 // Use set for scalable 'contains' check.
1025 SmallPtrSet<Metadata *, 4> AccGroupSet2;
1026 addToAccessGroupList(AccGroupSet2, MD2);
1027
1028 SmallVector<Metadata *, 4> Intersection;
// A leaf access group is treated as a one-element list (same convention as
// addToAccessGroupList).
1029 if (MD1->getNumOperands() == 0) {
1030 assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
1031 if (AccGroupSet2.count(MD1))
1032 Intersection.push_back(MD1);
1033 } else {
1034 for (const MDOperand &Node : MD1->operands()) {
1035 auto *Item = cast<MDNode>(Node.get());
1036 assert(isValidAsAccessGroup(Item) && "List item must be an access group");
1037 if (AccGroupSet2.count(Item))
1038 Intersection.push_back(Item);
1039 }
1040 }
1041
1042 if (Intersection.size() == 0)
1043 return nullptr;
1044 if (Intersection.size() == 1)
1045 return cast<MDNode>(Intersection.front());
1046
1047 LLVMContext &Ctx = Inst1->getContext();
1048 return MDNode::get(Ctx, Intersection);
1049}
1050
1051/// Add metadata from \p Inst to \p Metadata, if it can be preserved after
1052/// vectorization.
// NOTE(review): the listing dropped the signature opener (original 1053,
// presumably 'void llvm::getMetadataToPropagate(') and the line populating
// Metadata (original 1056, presumably
// 'Inst->getAllMetadataOtherThanDebugLoc(Metadata);') -- confirm upstream.
1054 Instruction *Inst,
1055 SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata) {
// Whitelist of metadata kinds that remain valid on a vectorized instruction.
1057 static const unsigned SupportedIDs[] = {
1058 LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1059 LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
1060 LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
1061 LLVMContext::MD_access_group, LLVMContext::MD_mmra};
1062
1063 // Remove any unsupported metadata kinds from Metadata.
1064 for (unsigned Idx = 0; Idx != Metadata.size();) {
1065 if (is_contained(SupportedIDs, Metadata[Idx].first)) {
1066 ++Idx;
1067 } else {
1068 // Swap element to end and remove it.
1069 std::swap(Metadata[Idx], Metadata.back());
1070 Metadata.pop_back();
1071 }
1072 }
1073}
1074
1075/// \returns \p I after propagating metadata from \p VL.
// NOTE(review): the listing dropped the signature opener (original 1076,
// presumably 'Instruction *llvm::propagateMetadata(Instruction *Inst,
// ArrayRef<Value *> VL) {') and the lines (original 1079-1080) that build
// the 'Metadata' list from VL[0] -- confirm against the upstream file.
1077 if (VL.empty())
1078 return Inst;
1081
// For each propagatable metadata kind, fold in the metadata of every other
// instruction in VL; a kind-specific merge keeps only what is valid for all.
1082 for (auto &[Kind, MD] : Metadata) {
1083 // Skip MMRA metadata if the instruction cannot have it.
1084 if (Kind == LLVMContext::MD_mmra && !canInstructionHaveMMRAs(*Inst))
1085 continue;
1086
// Once MD becomes null, the remaining iterations are skipped by the loop
// condition and the kind is dropped below via setMetadata(Kind, nullptr).
1087 for (int J = 1, E = VL.size(); MD && J != E; ++J) {
1088 const Instruction *IJ = cast<Instruction>(VL[J]);
1089 MDNode *IMD = IJ->getMetadata(Kind);
1090
1091 switch (Kind) {
1092 case LLVMContext::MD_mmra: {
1093 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD);
1094 break;
1095 }
1096 case LLVMContext::MD_tbaa:
1097 MD = MDNode::getMostGenericTBAA(MD, IMD);
1098 break;
1099 case LLVMContext::MD_alias_scope:
// NOTE(review): the merge for alias.scope (original 1100, presumably
// 'MD = MDNode::getMostGenericAliasScope(MD, IMD);') was dropped by the
// listing extraction.
1101 break;
1102 case LLVMContext::MD_fpmath:
1103 MD = MDNode::getMostGenericFPMath(MD, IMD);
1104 break;
1105 case LLVMContext::MD_noalias:
1106 case LLVMContext::MD_nontemporal:
1107 case LLVMContext::MD_invariant_load:
1108 MD = MDNode::intersect(MD, IMD);
1109 break;
1110 case LLVMContext::MD_access_group:
1111 MD = intersectAccessGroups(Inst, IJ);
1112 break;
1113 default:
1114 llvm_unreachable("unhandled metadata");
1115 }
1116 }
1117
1118 Inst->setMetadata(Kind, MD);
1119 }
1120
1121 return Inst;
1122}
1123
1124Constant *
// NOTE(review): the rest of the signature (source line 1125, carrying the
// function name — createBitMaskForGaps per the header — plus the IRBuilder
// and VF parameters) is missing from this extraction.
1126 const InterleaveGroup<Instruction> &Group) {
1127 // All 1's means mask is not needed.
1128 if (Group.isFull())
1129 return nullptr;
1130
1131 // TODO: support reversed access.
1132 assert(!Group.isReverse() && "Reversed group not supported.");
1133
 // Build an i1 constant vector with one lane per (VF iteration x factor
 // slot); a lane is true iff the group actually has a member at that slot.
 // NOTE(review): the declaration of `Mask` (source line 1134, presumably
 // SmallVector<Constant *, 16>) is missing from this extraction.
1135 for (unsigned i = 0; i < VF; i++)
1136 for (unsigned j = 0; j < Group.getFactor(); ++j) {
1137 unsigned HasMember = Group.getMember(j) ? 1 : 0;
1138 Mask.push_back(Builder.getInt1(HasMember));
1139 }
1140
1141 return ConstantVector::get(Mask);
1142}
1143
1145llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
1146 SmallVector<int, 16> MaskVec;
1147 for (unsigned i = 0; i < VF; i++)
1148 for (unsigned j = 0; j < ReplicationFactor; j++)
1149 MaskVec.push_back(i);
1150
1151 return MaskVec;
1152}
1153
// NOTE(review): the first line of the signature (source line 1154 — return
// type and function name createInterleaveMask(unsigned VF, ...) and the
// declaration of `Mask` (source line 1156) are missing from this extraction.
1155 unsigned NumVecs) {
 // Interleave NumVecs vectors of VF lanes each: result lane (i*NumVecs + j)
 // picks lane i of vector j, e.g. VF=4, NumVecs=2 -> <0,4,1,5,2,6,3,7>.
1157 for (unsigned i = 0; i < VF; i++)
1158 for (unsigned j = 0; j < NumVecs; j++)
1159 Mask.push_back(j * VF + i);
1160
1161 return Mask;
1162}
1163
// NOTE(review): the return-type line (source 1164) and the declaration of
// `Mask` (source 1166) are missing from this extraction.
1165llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
 // Select every Stride-th lane beginning at Start, e.g. Start=0, Stride=2,
 // VF=4 -> <0,2,4,6>. Used to de-interleave one member of a strided group.
1167 for (unsigned i = 0; i < VF; i++)
1168 Mask.push_back(Start + i * Stride);
1169
1170 return Mask;
1171}
1172
// NOTE(review): the first signature line (source 1173 — return type and name
// createSequentialMask(unsigned Start, ...)) and the declaration of `Mask`
// (source 1176) are missing from this extraction.
1174 unsigned NumInts,
1175 unsigned NumUndefs) {
 // NumInts consecutive indices starting at Start...
1177 for (unsigned i = 0; i < NumInts; i++)
1178 Mask.push_back(Start + i);
1179
 // ...followed by NumUndefs "don't care" lanes (-1 is the undef sentinel in
 // shuffle masks).
1180 for (unsigned i = 0; i < NumUndefs; i++)
1181 Mask.push_back(-1);
1182
1183 return Mask;
1184}
1185
// NOTE(review): the first signature line (source 1186 — return type and name
// createUnaryMask(ArrayRef<int> Mask, ...)) is missing from this extraction.
1187 unsigned NumElts) {
1188 // Avoid casts in the loop and make sure we have a reasonable number.
1189 int NumEltsSigned = NumElts;
1190 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1191
1192 // If the mask chooses an element from operand 1, reduce it to choose from the
1193 // corresponding element of operand 0. Undef mask elements are unchanged.
 // I.e. fold a two-operand shuffle mask into the equivalent one-operand
 // (unary) mask: indices in [NumElts, 2*NumElts) are shifted down by NumElts.
1194 SmallVector<int, 16> UnaryMask;
1195 for (int MaskElt : Mask) {
1196 assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
 // Undef lanes (-1) are negative and thus fall through unchanged.
1197 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1198 UnaryMask.push_back(UnaryElt);
1199 }
1200 return UnaryMask;
1201}
1202
1203/// A helper function for concatenating vectors. This function concatenates two
1204/// vectors having the same element type. If the second vector has fewer
1205/// elements than the first, it is padded with undefs.
// NOTE(review): the signature line (source 1206 — static Value *
// concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, ...)) is missing
// from this extraction. Both inputs must be fixed-width vectors (see the
// FixedVectorType casts below).
1207 Value *V2) {
1208 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1209 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1210 assert(VecTy1 && VecTy2 &&
1211 VecTy1->getScalarType() == VecTy2->getScalarType() &&
1212 "Expect two vectors with the same element type");
1213
1214 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1215 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1216 assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1217
1218 if (NumElts1 > NumElts2) {
1219 // Extend with UNDEFs.
 // Widen V2 to NumElts1 lanes so both shuffle operands have equal width;
 // the extra lanes are undef (sequential mask with NumElts1-NumElts2 -1s).
1220 V2 = Builder.CreateShuffleVector(
1221 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1222 }
1223
 // Select the NumElts1 lanes of V1 followed by the first NumElts2 lanes of
 // the (possibly widened) V2, producing a NumElts1+NumElts2 lane result.
1224 return Builder.CreateShuffleVector(
1225 V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1226}
1227
// NOTE(review): the signature line (source 1228 — Value *
// llvm::concatenateVectors(IRBuilderBase &Builder, ...)) and the declarations
// of `ResList` and `TmpList` (source 1233 and 1236) are missing from this
// extraction.
1229 ArrayRef<Value *> Vecs) {
1230 unsigned NumVecs = Vecs.size();
1231 assert(NumVecs > 1 && "Should be at least two vectors");
1232
 // Concatenate pairwise in rounds (a balanced reduction tree): each round
 // halves the number of vectors until a single result remains.
1234 ResList.append(Vecs.begin(), Vecs.end());
1235 do {
1237 for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1238 Value *V0 = ResList[i], *V1 = ResList[i + 1];
 // Only the final vector may be narrower; concatenateTwoVectors pads it.
1239 assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1240 "Only the last vector may have a different type");
1241
1242 TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1243 }
1244
1245 // Push the last vector if the total number of vectors is odd.
1246 if (NumVecs % 2 != 0)
1247 TmpList.push_back(ResList[NumVecs - 1]);
1248
1249 ResList = TmpList;
1250 NumVecs = ResList.size();
1251 } while (NumVecs > 1);
1252
1253 return ResList[0];
1254}
1255
// NOTE(review): the signature line (source 1256 — bool
// llvm::maskIsAllZeroOrUndef(Value *Mask)) is missing from this extraction.
// Conservative predicate: returns true only when Mask is a *constant* i1
// vector whose every lane is provably zero or undef; any non-constant or
// uninspectable mask yields false.
1257 assert(isa<VectorType>(Mask->getType()) &&
1258 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1259 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1260 1 &&
1261 "Mask must be a vector of i1");
1262
1263 auto *ConstMask = dyn_cast<Constant>(Mask);
1264 if (!ConstMask)
1265 return false;
 // Fast path: the whole constant is zero or undef.
1266 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1267 return true;
 // Scalable vectors cannot be inspected lane-by-lane; give up conservatively.
1268 if (isa<ScalableVectorType>(ConstMask->getType()))
1269 return false;
 // Every lane must be zero or undef; a lane that is neither (or that
 // getAggregateElement cannot produce) makes the answer false.
1270 for (unsigned
1271 I = 0,
1272 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1273 I != E; ++I) {
1274 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1275 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1276 continue;
1277 return false;
1278 }
1279 return true;
1280}
1281
// NOTE(review): the signature line (source 1282 — bool
// llvm::maskIsAllOneOrUndef(Value *Mask)) is missing from this extraction.
// Conservative predicate: returns true only when Mask is a *constant* i1
// vector whose every lane is provably all-ones or undef.
1283 assert(isa<VectorType>(Mask->getType()) &&
1284 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1285 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1286 1 &&
1287 "Mask must be a vector of i1");
1288
1289 auto *ConstMask = dyn_cast<Constant>(Mask);
1290 if (!ConstMask)
1291 return false;
 // Fast path: the whole constant is all-ones or undef.
1292 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1293 return true;
 // Scalable vectors cannot be inspected lane-by-lane; give up conservatively.
1294 if (isa<ScalableVectorType>(ConstMask->getType()))
1295 return false;
1296 for (unsigned
1297 I = 0,
1298 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1299 I != E; ++I) {
1300 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1301 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1302 continue;
1303 return false;
1304 }
1305 return true;
1306}
1307
// NOTE(review): the signature line (source 1308 — bool
// llvm::maskContainsAllOneOrUndef(Value *Mask)) is missing from this
// extraction. Unlike maskIsAllOneOrUndef above, this returns true if *any*
// lane of a constant i1 mask is all-ones or undef.
1309 assert(isa<VectorType>(Mask->getType()) &&
1310 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1311 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1312 1 &&
1313 "Mask must be a vector of i1");
1314
1315 auto *ConstMask = dyn_cast<Constant>(Mask);
1316 if (!ConstMask)
1317 return false;
1318 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1319 return true;
 // Scalable vectors cannot be inspected lane-by-lane; give up conservatively.
1320 if (isa<ScalableVectorType>(ConstMask->getType()))
1321 return false;
 // Lanes for which getAggregateElement returns null are simply skipped.
1322 for (unsigned
1323 I = 0,
1324 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1325 I != E; ++I) {
1326 if (auto *MaskElt = ConstMask->getAggregateElement(I))
1327 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1328 return true;
1329 }
1330 return false;
1331}
1332
1333/// TODO: This is a lot like known bits, but for
1334/// vectors. Is there something we can common this with?
// NOTE(review): the signature line (source 1335 — APInt
// llvm::possiblyDemandedEltsInMask(Value *Mask)) is missing from this
// extraction. Returns a bitmask with one bit per lane; a bit is cleared only
// when the lane is a provable constant zero. Anything that is not a
// ConstantVector (e.g. non-constant masks) conservatively leaves all bits set.
1336 assert(isa<FixedVectorType>(Mask->getType()) &&
1337 isa<IntegerType>(Mask->getType()->getScalarType()) &&
1338 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1339 1 &&
1340 "Mask must be a fixed width vector of i1");
1341
1342 const unsigned VWidth =
1343 cast<FixedVectorType>(Mask->getType())->getNumElements();
 // Start from "all lanes demanded" and clear only lanes known to be zero.
1344 APInt DemandedElts = APInt::getAllOnes(VWidth);
1345 if (auto *CV = dyn_cast<ConstantVector>(Mask))
1346 for (unsigned i = 0; i < VWidth; i++)
1347 if (CV->getAggregateElement(i)->isNullValue())
1348 DemandedElts.clearBit(i);
1349 return DemandedElts;
1350}
1351
1352bool InterleavedAccessInfo::isStrided(int Stride) {
1353 unsigned Factor = std::abs(Stride);
1354 return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1355}
1356
// Gather every load/store in the loop with its constant-stride descriptor
// into AccessStrideInfo, in program order.
1357void InterleavedAccessInfo::collectConstStrideAccesses(
// NOTE(review): the first parameter line (source 1358, presumably
// MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo) is missing
// from this extraction.
1359 const DenseMap<Value*, const SCEV*> &Strides) {
1360 auto &DL = TheLoop->getHeader()->getDataLayout();
1361
1362 // Since it's desired that the load/store instructions be maintained in
1363 // "program order" for the interleaved access analysis, we have to visit the
1364 // blocks in the loop in reverse postorder (i.e., in a topological order).
1365 // Such an ordering will ensure that any load/store that may be executed
1366 // before a second load/store will precede the second load/store in
1367 // AccessStrideInfo.
1368 LoopBlocksDFS DFS(TheLoop);
1369 DFS.perform(LI);
1370 for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1371 for (auto &I : *BB) {
// NOTE(review): the definition of `Ptr` (source 1372, presumably
// Value *Ptr = getLoadStorePointerOperand(&I)) is missing from this
// extraction; non-memory instructions yield a null Ptr and are skipped.
1373 if (!Ptr)
1374 continue;
1375 Type *ElementTy = getLoadStoreType(&I);
1376
1377 // Currently, codegen doesn't support cases where the type size doesn't
1378 // match the alloc size. Skip them for now.
1379 uint64_t Size = DL.getTypeAllocSize(ElementTy);
1380 if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1381 continue;
1382
1383 // We don't check wrapping here because we don't know yet if Ptr will be
1384 // part of a full group or a group with gaps. Checking wrapping for all
1385 // pointers (even those that end up in groups with no gaps) will be overly
1386 // conservative. For full groups, wrapping should be ok since if we would
1387 // wrap around the address space we would do a memory access at nullptr
1388 // even without the transformation. The wrapping checks are therefore
1389 // deferred until after we've formed the interleaved groups.
 // Accesses with no provable constant stride record Stride == 0.
1390 int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides,
1391 /*Assume=*/true, /*ShouldCheckWrap=*/false)
1392 .value_or(0);
1393
1394 const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
// NOTE(review): the final argument of the StrideDescriptor construction
// (source 1396, presumably getLoadStoreAlignment(&I)) is missing from this
// extraction.
1395 AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1397 }
1398}
1399
1400// Analyze interleaved accesses and collect them into interleaved load and
1401// store groups.
1402//
1403// When generating code for an interleaved load group, we effectively hoist all
1404// loads in the group to the location of the first load in program order. When
1405// generating code for an interleaved store group, we sink all stores to the
1406// location of the last store. This code motion can change the order of load
1407// and store instructions and may break dependences.
1408//
1409// The code generation strategy mentioned above ensures that we won't violate
1410// any write-after-read (WAR) dependences.
1411//
1412// E.g., for the WAR dependence: a = A[i]; // (1)
1413// A[i] = b; // (2)
1414//
1415// The store group of (2) is always inserted at or below (2), and the load
1416// group of (1) is always inserted at or above (1). Thus, the instructions will
1417// never be reordered. All other dependences are checked to ensure the
1418// correctness of the instruction reordering.
1419//
1420// The algorithm visits all memory accesses in the loop in bottom-up program
1421// order. Program order is established by traversing the blocks in the loop in
1422// reverse postorder when collecting the accesses.
1423//
1424// We visit the memory accesses in bottom-up order because it can simplify the
1425// construction of store groups in the presence of write-after-write (WAW)
1426// dependences.
1427//
1428// E.g., for the WAW dependence: A[i] = a; // (1)
1429// A[i] = b; // (2)
1430// A[i + 1] = c; // (3)
1431//
1432// We will first create a store group with (3) and (2). (1) can't be added to
1433// this group because it and (2) are dependent. However, (1) can be grouped
1434// with other accesses that may precede it in program order. Note that a
1435// bottom-up order does not imply that WAW dependences should not be checked.
// NOTE(review): the first signature line (source 1436 — void
// InterleavedAccessInfo::analyzeInterleaving(...)) is missing from this
// extraction, as are the local declarations of AccessStrideInfo (source
// 1442), StoreGroups (1452) and LoadGroups (1454) referenced below. The
// algorithm itself is documented by the large comment block preceding this
// function.
1437 bool EnablePredicatedInterleavedMemAccesses) {
1438 LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1439 const auto &Strides = LAI->getSymbolicStrides();
1440
1441 // Holds all accesses with a constant stride.
1443 collectConstStrideAccesses(AccessStrideInfo, Strides);
1444
1445 if (AccessStrideInfo.empty())
1446 return;
1447
1448 // Collect the dependences in the loop.
1449 collectDependences();
1450
1451 // Holds all interleaved store groups temporarily.
1453 // Holds all interleaved load groups temporarily.
1455 // Groups added to this set cannot have new members added.
1456 SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
1457
1458 // Search in bottom-up program order for pairs of accesses (A and B) that can
1459 // form interleaved load or store groups. In the algorithm below, access A
1460 // precedes access B in program order. We initialize a group for B in the
1461 // outer loop of the algorithm, and then in the inner loop, we attempt to
1462 // insert each A into B's group if:
1463 //
1464 // 1. A and B have the same stride,
1465 // 2. A and B have the same memory object size, and
1466 // 3. A belongs in B's group according to its distance from B.
1467 //
1468 // Special care is taken to ensure group formation will not break any
1469 // dependences.
1470 for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1471 BI != E; ++BI) {
1472 Instruction *B = BI->first;
1473 StrideDescriptor DesB = BI->second;
1474
1475 // Initialize a group for B if it has an allowable stride. Even if we don't
1476 // create a group for B, we continue with the bottom-up algorithm to ensure
1477 // we don't break any of B's dependences.
1478 InterleaveGroup<Instruction> *GroupB = nullptr;
1479 if (isStrided(DesB.Stride) &&
1480 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1481 GroupB = getInterleaveGroup(B);
1482 if (!GroupB) {
1483 LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1484 << '\n');
1485 GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1486 if (B->mayWriteToMemory())
1487 StoreGroups.insert(GroupB);
1488 else
1489 LoadGroups.insert(GroupB);
1490 }
1491 }
1492
1493 for (auto AI = std::next(BI); AI != E; ++AI) {
1494 Instruction *A = AI->first;
1495 StrideDescriptor DesA = AI->second;
1496
1497 // Our code motion strategy implies that we can't have dependences
1498 // between accesses in an interleaved group and other accesses located
1499 // between the first and last member of the group. Note that this also
1500 // means that a group can't have more than one member at a given offset.
1501 // The accesses in a group can have dependences with other accesses, but
1502 // we must ensure we don't extend the boundaries of the group such that
1503 // we encompass those dependent accesses.
1504 //
1505 // For example, assume we have the sequence of accesses shown below in a
1506 // stride-2 loop:
1507 //
1508 // (1, 2) is a group | A[i] = a; // (1)
1509 // | A[i-1] = b; // (2) |
1510 // A[i-3] = c; // (3)
1511 // A[i] = d; // (4) | (2, 4) is not a group
1512 //
1513 // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1514 // but not with (4). If we did, the dependent access (3) would be within
1515 // the boundaries of the (2, 4) group.
 // Returns the first member of Group that cannot legally be reordered
 // with access A, or nullptr if A can be reordered with every member.
1516 auto DependentMember = [&](InterleaveGroup<Instruction> *Group,
1517 StrideEntry *A) -> Instruction * {
1518 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) {
1519 Instruction *MemberOfGroupB = Group->getMember(Index);
1520 if (MemberOfGroupB && !canReorderMemAccessesForInterleavedGroups(
1521 A, &*AccessStrideInfo.find(MemberOfGroupB)))
1522 return MemberOfGroupB;
1523 }
1524 return nullptr;
1525 };
1526
1527 auto GroupA = getInterleaveGroup(A);
1528 // If A is a load, dependencies are tolerable, there's nothing to do here.
1529 // If both A and B belong to the same (store) group, they are independent,
1530 // even if dependencies have not been recorded.
1531 // If both GroupA and GroupB are null, there's nothing to do here.
1532 if (A->mayWriteToMemory() && GroupA != GroupB) {
1533 Instruction *DependentInst = nullptr;
1534 // If GroupB is a load group, we have to compare AI against all
1535 // members of GroupB because if any load within GroupB has a dependency
1536 // on AI, we need to mark GroupB as complete and also release the
1537 // store GroupA (if A belongs to one). The former prevents incorrect
1538 // hoisting of load B above store A while the latter prevents incorrect
1539 // sinking of store A below load B.
1540 if (GroupB && LoadGroups.contains(GroupB))
1541 DependentInst = DependentMember(GroupB, &*AI);
1542 else if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI))
1543 DependentInst = B;
1544
1545 if (DependentInst) {
1546 // A has a store dependence on B (or on some load within GroupB) and
1547 // is part of a store group. Release A's group to prevent illegal
1548 // sinking of A below B. A will then be free to form another group
1549 // with instructions that precede it.
1550 if (GroupA && StoreGroups.contains(GroupA)) {
1551 LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1552 "dependence between "
1553 << *A << " and " << *DependentInst << '\n');
1554 StoreGroups.remove(GroupA);
1555 releaseGroup(GroupA);
1556 }
1557 // If B is a load and part of an interleave group, no earlier loads
1558 // can be added to B's interleave group, because this would mean the
1559 // DependentInst would move across store A. Mark the interleave group
1560 // as complete.
1561 if (GroupB && LoadGroups.contains(GroupB)) {
1562 LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
1563 << " as complete.\n");
1564 CompletedLoadGroups.insert(GroupB);
1565 }
1566 }
1567 }
1568 if (CompletedLoadGroups.contains(GroupB)) {
1569 // Skip trying to add A to B, continue to look for other conflicting A's
1570 // in groups to be released.
1571 continue;
1572 }
1573
1574 // At this point, we've checked for illegal code motion. If either A or B
1575 // isn't strided, there's nothing left to do.
1576 if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1577 continue;
1578
1579 // Ignore A if it's already in a group or isn't the same kind of memory
1580 // operation as B.
1581 // Note that mayReadFromMemory() isn't mutually exclusive to
1582 // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1583 // here, canVectorizeMemory() should have returned false - except for the
1584 // case we asked for optimization remarks.
1585 if (isInterleaved(A) ||
1586 (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1587 (A->mayWriteToMemory() != B->mayWriteToMemory()))
1588 continue;
1589
1590 // Check rules 1 and 2. Ignore A if its stride or size is different from
1591 // that of B.
1592 if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1593 continue;
1594
1595 // Ignore A if the memory object of A and B don't belong to the same
1596 // address space
// NOTE(review): the condition line for this check (source 1597, presumably
// comparing getLoadStoreAddressSpace(A) against B's) is missing from this
// extraction; only its `continue` survives below.
1598 continue;
1599
1600 // Calculate the distance from A to B.
1601 const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1602 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1603 if (!DistToB)
1604 continue;
1605 int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1606
1607 // Check rule 3. Ignore A if its distance to B is not a multiple of the
1608 // size.
1609 if (DistanceToB % static_cast<int64_t>(DesB.Size))
1610 continue;
1611
1612 // All members of a predicated interleave-group must have the same predicate,
1613 // and currently must reside in the same BB.
1614 BasicBlock *BlockA = A->getParent();
1615 BasicBlock *BlockB = B->getParent();
1616 if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1617 (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1618 continue;
1619
1620 // The index of A is the index of B plus A's distance to B in multiples
1621 // of the size.
1622 int IndexA =
1623 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1624
1625 // Try to insert A into B's group.
1626 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
1627 LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1628 << " into the interleave group with" << *B
1629 << '\n');
1630 InterleaveGroupMap[A] = GroupB;
1631
1632 // Set the first load in program order as the insert position.
1633 if (A->mayReadFromMemory())
1634 GroupB->setInsertPos(A);
1635 }
1636 } // Iteration over A accesses.
1637 } // Iteration over B accesses.
1638
 // Re-runs the pointer-stride analysis with wrap checking for one group
 // member; releases the whole group (and returns true) if the member's
 // pointer may wrap around the address space.
1639 auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1640 int Index,
1641 const char *FirstOrLast) -> bool {
1642 Instruction *Member = Group->getMember(Index);
1643 assert(Member && "Group member does not exist");
1644 Value *MemberPtr = getLoadStorePointerOperand(Member);
1645 Type *AccessTy = getLoadStoreType(Member);
1646 if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, *DT, Strides,
1647 /*Assume=*/false, /*ShouldCheckWrap=*/true)
1648 .value_or(0))
1649 return false;
1650 LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1651 << FirstOrLast
1652 << " group member potentially pointer-wrapping.\n");
1653 releaseGroup(Group);
1654 return true;
1655 };
1656
1657 // Remove interleaved groups with gaps whose memory
1658 // accesses may wrap around. We have to revisit the getPtrStride analysis,
1659 // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1660 // not check wrapping (see documentation there).
1661 // FORNOW we use Assume=false;
1662 // TODO: Change to Assume=true but making sure we don't exceed the threshold
1663 // of runtime SCEV assumptions checks (thereby potentially failing to
1664 // vectorize altogether).
1665 // Additional optional optimizations:
1666 // TODO: If we are peeling the loop and we know that the first pointer doesn't
1667 // wrap then we can deduce that all pointers in the group don't wrap.
1668 // This means that we can forcefully peel the loop in order to only have to
1669 // check the first pointer for no-wrap. When we'll change to use Assume=true
1670 // we'll only need at most one runtime check per interleaved group.
1671 for (auto *Group : LoadGroups) {
1672 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1673 // load would wrap around the address space we would do a memory access at
1674 // nullptr even without the transformation.
1675 if (Group->isFull())
1676 continue;
1677
1678 // Case 2: If first and last members of the group don't wrap this implies
1679 // that all the pointers in the group don't wrap.
1680 // So we check only group member 0 (which is always guaranteed to exist),
1681 // and group member Factor - 1; If the latter doesn't exist we rely on
1682 // peeling (if it is a non-reversed access -- see Case 3).
1683 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1684 continue;
1685 if (Group->getMember(Group->getFactor() - 1))
1686 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, "last");
1687 else {
1688 // Case 3: A non-reversed interleaved load group with gaps: We need
1689 // to execute at least one scalar epilogue iteration. This will ensure
1690 // we don't speculatively access memory out-of-bounds. We only need
1691 // to look for a member at index factor - 1, since every group must have
1692 // a member at index zero.
1693 if (Group->isReverse()) {
1694 LLVM_DEBUG(
1695 dbgs() << "LV: Invalidate candidate interleaved group due to "
1696 "a reverse access with gaps.\n");
1697 releaseGroup(Group);
1698 continue;
1699 }
1700 LLVM_DEBUG(
1701 dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1702 RequiresScalarEpilogue = true;
1703 }
1704 }
1705
1706 for (auto *Group : StoreGroups) {
1707 // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1708 // store would wrap around the address space we would do a memory access at
1709 // nullptr even without the transformation.
1710 if (Group->isFull())
1711 continue;
1712
1713 // Interleave-store-group with gaps is implemented using masked wide store.
1714 // Remove interleaved store groups with gaps if
1715 // masked-interleaved-accesses are not enabled by the target.
1716 if (!EnablePredicatedInterleavedMemAccesses) {
1717 LLVM_DEBUG(
1718 dbgs() << "LV: Invalidate candidate interleaved store group due "
1719 "to gaps.\n");
1720 releaseGroup(Group);
1721 continue;
1722 }
1723
1724 // Case 2: If first and last members of the group don't wrap this implies
1725 // that all the pointers in the group don't wrap.
1726 // So we check only group member 0 (which is always guaranteed to exist),
1727 // and the last group member. Case 3 (scalar epilog) is not relevant for
1728 // stores with gaps, which are implemented with masked-store (rather than
1729 // speculative access, as in loads).
1730 if (InvalidateGroupIfMemberMayWrap(Group, 0, "first"))
1731 continue;
1732 for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1733 if (Group->getMember(Index)) {
1734 InvalidateGroupIfMemberMayWrap(Group, Index, "last");
1735 break;
1736 }
1737 }
1738}
1739
// NOTE(review): the signature line (source 1740 — void
// InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue()) and the
// early-exit condition line (source 1743, presumably
// `if (!requiresScalarEpilogue())`) are missing from this extraction.
1741 // If no group had triggered the requirement to create an epilogue loop,
1742 // there is nothing to do.
1744 return;
1745
1746 // Release groups requiring scalar epilogues. Note that this also removes them
1747 // from InterleaveGroups.
1748 bool ReleasedGroup = InterleaveGroups.remove_if([&](auto *Group) {
1749 if (!Group->requiresScalarEpilogue())
1750 return false;
1751 LLVM_DEBUG(
1752 dbgs()
1753 << "LV: Invalidate candidate interleaved group due to gaps that "
1754 "require a scalar epilogue (not allowed under optsize) and cannot "
1755 "be masked (not enabled). \n");
 // remove_if already detaches the group from the set, so use the variant
 // of releaseGroup that does not mutate InterleaveGroups while iterating.
1756 releaseGroupWithoutRemovingFromSet(Group);
1757 return true;
1758 });
 // We only reach here when an epilogue was required, so at least one group
 // must have been released.
1759 assert(ReleasedGroup && "At least one group must be invalidated, as a "
1760 "scalar epilogue was required");
1761 (void)ReleasedGroup;
1762 RequiresScalarEpilogue = false;
1763}
1764
// The primary template is deliberately a trap: metadata propagation is only
// meaningful for IR instructions, so only the InterleaveGroup<Instruction>
// specialization is usable.
1765template <typename InstT>
1766void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1767 llvm_unreachable("addMetadata can only be used for Instruction");
1768}
1769
1770namespace llvm {
1771template <>
// NOTE(review): the body of this explicit specialization (source lines
// 1772-1775 — presumably InterleaveGroup<Instruction>::addMetadata, which per
// the header "adds metadata (e.g.) to a newly created instruction") is
// missing from this extraction.
1776} // namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1421
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
This class represents a function call, abstracting a target machine's calling convention.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator_range< member_iterator > members(const ECValue &ECV) const
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
This instruction inserts a single (scalar) element into a VectorType value.
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * > > &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
The group of interleaved loads/stores sharing the same stride and close to each other.
uint32_t getFactor() const
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
void setInsertPos(InstTy *Inst)
bool isReverse() const
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Metadata node.
Definition Metadata.h:1080
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
ArrayRef< MDOperand > operands() const
Definition Metadata.h:1442
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
static LLVM_ABI MDNode * combine(LLVMContext &Ctx, const MMRAMetadata &A, const MMRAMetadata &B)
Combines A and B according to MMRA semantics.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
reverse_iterator rend()
Definition MapVector.h:74
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
reverse_iterator rbegin()
Definition MapVector.h:70
Root of the metadata hierarchy.
Definition Metadata.h:64
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a constant integer value.
const APInt & getAPInt() const
bool remove(const value_type &X)
Remove an item from the set vector.
Definition SetVector.h:181
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provides information about what library functions are available for the current target.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
ArrayRef< Type * > subtypes() const
Definition Type.h:365
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
Value * getOperand(unsigned i) const
Definition User.h:207
static LLVM_ABI bool isVPCast(Intrinsic::ID ID)
static LLVM_ABI std::optional< unsigned > getVectorLengthParamPos(Intrinsic::ID IntrinsicID)
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
Base class of all SIMD vector types.
Type * getElementType() const
An efficient, type-erasing, non-owning reference to a callable.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isTargetIntrinsic(ID IID)
isTargetIntrinsic - Returns true if IID is an intrinsic specific to a certain target.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID, const TargetTransformInfo *TTI)
Identify if the intrinsic is trivially scalarizable.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
unsigned getLoadStoreAddressSpace(const Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition bit.h:303
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
unsigned M1(unsigned Val)
Definition VE.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
constexpr unsigned MaxAnalysisRecursionDepth
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
constexpr int PoisonMaskElem
LLVM_ABI bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
LLVM_ABI Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
TargetTransformInfo TTI
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const DenseMap< Value *, const SCEV * > &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
unsigned M0(unsigned Val)
Definition VE.h:376
auto make_second_range(ContainerTy &&c)
Given a container of pairs, return a range over the second elements.
Definition STLExtras.h:1409
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2019
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2166
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
LLVM_ABI std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap< Value *, const SCEV * > &StridesMap=DenseMap< Value *, const SCEV * >(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872