doxygen/VectorUtils_8h_source.html

//===- llvm/Analysis/VectorUtils.h - Vector utilities -----------*- C++ -*-===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file defines some vectorizer utilities.

//

//===----------------------------------------------------------------------===//


#ifndef LLVM_ANALYSIS_VECTORUTILS_H

#define LLVM_ANALYSIS_VECTORUTILS_H


#include "llvm/ADT/MapVector.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/Sequence.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/Analysis/LoopAccessAnalysis.h"

#include "llvm/IR/Module.h"

#include "llvm/IR/VFABIDemangler.h"

#include "llvm/IR/VectorTypeUtils.h"

#include "llvm/Support/CheckedArithmetic.h"

#include "llvm/Support/Compiler.h"


namespace llvm {

class TargetLibraryInfo;

class IntrinsicInst;


/// The Vector Function Database.

///

/// Helper class used to find the vector functions associated to a

/// scalar CallInst.


class VFDatabase {

  /// The Module of the CallInst CI.

  const Module *M;

  /// The CallInst instance being queried for scalar to vector mappings.

  const CallInst &CI;

  /// List of vector functions descriptors associated to the call

  /// instruction.

  const SmallVector<VFInfo, 8> ScalarToVectorMappings;


  /// Retrieve the scalar-to-vector mappings associated to the rule of

  /// a vector Function ABI.

  static void getVFABIMappings(const CallInst &CI,

                               SmallVectorImpl<VFInfo> &Mappings) {

    if (!CI.getCalledFunction())

      return;


    const StringRef ScalarName = CI.getCalledFunction()->getName();


    SmallVector<std::string, 8> ListOfStrings;

    // The check for the vector-function-abi-variant attribute is done when

    // retrieving the vector variant names here.

    VFABI::getVectorVariantNames(CI, ListOfStrings);

    if (ListOfStrings.empty())

      return;

    for (const auto &MangledName : ListOfStrings) {

      const std::optional<VFInfo> Shape =

          VFABI::tryDemangleForVFABI(MangledName, CI.getFunctionType());

      // A match is found via scalar and vector names, and also by

      // ensuring that the variant described in the attribute has a

      // corresponding definition or declaration of the vector

      // function in the Module M.

      if (Shape && (Shape->ScalarName == ScalarName)) {

        assert(CI.getModule()->getFunction(Shape->VectorName) &&

               "Vector function is missing.");

        Mappings.push_back(*Shape);

      }

    }

  }


public:

  /// Retrieve all the VFInfo instances associated to the CallInst CI.


  static SmallVector<VFInfo, 8> getMappings(const CallInst &CI) {

    SmallVector<VFInfo, 8> Ret;


    // Get mappings from the Vector Function ABI variants.

    getVFABIMappings(CI, Ret);


    // Other non-VFABI variants should be retrieved here.


    return Ret;

  }


  static bool hasMaskedVariant(const CallInst &CI,

                               std::optional<ElementCount> VF = std::nullopt) {

    // Check whether we have at least one masked vector version of a scalar

    // function. If no VF is specified then we check for any masked variant,

    // otherwise we look for one that matches the supplied VF.

    auto Mappings = VFDatabase::getMappings(CI);

    for (VFInfo Info : Mappings)

      if (!VF || Info.Shape.VF == *VF)

        if (Info.isMasked())

          return true;


    return false;

  }


  /// Constructor, requires a CallInst instance.


  VFDatabase(CallInst &CI)

      : M(CI.getModule()), CI(CI),

        ScalarToVectorMappings(VFDatabase::getMappings(CI)) {}


  /// \defgroup VFDatabase query interface.

  ///

  /// @{

  /// Retrieve the Function with VFShape \p Shape.


  Function *getVectorizedFunction(const VFShape &Shape) const {

    if (Shape == VFShape::getScalarShape(CI.getFunctionType()))

      return CI.getCalledFunction();


    for (const auto &Info : ScalarToVectorMappings)

      if (Info.Shape == Shape)

        return M->getFunction(Info.VectorName);


    return nullptr;

  }


  /// @}

};


template <typename T> class ArrayRef;

class DemandedBits;

template <typename InstTy> class InterleaveGroup;

class IRBuilderBase;

class Loop;

class TargetTransformInfo;

class Value;


namespace Intrinsic {
/// Intrinsic identifiers are passed around as plain unsigned values by the
/// declarations in this header.
using ID = unsigned;
} // namespace Intrinsic


/// Identify if the intrinsic is trivially vectorizable.

/// This method returns true if the intrinsic's argument types are all scalars

/// for the scalar form of the intrinsic and all vectors (or scalars handled by

/// isVectorIntrinsicWithScalarOpAtArg) for the vector form of the intrinsic.

///

/// Note: isTriviallyVectorizable implies isTriviallyScalarizable.

LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID);


/// Identify if the intrinsic is trivially scalarizable.

/// This method returns true following the same predicates of

/// isTriviallyVectorizable.


/// Note: There are intrinsics where implementing vectorization for the

/// intrinsic is redundant, but we want to implement scalarization of the

/// vector. To prevent the requirement that an intrinsic also implements

/// vectorization we provide this separate function.

LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID);


/// Identifies if the vector form of the intrinsic has a scalar operand.

/// \p TTI is used to consider target specific intrinsics, if no target specific

/// intrinsics will be considered then it is appropriate to pass in nullptr.

LLVM_ABI bool

isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx,

                                   const TargetTransformInfo *TTI);


/// Identifies if the vector form of the intrinsic is overloaded on the type of

/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.

/// \p TTI is used to consider target specific intrinsics, if no target specific

/// intrinsics will be considered then it is appropriate to pass in nullptr.

LLVM_ABI bool

isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx,

                                       const TargetTransformInfo *TTI);


/// Identifies if the vector form of the intrinsic that returns a struct is

/// overloaded at the struct element index \p RetIdx. \p TTI is used to

/// consider target specific intrinsics, if no target specific intrinsics

/// will be considered then it is appropriate to pass in nullptr.

LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(

    Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI);


/// Returns intrinsic ID for call.

/// For the input call instruction it finds the mapping intrinsic and returns

/// its intrinsic ID; if no mapping is found, it returns not_intrinsic.

LLVM_ABI Intrinsic::ID

getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI);


/// Returns the corresponding factor of llvm.vector.interleaveN intrinsics.

LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID);


/// Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.

LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID);


/// Given a deinterleaveN intrinsic, return the (narrow) vector type of each

/// factor.

LLVM_ABI VectorType *getDeinterleavedVectorType(IntrinsicInst *DI);


/// Given a vector and an element number, see if the scalar value is

/// already around as a register, for example if it were inserted then extracted

/// from the vector.

LLVM_ABI Value *findScalarElement(Value *V, unsigned EltNo);


/// If all non-negative \p Mask elements are the same value, return that value.

/// If all elements are negative (undefined) or \p Mask contains different

/// non-negative values, return -1.

LLVM_ABI int getSplatIndex(ArrayRef<int> Mask);


/// Get splat value if the input is a splat vector or return nullptr.

/// The value may be extracted from a splat constants vector or from

/// a sequence of instructions that broadcast a single value into a vector.

LLVM_ABI Value *getSplatValue(const Value *V);


/// Return true if each element of the vector value \p V is poisoned or equal to

/// every other non-poisoned element. If an index element is specified, either

/// every element of the vector is poisoned or the element at that index is not

/// poisoned and equal to every other non-poisoned element.

/// This may be more powerful than the related getSplatValue() because it is

/// not limited by finding a scalar source value to a splatted vector.

LLVM_ABI bool isSplatValue(const Value *V, int Index = -1, unsigned Depth = 0);


/// Transform a shuffle mask's output demanded element mask into demanded

/// element masks for the 2 operands, returns false if the mask isn't valid.

/// Both \p DemandedLHS and \p DemandedRHS are initialised to [SrcWidth].

/// \p AllowUndefElts permits "-1" indices to be treated as undef.

LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,

                                     const APInt &DemandedElts,

                                     APInt &DemandedLHS, APInt &DemandedRHS,

                                     bool AllowUndefElts = false);


/// Does this shuffle mask represent either one slide shuffle or a pair of

/// two slide shuffles, combined with a select on some constant vector mask?

/// A slide is a shuffle mask which shifts some set of elements up or down

/// the vector, with all other elements being undefined.  An identity shuffle

/// will be matched as a slide by 0.  The output parameter provides the source

/// (-1 means no source), and slide direction for each slide.

LLVM_ABI bool isMaskedSlidePair(ArrayRef<int> Mask, int NumElts,

                                std::array<std::pair<int, int>, 2> &SrcInfo);


/// Replace each shuffle mask index with the scaled sequential indices for an

/// equivalent mask of narrowed elements. Mask elements that are less than 0

/// (sentinel values) are repeated in the output mask.

///

/// Example with Scale = 4:

///   <4 x i32> <3, 2, 0, -1> -->

///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1>

///

/// This is the reverse process of widening shuffle mask elements, but it always

/// succeeds because the indexes can always be multiplied (scaled up) to map to

/// narrower vector elements.

LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,

                                    SmallVectorImpl<int> &ScaledMask);


/// Try to transform a shuffle mask by replacing elements with the scaled index

/// for an equivalent mask of widened elements. If all mask elements that would

/// map to a wider element of the new mask are the same negative number

/// (sentinel value), that element of the new mask is the same value. If any

/// element in a given slice is negative and some other element in that slice is

/// not the same value, return false (partial matches with sentinel values are

/// not allowed).

///

/// Example with Scale = 4:

///   <16 x i8> <12, 13, 14, 15, 8, 9, 10, 11, 0, 1, 2, 3, -1, -1, -1, -1> -->

///   <4 x i32> <3, 2, 0, -1>

///

/// This is the reverse process of narrowing shuffle mask elements if it

/// succeeds. This transform is not always possible because indexes may not

/// divide evenly (scale down) to map to wider vector elements.

LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,

                                   SmallVectorImpl<int> &ScaledMask);


/// A variant of the previous method which is specialized for Scale=2, and

/// treats -1 as undef and allows widening when a wider element is partially

/// undef in the narrow form of the mask.  This transformation discards

/// information about which bytes in the original shuffle were undef.

LLVM_ABI bool widenShuffleMaskElts(ArrayRef<int> M,

                                   SmallVectorImpl<int> &NewMask);


/// Attempt to narrow/widen the \p Mask shuffle mask to the \p NumDstElts target

/// width. Internally this will call narrowShuffleMaskElts/widenShuffleMaskElts.

/// This will assert unless NumDstElts is a multiple of Mask.size (or

/// vice-versa). Returns false on failure, and ScaledMask will be in an

/// undefined state.

LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef<int> Mask,

                                   SmallVectorImpl<int> &ScaledMask);


/// Repetitively apply `widenShuffleMaskElts()` for as long as it succeeds,

/// to get the shuffle mask with widest possible elements.

LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef<int> Mask,

                                           SmallVectorImpl<int> &ScaledMask);


/// Splits and processes shuffle mask depending on the number of input and

/// output registers. The function does 2 main things: 1) splits the

/// source/destination vectors into real registers; 2) do the mask analysis to

/// identify which real registers are permuted. Then the function processes

/// resulting registers mask using provided action items. If no input register

/// is defined, \p NoInputAction action is used. If only 1 input register is

/// used, \p SingleInputAction is used, otherwise \p ManyInputsAction is used to

/// process > 2 input registers and masks.

/// \param Mask Original shuffle mask.

/// \param NumOfSrcRegs Number of source registers.

/// \param NumOfDestRegs Number of destination registers.

/// \param NumOfUsedRegs Number of actually used destination registers.

LLVM_ABI void processShuffleMasks(

    ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,

    unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,

    function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,

    function_ref<void(ArrayRef<int>, unsigned, unsigned, bool)>

        ManyInputsAction);


/// Compute the demanded elements mask of horizontal binary operations. A

/// horizontal operation combines two adjacent elements in a vector operand.

/// This function returns a mask for the elements that correspond to the first

/// operand of this horizontal combination. For example, for two vectors

/// [X1, X2, X3, X4] and [Y1, Y2, Y3, Y4], the resulting mask can include the

/// elements X1, X3, Y1, and Y3. To get the other operands, simply shift the

/// result of this function to the left by 1.

///

/// \param VectorBitWidth the total bit width of the vector

/// \param DemandedElts   the demanded elements mask for the operation

/// \param DemandedLHS    the demanded elements mask for the left operand

/// \param DemandedRHS    the demanded elements mask for the right operand

LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth,

                                                  const APInt &DemandedElts,

                                                  APInt &DemandedLHS,

                                                  APInt &DemandedRHS);


/// Compute a map of integer instructions to their minimum legal type

/// size.

///

/// C semantics force sub-int-sized values (e.g. i8, i16) to be promoted to int

/// type (e.g. i32) whenever arithmetic is performed on them.

///

/// For targets with native i8 or i16 operations, usually InstCombine can shrink

/// the arithmetic type down again. However InstCombine refuses to create

/// illegal types, so for targets without i8 or i16 registers, the lengthening

/// and shrinking remains.

///

/// Most SIMD ISAs (e.g. NEON) however support vectors of i8 or i16 even when

/// their scalar equivalents do not, so during vectorization it is important to

/// remove these lengthens and truncates when deciding the profitability of

/// vectorization.

///

/// This function analyzes the given range of instructions and determines the

/// minimum type size each can be converted to. It attempts to remove or

/// minimize type size changes across each def-use chain, so for example in the

/// following code:

///

///   %1 = load i8, i8*

///   %2 = add i8 %1, 2

///   %3 = load i16, i16*

///   %4 = zext i8 %2 to i32

///   %5 = zext i16 %3 to i32

///   %6 = add i32 %4, %5

///   %7 = trunc i32 %6 to i16

///

/// Instruction %6 must be done at least in i16, so computeMinimumValueSizes

/// will return: {%1: 16, %2: 16, %3: 16, %4: 16, %5: 16, %6: 16, %7: 16}.

///

/// If the optional TargetTransformInfo is provided, this function tries harder

/// to do less work by only looking at illegal types.

LLVM_ABI MapVector<Instruction *, uint64_t>

computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,

                         const TargetTransformInfo *TTI = nullptr);


/// Compute the union of two access-group lists.

///

/// If the list contains just one access group, it is returned directly. If the

/// list is empty, returns nullptr.

LLVM_ABI MDNode *uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2);


/// Compute the access-group list of access groups that @p Inst1 and @p Inst2

/// are both in. If either instruction does not access memory at all, it is

/// considered to be in every list.

///

/// If the list contains just one access group, it is returned directly. If the

/// list is empty, returns nullptr.

LLVM_ABI MDNode *intersectAccessGroups(const Instruction *Inst1,

                                       const Instruction *Inst2);


/// Add metadata from \p Inst to \p Metadata, if it can be preserved after

/// vectorization. It can be preserved after vectorization if the kind is one of

/// [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,

/// MD_access_group, MD_mmra].

LLVM_ABI void getMetadataToPropagate(

    Instruction *Inst,

    SmallVectorImpl<std::pair<unsigned, MDNode *>> &Metadata);


/// Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath,

/// MD_nontemporal, MD_access_group, MD_mmra].

/// For K in Kinds, we get the MDNode for K from each of the

/// elements of VL, compute their "intersection" (i.e., the most generic

/// metadata value that covers all of the individual values), and set I's

/// metadata for K equal to the intersection value.

///

/// This function always sets a (possibly null) value for each K in Kinds.

LLVM_ABI Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);


/// Create a mask that filters the members of an interleave group where there

/// are gaps.

///

/// For example, the mask for \p Group with interleave-factor 3

/// and \p VF 4, that has only its first member present is:

///

///   <1,0,0,1,0,0,1,0,0,1,0,0>

///

/// Note: The result is a mask of 0's and 1's, as opposed to the other

/// create[*]Mask() utilities which create a shuffle mask (mask that

/// consists of indices).

LLVM_ABI Constant *

createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,

                     const InterleaveGroup<Instruction> &Group);


/// Create a mask with replicated elements.

///

/// This function creates a shuffle mask for replicating each of the \p VF

/// elements in a vector \p ReplicationFactor times. It can be used to

/// transform a mask of \p VF elements into a mask of

/// \p VF * \p ReplicationFactor elements used by a predicated

/// interleaved-group of loads/stores whose Interleaved-factor ==

/// \p ReplicationFactor.

///

/// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:

///

///   <0,0,0,1,1,1,2,2,2,3,3,3>

LLVM_ABI llvm::SmallVector<int, 16>

createReplicatedMask(unsigned ReplicationFactor, unsigned VF);


/// Create an interleave shuffle mask.

///

/// This function creates a shuffle mask for interleaving \p NumVecs vectors of

/// vectorization factor \p VF into a single wide vector. The mask is of the

/// form:

///

///   <0, VF, VF * 2, ..., VF * (NumVecs - 1), 1, VF + 1, VF * 2 + 1, ...>

///

/// For example, the mask for VF = 4 and NumVecs = 2 is:

///

///   <0, 4, 1, 5, 2, 6, 3, 7>.

LLVM_ABI llvm::SmallVector<int, 16> createInterleaveMask(unsigned VF,

                                                         unsigned NumVecs);


/// Create a stride shuffle mask.

///

/// This function creates a shuffle mask whose elements begin at \p Start and

/// are incremented by \p Stride. The mask can be used to deinterleave an

/// interleaved vector into separate vectors of vectorization factor \p VF. The

/// mask is of the form:

///

///   <Start, Start + Stride, ..., Start + Stride * (VF - 1)>

///

/// For example, the mask for Start = 0, Stride = 2, and VF = 4 is:

///

///   <0, 2, 4, 6>

LLVM_ABI llvm::SmallVector<int, 16>

createStrideMask(unsigned Start, unsigned Stride, unsigned VF);


/// Create a sequential shuffle mask.

///

/// This function creates shuffle mask whose elements are sequential and begin

/// at \p Start.  The mask contains \p NumInts integers and is padded with \p

/// NumUndefs undef values. The mask is of the form:

///

///   <Start, Start + 1, ... Start + NumInts - 1, undef_1, ... undef_NumUndefs>

///

/// For example, the mask for Start = 0, NumInts = 4, and NumUndefs = 4 is:

///

///   <0, 1, 2, 3, undef, undef, undef, undef>

LLVM_ABI llvm::SmallVector<int, 16>

createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs);


/// Given a shuffle mask for a binary shuffle, create the equivalent shuffle

/// mask assuming both operands are identical. This assumes that the unary

/// shuffle will use elements from operand 0 (operand 1 will be unused).

LLVM_ABI llvm::SmallVector<int, 16> createUnaryMask(ArrayRef<int> Mask,

                                                    unsigned NumElts);


/// Concatenate a list of vectors.

///

/// This function generates code that concatenates the vectors in \p Vecs into a

/// single large vector. The number of vectors should be greater than one, and

/// their element types should be the same. The number of elements in the

/// vectors should also be the same; however, if the last vector has fewer

/// elements, it will be padded with undefs.

LLVM_ABI Value *concatenateVectors(IRBuilderBase &Builder,

                                   ArrayRef<Value *> Vecs);


/// Given a mask vector of i1, Return true if any of the elements of this

/// predicate mask are known to be true or undef.  That is, return true if at

/// least one lane can be assumed active.

LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask);


/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)

/// for each lane which may be active.

LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask);


/// The group of interleaved loads/stores sharing the same stride and

/// close to each other.

///

/// Each member in this group has an index starting from 0, and the largest

/// index should be less than interleaved factor, which is equal to the absolute

/// value of the access's stride.

///

/// E.g. An interleaved load group of factor 4:

///        for (unsigned i = 0; i < 1024; i+=4) {

///          a = A[i];                           // Member of index 0

///          b = A[i+1];                         // Member of index 1

///          d = A[i+3];                         // Member of index 3

///          ...

///        }

///

///      An interleaved store group of factor 4:

///        for (unsigned i = 0; i < 1024; i+=4) {

///          ...

///          A[i]   = a;                         // Member of index 0

///          A[i+1] = b;                         // Member of index 1

///          A[i+2] = c;                         // Member of index 2

///          A[i+3] = d;                         // Member of index 3

///        }

///

/// Note: the interleaved load group could have gaps (missing members), but

/// the interleaved store group doesn't allow gaps.


template <typename InstTy> class InterleaveGroup {
public:
  /// Create a group with the given \p Factor, direction and \p Alignment.
  /// The group starts without members and without an insert position.
  InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
      : Factor(Factor), Reverse(Reverse), Alignment(Alignment),
        InsertPos(nullptr) {}

  /// Create a group led by \p Instr.
  ///
  /// The interleave factor is the absolute value of \p Stride and the group
  /// is reversed when \p Stride is negative. \p Instr is recorded as the
  /// member with key 0 and as the initial insert position.
  InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
      // Take the absolute value in unsigned arithmetic: negating INT32_MIN
      // as a signed value would overflow, whereas the unsigned negation is
      // well defined and yields 2^31.
      : Factor(Stride < 0 ? 0u - static_cast<uint32_t>(Stride)
                          : static_cast<uint32_t>(Stride)),
        Reverse(Stride < 0), Alignment(Alignment), InsertPos(Instr) {
    assert(Factor > 1 && "Invalid interleave factor");
    Members[0] = Instr;
  }

  bool isReverse() const { return Reverse; }
  uint32_t getFactor() const { return Factor; }
  Align getAlign() const { return Alignment; }
  uint32_t getNumMembers() const { return Members.size(); }

  /// Try to insert a new member \p Instr with index \p Index and
  /// alignment \p NewAlign. The index is related to the leader and it could be
  /// negative if it is the new leader.
  ///
  /// On success the group's alignment becomes the minimum of the current
  /// alignment and \p NewAlign.
  ///
  /// \returns false if the instruction doesn't belong to the group, i.e. the
  /// resulting key overflows, collides with the map's empty key or with an
  /// existing member, or would make the index range reach the factor.
  bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign) {
    // Make sure the key fits in an int32_t.
    std::optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
    if (!MaybeKey)
      return false;
    int32_t Key = *MaybeKey;

    // Skip if the key is used for the empty special value.
    if (DenseMapInfo<int32_t>::getEmptyKey() == Key)
      return false;

    // Skip if there is already a member with the same index.
    if (Members.contains(Key))
      return false;

    if (Key > LargestKey) {
      // The largest index is always less than the interleave factor.
      // Compare in 64 bits: narrowing Factor to int32_t would wrap to a
      // negative value for factors above INT32_MAX and reject every index.
      if (Index >= static_cast<int64_t>(Factor))
        return false;

      LargestKey = Key;
    } else if (Key < SmallestKey) {
      // Make sure the largest index fits in an int32_t.
      std::optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
      if (!MaybeLargestIndex)
        return false;

      // The largest index is always less than the interleave factor.
      if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
        return false;

      SmallestKey = Key;
    }

    // It's always safe to select the minimum alignment.
    Alignment = std::min(Alignment, NewAlign);
    Members[Key] = Instr;
    return true;
  }

  /// Get the member with the given index \p Index.
  ///
  /// The index is translated to a map key by offsetting it with the smallest
  /// key currently in the group.
  ///
  /// \returns nullptr if the group contains no such member.
  InstTy *getMember(uint32_t Index) const {
    int32_t Key = SmallestKey + Index;
    return Members.lookup(Key);
  }

  /// Return an iterator range over the non-null members of this group, in
  /// index order.
  auto members() const {
    return make_filter_range(
        map_range(seq<uint32_t>(0, Factor),
                  [this](uint32_t I) { return getMember(I); }),
        [](InstTy *I) { return I != nullptr; });
  }

  /// Get the index for the given member. Unlike the key in the member
  /// map, the index starts from 0.
  ///
  /// \p Instr must be a member of this group; otherwise llvm_unreachable is
  /// hit.
  uint32_t getIndex(const InstTy *Instr) const {
    for (const auto &Entry : Members) {
      if (Entry.second == Instr)
        return Entry.first - SmallestKey;
    }

    llvm_unreachable("InterleaveGroup contains no such member");
  }

  InstTy *getInsertPos() const { return InsertPos; }
  void setInsertPos(InstTy *Inst) { InsertPos = Inst; }

  /// Add metadata (e.g. alias info) from the instructions in this group to \p
  /// NewInst.
  ///
  /// FIXME: this function currently does not add noalias metadata a'la
  /// addNewMetadata.  To do that we need to compute the intersection of the
  /// noalias info from all members.
  void addMetadata(InstTy *NewInst) const;

  /// Returns true if this Group requires a scalar iteration to handle gaps.
  bool requiresScalarEpilogue() const {
    // If the last member of the Group exists, then a scalar epilog is not
    // needed for this group.
    if (getMember(getFactor() - 1))
      return false;

    // We have a group with gaps. It therefore can't be a reversed access,
    // because such groups get invalidated (TODO).
    assert(!isReverse() && "Group should have been invalidated");

    // This is a group of loads, with gaps, and without a last-member
    return true;
  }

  /// Return true if this group is full, i.e. it has no gaps.
  bool isFull() const { return getNumMembers() == getFactor(); }

private:
  uint32_t Factor; // Interleave Factor.
  bool Reverse;    // True when the group was built from a negative stride.
  Align Alignment; // Minimum alignment over the members inserted so far.
  // Members keyed relative to the original leader (key 0); keys may become
  // negative when a new leader is inserted in front of it.
  DenseMap<int32_t, InstTy *> Members;
  int32_t SmallestKey = 0;
  int32_t LargestKey = 0;

  // To avoid breaking dependences, vectorized instructions of an interleave
  // group should be inserted at either the first load or the last store in
  // program order.
  //
  // E.g. %even = load i32             // Insert Position
  //      %add = add i32 %even         // Use of %even
  //      %odd = load i32
  //
  //      store i32 %even
  //      %odd = add i32               // Def of %odd
  //      store i32 %odd               // Insert Position
  InstTy *InsertPos;
};


/// Drive the analysis of interleaved memory accesses in the loop.
///
/// Use this class to analyze interleaved accesses only when we can vectorize
/// a loop. Otherwise it's meaningless to do analysis as the vectorization
/// on interleaved accesses is unsafe.
///
/// The analysis collects interleave groups and records the relationships
/// between the member and the group in a map.
///
/// The groups are heap-allocated by createInterleaveGroup() and owned by this
/// object; they are deleted by releaseGroup(), invalidateGroups() and the
/// destructor.
/// NOTE(review): the implicitly generated copy constructor is not disabled, so
/// copying an instance would leave two objects deleting the same groups.
class InterleavedAccessInfo {
public:
  InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L,
                        DominatorTree *DT, LoopInfo *LI,
                        const LoopAccessInfo *LAI)
      : PSE(PSE), TheLoop(L), DT(DT), LI(LI), LAI(LAI) {}

  /// Deletes every group that is still owned by this object.
  ~InterleavedAccessInfo() { invalidateGroups(); }

  /// Analyze the interleaved accesses and collect them in interleave
  /// groups. Predicated loads/stores are also considered in the analysis if
  /// \p EnableMaskedInterleavedGroup is true.
  LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup);

  /// Invalidate groups, e.g., in case all blocks in loop will be predicated
  /// contrary to original assumption. Although we currently prevent group
  /// formation for predicated accesses, we may be able to relax this limitation
  /// in the future once we handle more complicated blocks.
  ///
  /// Clears the member-to-group map, deletes all groups and resets the
  /// scalar-epilogue requirement.
  ///
  /// \returns true if any groups were invalidated.
  bool invalidateGroups() {
    if (InterleaveGroups.empty()) {
      assert(
          !RequiresScalarEpilogue &&
          "RequiresScalarEpilog should not be set without interleave groups");
      return false;
    }

    InterleaveGroupMap.clear();
    for (auto *Ptr : InterleaveGroups)
      delete Ptr;
    InterleaveGroups.clear();
    RequiresScalarEpilogue = false;
    return true;
  }

  /// Check if \p Instr belongs to any interleave group.
  bool isInterleaved(Instruction *Instr) const {
    return InterleaveGroupMap.contains(Instr);
  }

  /// Get the interleave group that \p Instr belongs to.
  ///
  /// \returns nullptr if doesn't have such group.
  InterleaveGroup<Instruction> *
  getInterleaveGroup(const Instruction *Instr) const {
    return InterleaveGroupMap.lookup(Instr);
  }

  /// \returns a range over all interleave groups currently held.
  iterator_range<SmallPtrSetIterator<llvm::InterleaveGroup<Instruction> *>>
  getInterleaveGroups() {
    return make_range(InterleaveGroups.begin(), InterleaveGroups.end());
  }

  /// Returns true if an interleaved group that may access memory
  /// out-of-bounds requires a scalar epilogue iteration for correctness.
  bool requiresScalarEpilogue() const { return RequiresScalarEpilogue; }

  /// Invalidate groups that require a scalar epilogue (due to gaps). This can
  /// happen when optimizing for size forbids a scalar epilogue, and the gap
  /// cannot be filtered by masking the load/store.
  LLVM_ABI void invalidateGroupsRequiringScalarEpilogue();

  /// Returns true if we have any interleave groups.
  bool hasGroups() const { return !InterleaveGroups.empty(); }

private:
  /// A wrapper around ScalarEvolution, used to add runtime SCEV checks.
  /// Simplifies SCEV expressions in the context of existing SCEV assumptions.
  /// The interleaved access analysis can also add new predicates (for example
  /// by versioning strides of pointers).
  PredicatedScalarEvolution &PSE;

  Loop *TheLoop;
  DominatorTree *DT;
  LoopInfo *LI;
  const LoopAccessInfo *LAI;

  /// True if the loop may contain non-reversed interleaved groups with
  /// out-of-bounds accesses. We ensure we don't speculatively access memory
  /// out-of-bounds by executing at least one scalar epilogue iteration.
  bool RequiresScalarEpilogue = false;

  /// Holds the relationships between the members and the interleave group.
  DenseMap<Instruction *, InterleaveGroup<Instruction> *> InterleaveGroupMap;

  /// All interleave groups created so far and not yet released. The pointed-to
  /// groups are owned by this object.
  SmallPtrSet<InterleaveGroup<Instruction> *, 4> InterleaveGroups;

  /// Holds dependences among the memory accesses in the loop. It maps a source
  /// access to a set of dependent sink accesses.
  DenseMap<Instruction *, SmallPtrSet<Instruction *, 2>> Dependences;

  /// The descriptor for a strided memory access.
  struct StrideDescriptor {
    StrideDescriptor() = default;
    StrideDescriptor(int64_t Stride, const SCEV *Scev, uint64_t Size,
                     Align Alignment)
        : Stride(Stride), Scev(Scev), Size(Size), Alignment(Alignment) {}

    // The access's stride. It is negative for a reverse access.
    int64_t Stride = 0;

    // The scalar expression of this access.
    const SCEV *Scev = nullptr;

    // The size of the memory object.
    uint64_t Size = 0;

    // The alignment of this access.
    Align Alignment;
  };

  /// A type for holding instructions and their stride descriptors.
  using StrideEntry = std::pair<Instruction *, StrideDescriptor>;

  /// Create a new interleave group with the given instruction \p Instr,
  /// stride \p Stride and alignment \p Alignment, and register \p Instr as
  /// belonging to it. \p Instr must not already be in a group.
  ///
  /// \returns the newly created interleave group.
  InterleaveGroup<Instruction> *
  createInterleaveGroup(Instruction *Instr, int Stride, Align Alignment) {
    auto [It, Inserted] = InterleaveGroupMap.try_emplace(Instr);
    assert(Inserted && "Already in an interleaved access group");
    It->second = new InterleaveGroup<Instruction>(Instr, Stride, Alignment);
    InterleaveGroups.insert(It->second);
    return It->second;
  }

  /// Release the group and remove all the relationships.
  void releaseGroup(InterleaveGroup<Instruction> *Group) {
    InterleaveGroups.erase(Group);
    releaseGroupWithoutRemovingFromSet(Group);
  }

  /// Do everything necessary to release the group, apart from removing it from
  /// the InterleaveGroups set: unregister every member from the
  /// member-to-group map, then delete the group.
  void releaseGroupWithoutRemovingFromSet(InterleaveGroup<Instruction> *Group) {
    // The factor is read once; erasing map entries does not touch the group.
    for (unsigned I = 0, E = Group->getFactor(); I != E; ++I)
      if (Instruction *Member = Group->getMember(I))
        InterleaveGroupMap.erase(Member);

    delete Group;
  }

  /// Collect all the accesses with a constant stride in program order.
  void collectConstStrideAccesses(
      MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
      const DenseMap<Value *, const SCEV *> &Strides);

  /// Returns true if \p Stride is allowed in an interleaved group.
  LLVM_ABI static bool isStrided(int Stride);

  /// Returns true if \p BB is a predicated block.
  bool isPredicated(BasicBlock *BB) const {
    return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
  }

  /// Returns true if LoopAccessInfo can be used for dependence queries.
  bool areDependencesValid() const {
    return LAI && LAI->getDepChecker().getDependences();
  }

  /// Returns true if memory accesses \p A and \p B can be reordered, if
  /// necessary, when constructing interleaved groups.
  ///
  /// \p A must precede \p B in program order. We return false if reordering is
  /// not necessary or is prevented because \p A and \p B may be dependent.
  bool canReorderMemAccessesForInterleavedGroups(StrideEntry *A,
                                                 StrideEntry *B) const {
    // Code motion for interleaved accesses can potentially hoist strided loads
    // and sink strided stores. The code below checks the legality of the
    // following two conditions:
    //
    // 1. Potentially moving a strided load (B) before any store (A) that
    //    precedes B, or
    //
    // 2. Potentially moving a strided store (A) after any load or store (B)
    //    that A precedes.
    //
    // It's legal to reorder A and B if we know there isn't a dependence from A
    // to B. Note that this determination is conservative since some
    // dependences could potentially be reordered safely.

    // A is potentially the source of a dependence.
    auto *Src = A->first;
    const StrideDescriptor &SrcDes = A->second;

    // B is potentially the sink of a dependence.
    auto *Sink = B->first;
    const StrideDescriptor &SinkDes = B->second;

    // Code motion for interleaved accesses can't violate WAR dependences.
    // Thus, reordering is legal if the source isn't a write.
    if (!Src->mayWriteToMemory())
      return true;

    // At least one of the accesses must be strided.
    if (!isStrided(SrcDes.Stride) && !isStrided(SinkDes.Stride))
      return true;

    // If dependence information is not available from LoopAccessInfo,
    // conservatively assume the instructions can't be reordered.
    if (!areDependencesValid())
      return false;

    // If we know there is a dependence from source to sink, assume the
    // instructions can't be reordered. Otherwise, reordering is legal.
    // A single find() is used so the sink set is neither looked up twice nor
    // copied (DenseMap::lookup() returns the mapped value by value).
    auto DepIt = Dependences.find(Src);
    return DepIt == Dependences.end() || !DepIt->second.count(Sink);
  }

  /// Collect the dependences from LoopAccessInfo.
  ///
  /// We process the dependences once during the interleaved access analysis to
  /// enable constant-time dependence queries. Does nothing when
  /// areDependencesValid() is false.
  void collectDependences() {
    if (!areDependencesValid())
      return;
    const auto &DepChecker = LAI->getDepChecker();
    auto *Deps = DepChecker.getDependences();
    for (auto Dep : *Deps)
      Dependences[Dep.getSource(DepChecker)].insert(
          Dep.getDestination(DepChecker));
  }
};


} // llvm namespace


#endif

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

CheckedArithmetic.h

Compiler.h

LLVM_ABI
#define LLVM_ABI
Definition Compiler.h:213

Module.h
Module.h This file contains the declarations for the Module class.

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

LoopAccessAnalysis.h

I
#define I(x, y, z)
Definition MD5.cpp:57

MapVector.h
This file implements a map that provides insertion order iteration.

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

Sequence.h
Provides some synthesis utilities to produce sequences of values.

SmallVector.h
This file defines the SmallVector class.

VFABIDemangler.h

VectorTypeUtils.h

llvm::APInt
Class for arbitrary precision integers.
Definition APInt.h:78

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1531

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::DemandedBits
Definition DemandedBits.h:41

llvm::DenseMap
Definition DenseMap.h:834

llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:155

llvm::Function
Definition Function.h:65

llvm::Function::getFunction
const Function & getFunction() const
Definition Function.h:166

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114

llvm::Instruction
Definition Instruction.h:70

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition VectorUtils.h:515

llvm::InterleaveGroup::members
auto members() const
Return an iterator range over the non-null members of this group, in index order.
Definition VectorUtils.h:591

llvm::InterleaveGroup::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
Definition VectorUtils.h:621

llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition VectorUtils.h:531

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition VectorUtils.h:584

llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(uint32_t Factor, bool Reverse, Align Alignment)
Definition VectorUtils.h:517

llvm::InterleaveGroup::isFull
bool isFull() const
Return true if this group is full, i.e. it has no gaps.
Definition VectorUtils.h:636

llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition VectorUtils.h:600

llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition VectorUtils.h:610

llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition VectorUtils.h:530

llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition VectorUtils.h:609

llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition VectorUtils.cpp:1717

llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition VectorUtils.h:532

llvm::InterleaveGroup::InterleaveGroup
InterleaveGroup(InstTy *Instr, int32_t Stride, Align Alignment)
Definition VectorUtils.h:521

llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition VectorUtils.h:540

llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition VectorUtils.h:533

llvm::InterleavedAccessInfo::getInterleaveGroup
InterleaveGroup< Instruction > * getInterleaveGroup(const Instruction *Instr) const
Get the interleave group that Instr belongs to.
Definition VectorUtils.h:713

llvm::InterleavedAccessInfo::requiresScalarEpilogue
bool requiresScalarEpilogue() const
Returns true if an interleaved group that may access memory out-of-bounds requires a scalar epilogue ...
Definition VectorUtils.h:724

llvm::InterleavedAccessInfo::hasGroups
bool hasGroups() const
Returns true if we have any interleave groups.
Definition VectorUtils.h:732

llvm::InterleavedAccessInfo::isInterleaved
bool isInterleaved(Instruction *Instr) const
Check if Instr belongs to any interleave group.
Definition VectorUtils.h:705

llvm::InterleavedAccessInfo::invalidateGroups
bool invalidateGroups()
Invalidate groups, e.g., in case all blocks in loop will be predicated contrary to original assumptio...
Definition VectorUtils.h:688

llvm::InterleavedAccessInfo::getInterleaveGroups
iterator_range< SmallPtrSetIterator< llvm::InterleaveGroup< Instruction > * > > getInterleaveGroups()
Definition VectorUtils.h:718

llvm::InterleavedAccessInfo::analyzeInterleaving
LLVM_ABI void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition VectorUtils.cpp:1387

llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
LLVM_ABI void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition VectorUtils.cpp:1691

llvm::InterleavedAccessInfo::~InterleavedAccessInfo
~InterleavedAccessInfo()
Definition VectorUtils.h:675

llvm::InterleavedAccessInfo::InterleavedAccessInfo
InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, DominatorTree *DT, LoopInfo *LI, const LoopAccessInfo *LAI)
Definition VectorUtils.h:670

llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition IntrinsicInst.h:49

llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition LoopAccessAnalysis.h:690

llvm::LoopAccessInfo::blockNeedsPredication
static LLVM_ABI bool blockNeedsPredication(const BasicBlock *BB, const Loop *TheLoop, const DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
Definition LoopAccessAnalysis.cpp:2960

llvm::LoopInfo
Definition LoopInfo.h:426

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MDNode
Metadata node.
Definition Metadata.h:1075

llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38

llvm::Metadata
Root of the metadata hierarchy.
Definition Metadata.h:64

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
Definition ScalarEvolution.h:2621

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:254

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition SmallPtrSet.h:533

llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition SmallVector.h:581

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::StringRef
Represent a constant reference to a string, i.e.
Definition StringRef.h:56

llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition TargetLibraryInfo.h:266

llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition TargetTransformInfo.h:268

llvm::VFDatabase::VFDatabase
VFDatabase(CallInst &CI)
Constructor, requires a CallInst instance.
Definition VectorUtils.h:102

llvm::VFDatabase::hasMaskedVariant
static bool hasMaskedVariant(const CallInst &CI, std::optional< ElementCount > VF=std::nullopt)
Definition VectorUtils.h:87

llvm::VFDatabase::getMappings
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
Definition VectorUtils.h:76

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:490

llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition STLFunctionalExtras.h:37

llvm::iterator_range
A range adaptor for a pair of iterators.
Definition iterator_range.h:32

uint32_t

uint64_t

llvm::VFDatabase::getVectorizedFunction
Function * getVectorizedFunction(const VFShape &Shape) const
Definition VectorUtils.h:110

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::VFABI::tryDemangleForVFABI
LLVM_ABI std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const FunctionType *FTy)
Function to construct a VFInfo out of a mangled names in the following format:
Definition VFABIDemangler.cpp:379

llvm::VFABI::getVectorVariantNames
LLVM_ABI void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition VFABIDemangler.cpp:535

llvm::cl::Sink
@ Sink
Definition CommandLine.h:166

llvm::logicalview::LVAttributeKind::Inserted
@ Inserted
Definition LVOptions.h:109

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::getVectorIntrinsicIDForCall
LLVM_ABI Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition VectorUtils.cpp:236

llvm::TailFoldingOpts::Reverse
@ Reverse
Definition AArch64BaseInfo.h:670

llvm::possiblyDemandedEltsInMask
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition VectorUtils.cpp:1286

llvm::Depth
@ Depth
Definition SIMachineScheduler.h:36

llvm::createUnaryMask
LLVM_ABI llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition VectorUtils.cpp:1189

llvm::getMetadataToPropagate
LLVM_ABI void getMetadataToPropagate(Instruction *Inst, SmallVectorImpl< std::pair< unsigned, MDNode * > > &Metadata)
Add metadata from Inst to Metadata, if it can be preserved after vectorization.
Definition VectorUtils.cpp:1056

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::concatenateVectors
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition VectorUtils.cpp:1231

llvm::widenShuffleMaskElts
LLVM_ABI bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition VectorUtils.cpp:539

llvm::propagateMetadata
LLVM_ABI Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition VectorUtils.cpp:1079

llvm::getSplatValue
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition VectorUtils.cpp:390

llvm::map_range
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365

llvm::intersectAccessGroups
LLVM_ABI MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition VectorUtils.cpp:1008

llvm::getShuffleDemandedElts
LLVM_ABI bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
Definition VectorUtils.cpp:451

llvm::isSplatValue
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition VectorUtils.cpp:405

llvm::createBitMaskForGaps
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition VectorUtils.cpp:1128

llvm::createStrideMask
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition VectorUtils.cpp:1168

llvm::getHorizDemandedEltsForFirstOperand
LLVM_ABI void getHorizDemandedEltsForFirstOperand(unsigned VectorBitWidth, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS)
Compute the demanded elements mask of horizontal binary operations.
Definition VectorUtils.cpp:779

llvm::createReplicatedMask
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition VectorUtils.cpp:1148

llvm::getDeinterleaveIntrinsicFactor
LLVM_ABI unsigned getDeinterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.deinterleaveN intrinsics.
Definition VectorUtils.cpp:271

llvm::getInterleaveIntrinsicFactor
LLVM_ABI unsigned getInterleaveIntrinsicFactor(Intrinsic::ID ID)
Returns the corresponding factor of llvm.vector.interleaveN intrinsics.
Definition VectorUtils.cpp:250

llvm::make_filter_range
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551

llvm::Key
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
Definition PassManager.h:690

llvm::isTriviallyScalarizable
LLVM_ABI bool isTriviallyScalarizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially scalarizable.
Definition VectorUtils.cpp:132

llvm::isVectorIntrinsicWithStructReturnOverloadAtField
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
Definition VectorUtils.cpp:219

llvm::TTI
TargetTransformInfo TTI
Definition TargetTransformInfo.h:263

llvm::narrowShuffleMaskElts
LLVM_ABI void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition VectorUtils.cpp:518

llvm::isMaskedSlidePair
LLVM_ABI bool isMaskedSlidePair(ArrayRef< int > Mask, int NumElts, std::array< std::pair< int, int >, 2 > &SrcInfo)
Does this shuffle mask represent either one slide shuffle or a pair of two slide shuffles,...
Definition VectorUtils.cpp:488

llvm::getDeinterleavedVectorType
LLVM_ABI VectorType * getDeinterleavedVectorType(IntrinsicInst *DI)
Given a deinterleaveN intrinsic, return the (narrow) vector type of each factor.
Definition VectorUtils.cpp:292

llvm::createInterleaveMask
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition VectorUtils.cpp:1157

llvm::isVectorIntrinsicWithScalarOpAtArg
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition VectorUtils.cpp:140

llvm::findScalarElement
LLVM_ABI Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition VectorUtils.cpp:304

llvm::uniteAccessGroups
LLVM_ABI MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition VectorUtils.cpp:987

llvm::ArrayRef
ArrayRef(const T &OneElt) -> ArrayRef< T >

llvm::checkedSub
std::enable_if_t< std::is_signed_v< T >, std::optional< T > > checkedSub(T LHS, T RHS)
Subtract two signed integers LHS and RHS.
Definition CheckedArithmetic.h:55

llvm::getShuffleMaskWithWidestElts
LLVM_ABI void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
Definition VectorUtils.cpp:650

llvm::checkedAdd
std::enable_if_t< std::is_signed_v< T >, std::optional< T > > checkedAdd(T LHS, T RHS)
Add two signed integers LHS and RHS.
Definition CheckedArithmetic.h:46

llvm::processShuffleMasks
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition VectorUtils.cpp:664

llvm::maskContainsAllOneOrUndef
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if any of the elements of this predicate mask are known to be ...
Definition VectorUtils.cpp:1259

llvm::seq
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305

llvm::isTriviallyVectorizable
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition VectorUtils.cpp:46

llvm::createSequentialMask
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition VectorUtils.cpp:1176

llvm::isVectorIntrinsicWithOverloadTypeAtArg
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Definition VectorUtils.cpp:179

llvm::computeMinimumValueSizes
LLVM_ABI MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition VectorUtils.cpp:808

llvm::scaleShuffleMaskElts
LLVM_ABI bool scaleShuffleMaskElts(unsigned NumDstElts, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Attempt to narrow/widen the Mask shuffle mask to the NumDstElts target width.
Definition VectorUtils.cpp:625

llvm::getSplatIndex
LLVM_ABI int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition VectorUtils.cpp:368

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::DenseMapInfo
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition DenseMapInfo.h:54

llvm::VFInfo
Holds the VFShape for a specific scalar to vector function mapping.
Definition VFABIDemangler.h:125

llvm::VFShape
Contains the information about the kind of vectorization available.
Definition VFABIDemangler.h:84

llvm::VFShape::getScalarShape
static VFShape getScalarShape(const FunctionType *FTy)
Retrieve the VFShape that can be used to map a scalar function to itself, with VF = 1.
Definition VFABIDemangler.h:101