doxygen/VPlanRecipes_8cpp_source.html

//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

///

/// \file

/// This file contains implementations for different VPlan recipes.

///

//===----------------------------------------------------------------------===//


#include "LoopVectorizationPlanner.h"

#include "VPlan.h"

#include "VPlanHelpers.h"

#include "VPlanPatternMatch.h"

#include "VPlanUtils.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/ADT/SmallVectorExtras.h"

#include "llvm/ADT/Twine.h"

#include "llvm/Analysis/AssumptionCache.h"

#include "llvm/Analysis/IVDescriptors.h"

#include "llvm/Analysis/LoopInfo.h"

#include "llvm/Analysis/ScalarEvolutionExpressions.h"

#include "llvm/IR/BasicBlock.h"

#include "llvm/IR/IRBuilder.h"

#include "llvm/IR/Instruction.h"

#include "llvm/IR/Instructions.h"

#include "llvm/IR/Intrinsics.h"

#include "llvm/IR/Type.h"

#include "llvm/IR/Value.h"

#include "llvm/Support/Casting.h"

#include "llvm/Support/CommandLine.h"

#include "llvm/Support/Debug.h"

#include "llvm/Support/raw_ostream.h"

#include "llvm/Transforms/Utils/BasicBlockUtils.h"

#include "llvm/Transforms/Utils/LoopUtils.h"

#include <cassert>


using namespace llvm;

using namespace llvm::VPlanPatternMatch;


using VectorParts = SmallVector<Value *, 2>;


#define LV_NAME "loop-vectorize"

#define DEBUG_TYPE LV_NAME


bool VPRecipeBase::mayWriteToMemory() const {

  switch (getVPRecipeID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();

  case VPInstructionSC: {

    auto *VPI = cast<VPInstruction>(this);

    // Loads read from memory but don't write to memory.

    if (VPI->getOpcode() == Instruction::Load)

      return false;

    return VPI->opcodeMayReadOrWriteFromMemory();

  }

  case VPInterleaveEVLSC:

  case VPInterleaveSC:

    return cast<VPInterleaveBase>(this)->getNumStoreOperands() > 0;

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayWriteToMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyReadsMemory();

  case VPWidenMemIntrinsicSC:

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayWriteToMemory();

  case VPActiveLaneMaskPHISC:

  case VPCurrentIterationPHISC:

  case VPBranchOnMaskSC:

  case VPDerivedIVSC:

  case VPFirstOrderRecurrencePHISC:

  case VPReductionPHISC:

  case VPScalarIVStepsSC:

  case VPPredInstPHISC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayWriteToMemory()) &&

           "underlying instruction may write to memory");

    return false;

  }

  default:

    return true;

  }

}


bool VPRecipeBase::mayReadFromMemory() const {

  switch (getVPRecipeID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayReadOrWriteMemory();

  case VPInstructionSC:

    return cast<VPInstruction>(this)->opcodeMayReadOrWriteFromMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

    return true;

  case VPReplicateSC:

    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())

        ->mayReadFromMemory();

  case VPWidenCallSC:

    return !cast<VPWidenCallRecipe>(this)

                ->getCalledScalarFunction()

                ->onlyWritesMemory();

  case VPWidenMemIntrinsicSC:

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayReadFromMemory();

  case VPBranchOnMaskSC:

  case VPDerivedIVSC:

  case VPCurrentIterationPHISC:

  case VPFirstOrderRecurrencePHISC:

  case VPReductionPHISC:

  case VPPredInstPHISC:

  case VPScalarIVStepsSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    return false;

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayReadFromMemory()) &&

           "underlying instruction may read from memory");

    return false;

  }

  default:

    // FIXME: Return false if the recipe represents an interleaved store.

    return true;

  }

}


bool VPRecipeBase::mayHaveSideEffects() const {

  switch (getVPRecipeID()) {

  case VPExpressionSC:

    return cast<VPExpressionRecipe>(this)->mayHaveSideEffects();

  case VPActiveLaneMaskPHISC:

  case VPDerivedIVSC:

  case VPCurrentIterationPHISC:

  case VPFirstOrderRecurrencePHISC:

  case VPReductionPHISC:

  case VPPredInstPHISC:

  case VPVectorEndPointerSC:

    return false;

  case VPInstructionSC: {

    auto *VPI = cast<VPInstruction>(this);

    return mayWriteToMemory() ||

           VPI->getOpcode() == VPInstruction::BranchOnCount ||

           VPI->getOpcode() == VPInstruction::BranchOnCond ||

           VPI->getOpcode() == VPInstruction::BranchOnTwoConds;

  }

  case VPWidenCallSC: {

    Function *Fn = cast<VPWidenCallRecipe>(this)->getCalledScalarFunction();

    return mayWriteToMemory() || !Fn->doesNotThrow() || !Fn->willReturn();

  }

  case VPWidenMemIntrinsicSC:

  case VPWidenIntrinsicSC:

    return cast<VPWidenIntrinsicRecipe>(this)->mayHaveSideEffects();

  case VPBlendSC:

  case VPReductionEVLSC:

  case VPReductionSC:

  case VPScalarIVStepsSC:

  case VPVectorPointerSC:

  case VPWidenCanonicalIVSC:

  case VPWidenCastSC:

  case VPWidenGEPSC:

  case VPWidenIntOrFpInductionSC:

  case VPWidenPHISC:

  case VPWidenPointerInductionSC:

  case VPWidenSC: {

    const Instruction *I =

        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());

    (void)I;

    assert((!I || !I->mayHaveSideEffects()) &&

           "underlying instruction has side-effects");

    return false;

  }

  case VPInterleaveEVLSC:

  case VPInterleaveSC:

    return mayWriteToMemory();

  case VPWidenLoadEVLSC:

  case VPWidenLoadSC:

  case VPWidenStoreEVLSC:

  case VPWidenStoreSC:

    assert(

        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==

            mayWriteToMemory() &&

        "mayHaveSideffects result for ingredient differs from this "

        "implementation");

    return mayWriteToMemory();

  case VPReplicateSC: {

    auto *R = cast<VPReplicateRecipe>(this);

    return R->getUnderlyingInstr()->mayHaveSideEffects();

  }

  default:

    return true;

  }

}


/// Returns true only for VPInstruction recipes whose opcode is a cast, Add,
/// Sub, Mul or GetElementPtr; every other recipe is reported as not safe to
/// speculate.
bool VPRecipeBase::isSafeToSpeculativelyExecute() const {
  if (getVPRecipeID() != VPInstructionSC)
    return false;

  const unsigned Opc = cast<VPInstruction>(this)->getOpcode();
  return Instruction::isCast(Opc) || Opc == Instruction::Add ||
         Opc == Instruction::Sub || Opc == Instruction::Mul ||
         Opc == Instruction::GetElementPtr;
}


/// Inserts this (currently unlinked) recipe immediately before \p InsertPos in
/// the VPBasicBlock that contains \p InsertPos.
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  VPBasicBlock *TargetBB = InsertPos->getParent();
  assert(TargetBB && "Insertion position not in any VPBasicBlock");
  TargetBB->insert(this, InsertPos->getIterator());
}


/// Inserts this (currently unlinked) recipe into \p BB before the position
/// \p I. \p I must either be BB.end() or refer to a recipe whose parent is
/// \p BB.
void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  // The recipe must not already be linked into a block.
  assert(!Parent && "Recipe already in some VPBasicBlock");
  // The insertion point has to belong to BB (or be its end iterator).
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}


/// Inserts this (currently unlinked) recipe immediately after \p InsertPos in
/// the VPBasicBlock that contains \p InsertPos.
void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  VPBasicBlock *TargetBB = InsertPos->getParent();
  assert(TargetBB && "Insertion position not in any VPBasicBlock");
  // Inserting before the successor position places the recipe after InsertPos.
  auto Where = std::next(InsertPos->getIterator());
  TargetBB->insert(this, Where);
}


void VPRecipeBase::removeFromParent() {

  assert(getParent() && "Recipe not in any VPBasicBlock");

  getParent()->getRecipeList().remove(getIterator());

  Parent = nullptr;

}


/// Erases this recipe from the recipe list of its parent block and returns the
/// iterator produced by the list's erase(). The recipe must currently be in a
/// block.
iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  VPBasicBlock *Owner = getParent();
  assert(Owner && "Recipe not in any VPBasicBlock");
  return Owner->getRecipeList().erase(getIterator());
}


/// Moves this recipe so that it directly follows \p InsertPos: it is first
/// unlinked from its current block and then re-inserted after \p InsertPos.
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  // Unlink first; insertAfter asserts that the recipe has no parent.
  removeFromParent();
  insertAfter(InsertPos);
}


/// Moves this recipe into \p BB before position \p I: it is first unlinked
/// from its current block and then re-inserted at the requested position.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  // Unlink first; insertBefore asserts that the recipe has no parent.
  removeFromParent();
  insertBefore(BB, I);
}


/// Returns the cost of this recipe for vectorization factor \p VF.
///
/// The cost is 0 when \p Ctx asks to skip the recipe's underlying instruction.
/// Otherwise it is the result of computeCost(), unless the
/// ForceTargetInstructionCost option was given and the computed cost is valid:
/// then recipes with an underlying instruction get the forced cost and all
/// other recipes get 0. The resulting cost is also printed in debug output.
InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
  // Find the underlying instruction of the recipe, if there is one. It decides
  // whether cost computation is skipped and whether the forced target
  // instruction cost applies.
  Instruction *Underlying = nullptr;
  if (auto *SingleDef = dyn_cast<VPSingleDefRecipe>(this))
    Underlying = dyn_cast_or_null<Instruction>(SingleDef->getUnderlyingValue());
  else if (auto *Interleave = dyn_cast<VPInterleaveBase>(this))
    Underlying = Interleave->getInsertPos();
  else if (auto *MemRecipe = dyn_cast<VPWidenMemoryRecipe>(this))
    Underlying = &MemRecipe->getIngredient();

  const bool Skip =
      Underlying && Ctx.skipCostComputation(Underlying, VF.isVector());

  InstructionCost RecipeCost = InstructionCost(0);
  if (!Skip) {
    RecipeCost = computeCost(VF, Ctx);
    const bool ApplyForcedCost =
        ForceTargetInstructionCost.getNumOccurrences() > 0 &&
        RecipeCost.isValid();
    if (ApplyForcedCost)
      RecipeCost = Underlying ? InstructionCost(ForceTargetInstructionCost)
                              : InstructionCost(0);
  }

  LLVM_DEBUG({
    dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": ";
    dump();
  });
  return RecipeCost;
}


/// Base-class fallback for the per-recipe cost computation. It must never be
/// reached: subclasses are expected to provide their own computeCost, and
/// calling this version hits llvm_unreachable.
InstructionCost VPRecipeBase::computeCost(ElementCount VF,
                                          VPCostContext &Ctx) const {
  llvm_unreachable("subclasses should implement computeCost");
}


/// Returns true if this recipe is a phi: either a VPPhi / VPIRPhi, or a
/// recipe whose ID lies in the range [VPFirstPHISC, VPLastPHISC].
bool VPRecipeBase::isPhi() const {
  if (isa<VPPhi, VPIRPhi>(this))
    return true;
  const auto ID = getVPRecipeID();
  return ID >= VPFirstPHISC && ID <= VPLastPHISC;
}


void VPIRFlags::intersectFlags(const VPIRFlags &Other) {

  assert(OpType == Other.OpType && "OpType must match");

  switch (OpType) {

  case OperationType::OverflowingBinOp:

    WrapFlags.HasNUW &= Other.WrapFlags.HasNUW;

    WrapFlags.HasNSW &= Other.WrapFlags.HasNSW;

    break;

  case OperationType::Trunc:

    TruncFlags.HasNUW &= Other.TruncFlags.HasNUW;

    TruncFlags.HasNSW &= Other.TruncFlags.HasNSW;

    break;

  case OperationType::DisjointOp:

    DisjointFlags.IsDisjoint &= Other.DisjointFlags.IsDisjoint;

    break;

  case OperationType::PossiblyExactOp:

    ExactFlags.IsExact &= Other.ExactFlags.IsExact;

    break;

  case OperationType::GEPOp:

    GEPFlagsStorage &= Other.GEPFlagsStorage;

    break;

  case OperationType::FPMathOp:

  case OperationType::FCmp:

    assert((OpType != OperationType::FCmp ||

            FCmpFlags.CmpPredStorage == Other.FCmpFlags.CmpPredStorage) &&

           "Cannot drop CmpPredicate");

    getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;

    getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;

    break;

  case OperationType::NonNegOp:

    NonNegFlags.NonNeg &= Other.NonNegFlags.NonNeg;

    break;

  case OperationType::Cmp:

    assert(CmpPredStorage == Other.CmpPredStorage &&

           "Cannot drop CmpPredicate");

    break;

  case OperationType::ReductionOp:

    assert(ReductionFlags.Kind == Other.ReductionFlags.Kind &&

           "Cannot change RecurKind");

    assert(ReductionFlags.IsOrdered == Other.ReductionFlags.IsOrdered &&

           "Cannot change IsOrdered");

    assert(ReductionFlags.IsInLoop == Other.ReductionFlags.IsInLoop &&

           "Cannot change IsInLoop");

    getFMFsRef().NoNaNs &= Other.getFMFsRef().NoNaNs;

    getFMFsRef().NoInfs &= Other.getFMFsRef().NoInfs;

    break;

  case OperationType::Other:

    break;

  }

}


/// Converts the stored fast-math flags into an IR FastMathFlags value. Only
/// valid for FPMathOp, FCmp, ReductionOp and Other operation types; for Other,
/// a default-constructed FastMathFlags is returned.
FastMathFlags VPIRFlags::getFastMathFlags() const {
  assert((OpType == OperationType::FPMathOp || OpType == OperationType::FCmp ||
          OpType == OperationType::ReductionOp ||
          OpType == OperationType::Other) &&
         "recipe doesn't have fast math flags");

  FastMathFlags Result;
  if (OpType != OperationType::Other) {
    // Copy each individual flag from the compact storage.
    const FastMathFlagsTy &Stored = getFMFsRef();
    Result.setAllowReassoc(Stored.AllowReassoc);
    Result.setNoNaNs(Stored.NoNaNs);
    Result.setNoInfs(Stored.NoInfs);
    Result.setNoSignedZeros(Stored.NoSignedZeros);
    Result.setAllowReciprocal(Stored.AllowReciprocal);
    Result.setAllowContract(Stored.AllowContract);
    Result.setApproxFunc(Stored.ApproxFunc);
  }
  return Result;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Forwards to VPRecipeBase::dump().
void VPSingleDefRecipe::dump() const {
  VPRecipeBase::dump();
}

/// Prints this recipe to \p O through printRecipe(), followed by its debug
/// location (when set) and, for recipes that are also VPIRMetadata, their
/// metadata.
void VPRecipeBase::print(raw_ostream &O, const Twine &Indent,
                         VPSlotTracker &SlotTracker) const {
  printRecipe(O, Indent, SlotTracker);

  auto Loc = getDebugLoc();
  if (Loc) {
    O << ", !dbg ";
    Loc.print(O);
  }

  if (auto *IRMD = dyn_cast<VPIRMetadata>(this))
    IRMD->print(O, SlotTracker);
}

#endif


/// Constructs a recipe for \p Expr. The recipe has no operands and uses the
/// type of \p Expr as its result type; \p Expr itself is stored in the recipe.
VPExpandSCEVRecipe::VPExpandSCEVRecipe(const SCEV *Expr)
    : VPSingleDefRecipe(VPRecipeBase::VPExpandSCEVSC, {}, Expr->getType()),
      Expr(Expr) {}


/// For call VPInstruction operands, return the operand index of the called
/// function. The function is either the last operand (for unmasked calls) or
/// the second-to-last operand (for masked calls).
///
/// \p Operands must not be empty, and must hold at least two operands when
/// the last operand is not the called function. Both preconditions are
/// asserted, because the index arithmetic below is unsigned and would
/// otherwise wrap around.
static unsigned getCalledFnOperandIndex(ArrayRef<VPValue *> Operands) {
  assert(!Operands.empty() && "call must have at least a function operand");
  unsigned NumOps = Operands.size();
  auto *LastOp = dyn_cast<VPIRValue>(Operands[NumOps - 1]);
  if (LastOp && isa<Function>(LastOp->getValue()))
    return NumOps - 1;
  // The function must then be the second-to-last operand; make sure that
  // operand exists before computing NumOps - 2.
  assert(NumOps >= 2 &&
         "expected function operand followed by one more operand");
  assert(isa<Function>(cast<VPIRValue>(Operands[NumOps - 2])->getValue()) &&
         "expected function operand");
  return NumOps - 2;
}


/// For call VPInstruction operands, return the called function, i.e. the
/// Function wrapped by the operand that getCalledFnOperandIndex() selects.
static Function *getCalledFunction(ArrayRef<VPValue *> Operands) {
  VPValue *CalleeOp = Operands[getCalledFnOperandIndex(Operands)];
  auto *CalleeIRValue = cast<VPIRValue>(CalleeOp);
  return cast<Function>(CalleeIRValue->getValue());
}


/// Computes the scalar result type of an instruction with opcode \p Opcode
/// from the scalar types of its \p Operands.
///
/// \p Operands must not be empty. Opcodes whose result type cannot be derived
/// from the operands (see the llvm_unreachable case below) must not be passed
/// here. In asserts builds, operands that are expected to share a type are
/// checked for consistency.
Type *llvm::computeScalarTypeForInstruction(unsigned Opcode,
                                            ArrayRef<VPValue *> Operands) {
  assert(!Operands.empty() &&
         "zero-operand VPInstruction opcodes must pass explicit ResultTy");
  // Assert operand \p Idx (if present and typed) has type \p ExpectedTy.
  [[maybe_unused]] auto AssertOperandType = [&Operands](unsigned Idx,
                                                        Type *ExpectedTy) {
    if (!ExpectedTy || Operands.size() <= Idx)
      return;
    [[maybe_unused]] Type *OpTy = Operands[Idx]->getScalarType();
    assert((!OpTy || OpTy == ExpectedTy) &&
           "different types inferred for different operands");
  };

  // NOTE(review): Op0Ty is dereferenced right below, while later checks in
  // this function tolerate a null Op0Ty — confirm whether getScalarType() can
  // return null here.
  Type *Op0Ty = Operands[0]->getScalarType();
  LLVMContext &Ctx = Op0Ty->getContext();
  switch (Opcode) {
  // Opcodes for which the void type is returned.
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::BranchOnCount:
  case Instruction::Store:
  case Instruction::Switch:
    return Type::getVoidTy(Ctx);
  // Result is i1; the first two operands must agree on their type.
  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ActiveLaneMask:
    AssertOperandType(1, Op0Ty);
    return IntegerType::get(Ctx, 1);
  // Result is i1, and the operands are expected to be i1 as well.
  case VPInstruction::LogicalAnd:
  case VPInstruction::LogicalOr:
  case VPInstruction::MaskedCond:
    assert((!Op0Ty || Op0Ty->isIntegerTy(1)) && "expected bool operand");
    AssertOperandType(1, Op0Ty);
    return IntegerType::get(Ctx, 1);
  // Result is always i32.
  case VPInstruction::ExplicitVectorLength:
    return IntegerType::get(Ctx, 32);
  // Result has the type of operand 1; operand 2 must have the same type.
  case Instruction::Select: {
    Type *Op1Ty = Operands[1]->getScalarType();
    AssertOperandType(2, Op1Ty);
    return Op1Ty;
  }
  // Result has the type of operand 1; all following operands must match it.
  case VPInstruction::ExtractLane: {
    assert(Operands.size() >= 2 && "ExtractLane requires a lane operand and "
                                   "at least one source vector operand");
    Type *Op1Ty = Operands[1]->getScalarType();
    for (unsigned Idx = 2; Idx != Operands.size(); ++Idx)
      AssertOperandType(Idx, Op1Ty);
    return Op1Ty;
  }
  // Result is the struct element type selected by the constant index in
  // operand 1.
  case Instruction::ExtractValue: {
    assert(Operands.size() == 2 && "expected single level extractvalue");
    auto *StructTy = cast<StructType>(Op0Ty);
    return StructTy->getTypeAtIndex(
        cast<VPConstantInt>(Operands[1])->getZExtValue());
  }
  // These opcodes are not handled here; their type has to be supplied by the
  // caller.
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
  case VPInstruction::NumActiveLanes:
  case VPInstruction::IncomingAliasMask:
  case Instruction::Load:
  case Instruction::Alloca:
    llvm_unreachable("type must be passed explicitly");
  // Result is the return type of the called function.
  case Instruction::Call:
    return getCalledFunction(Operands)->getReturnType();
  default:
    break;
  }

  // Opcodes that require all operands to share the same scalar type as the
  // result.
  bool AllOperandsSameType =
      Instruction::isBinaryOp(Opcode) ||
      is_contained({VPInstruction::FirstOrderRecurrenceSplice,
                    VPInstruction::CalculateTripCountMinusVF,
                    VPInstruction::CanonicalIVIncrementForPart,
                    VPInstruction::AnyOf, VPInstruction::BuildVector,
                    VPInstruction::BuildStructVector},
                   Opcode);
  if (AllOperandsSameType)
    for (unsigned Idx = 1; Idx != Operands.size(); ++Idx)
      AssertOperandType(Idx, Op0Ty);

  // All remaining opcodes produce the scalar type of their first operand.
  return Op0Ty;
}


/// Returns the scalar result type for a replicate recipe of \p I. Casts,
/// ExtractValue, Load and Alloca take the type directly from \p I; every other
/// opcode is derived from \p Operands via computeScalarTypeForInstruction().
Type *VPReplicateRecipe::computeScalarType(const Instruction *I,
                                           ArrayRef<VPValue *> Operands) {
  const unsigned Opc = I->getOpcode();
  const bool TakeTypeFromIR =
      Instruction::isCast(Opc) || Opc == Instruction::ExtractValue ||
      Opc == Instruction::Load || Opc == Instruction::Alloca;
  if (TakeTypeFromIR)
    return I->getType();
  return computeScalarTypeForInstruction(Opc, Operands);
}


/// Constructs a VPInstruction with the given \p Opcode, \p Operands, IR
/// \p Flags, metadata \p MD, debug location \p DL and \p Name.
///
/// If \p ResultTy is non-null it is used as the result type; otherwise the
/// type is derived with computeScalarTypeForInstruction(). In asserts builds
/// the constructor checks that the flags fit the opcode and that the operand
/// count matches the opcode.
VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
                             const VPIRFlags &Flags, const VPIRMetadata &MD,
                             DebugLoc DL, const Twine &Name, Type *ResultTy)
    : VPRecipeWithIRFlags(
          VPRecipeBase::VPInstructionSC, Operands,
          ResultTy ? ResultTy
                   : computeScalarTypeForInstruction(Opcode, Operands),
          Flags, DL),
      VPIRMetadata(MD), Opcode(Opcode), Name(Name.str()) {
  assert(flagsValidForOpcode(getOpcode()) &&
         "Set flags not supported for the provided opcode");
  assert(hasRequiredFlagsForOpcode(getOpcode()) &&
         "Opcode requires specific flags to be set");
  // The operand count must match the opcode's fixed count, unless that count
  // is unknown (-1u). A masked instruction may carry one additional operand.
  assert((getNumOperandsForOpcode() == -1u ||
          getNumOperandsForOpcode() == getNumOperands() ||
          (isMasked() && getNumOperandsForOpcode() + 1 == getNumOperands())) &&
         "number of operands does not match opcode");
}


/// Returns the number of operands implied by this instruction's opcode, or
/// -1u if the count cannot be determined from the opcode alone. Every opcode
/// is expected to be covered; an unhandled opcode hits llvm_unreachable.
unsigned VPInstruction::getNumOperandsForOpcode() const {
  // Unary operators and casts take a single operand.
  if (Instruction::isUnaryOp(Opcode) || Instruction::isCast(Opcode))
    return 1;

  if (Instruction::isBinaryOp(Opcode))
    return 2;

  switch (Opcode) {
  // Opcodes without operands.
  case VPInstruction::StepVector:
  case VPInstruction::VScale:
  case VPInstruction::IncomingAliasMask:
    return 0;
  // Opcodes with exactly one operand.
  case Instruction::Alloca:
  case Instruction::ExtractValue:
  case Instruction::Freeze:
  case Instruction::Load:
  case VPInstruction::BranchOnCond:
  case VPInstruction::Broadcast:
  case VPInstruction::ExitingIVValue:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractLastPart:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::MaskedCond:
  case VPInstruction::Not:
  case VPInstruction::ResumeForEpilogue:
  case VPInstruction::Reverse:
  case VPInstruction::Unpack:
  case VPInstruction::NumActiveLanes:
    return 1;
  // Opcodes with exactly two operands.
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::ExtractElement:
  case Instruction::Store:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::FirstOrderRecurrenceSplice:
  case VPInstruction::LogicalAnd:
  case VPInstruction::LogicalOr:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
  case VPInstruction::WideIVStep:
  case VPInstruction::CalculateTripCountMinusVF:
    return 2;
  // Opcodes with exactly three operands.
  case Instruction::InsertElement:
  case Instruction::Select:
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ReductionStartVector:
    return 3;
  // For calls, the count covers all operands up to and including the called
  // function operand.
  case Instruction::Call:
    return getCalledFnOperandIndex(ArrayRef<VPValue *>(op_begin(), op_end())) +
           1;
  case Instruction::GetElementPtr:
  case Instruction::PHI:
  case Instruction::Switch:
  case VPInstruction::AnyOf:
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
  case VPInstruction::ExtractLane:
  case VPInstruction::ExtractLastActive:
    // Cannot determine the number of operands from the opcode.
    return -1u;
  }
  llvm_unreachable("all cases should be handled above");
}


/// Returns true only for a PtrAdd for which more than just the first lane is
/// used.
bool VPInstruction::doesGeneratePerAllLanes() const {
  if (Opcode != VPInstruction::PtrAdd)
    return false;
  return !vputils::onlyFirstLaneUsed(this);
}


/// Returns true if a scalar value for the first lane can be generated for this
/// instruction: binary operators, casts, single-scalar and vector-to-scalar
/// instructions, plus the opcodes listed explicitly below.
bool VPInstruction::canGenerateScalarForFirstLane() const {
  const unsigned Opc = getOpcode();
  if (Instruction::isBinaryOp(Opc) || Instruction::isCast(Opc) ||
      isSingleScalar() || isVectorToScalar())
    return true;

  switch (Opc) {
  default:
    return false;
  case Instruction::Freeze:
  case Instruction::ICmp:
  case Instruction::PHI:
  case Instruction::Select:
  case VPInstruction::AnyOf:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::Not:
  case VPInstruction::PtrAdd:
    return true;
  }
}


/// Maps a subtracting recurrence kind to the matching add opcode: Sub yields
/// Add and FSub yields FAdd. Any other kind is a programming error.
static Instruction::BinaryOps getSubRecurOpcode(RecurKind Kind) {
  switch (Kind) {
  case RecurKind::Sub:
    return Instruction::Add;
  case RecurKind::FSub:
    return Instruction::FAdd;
  default:
    llvm_unreachable("RecurKind should be Sub/FSub.");
  }
}


Value *VPInstruction::generate(VPTransformState &State) {

  IRBuilderBase &Builder = State.Builder;


  if (Instruction::isBinaryOp(getOpcode())) {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    auto *Res =

        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);

    if (auto *I = dyn_cast<Instruction>(Res))

      applyFlags(*I);

    return Res;

  }


  switch (getOpcode()) {

  case VPInstruction::Not: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    return Builder.CreateNot(A, Name);

  }

  case Instruction::ExtractElement: {

    assert(State.VF.isVector() && "Only extract elements from vectors");

    if (auto *Idx = dyn_cast<VPConstantInt>(getOperand(1)))

      return State.get(getOperand(0), VPLane(Idx->getZExtValue()));

    Value *Vec = State.get(getOperand(0));

    Value *Idx = State.get(getOperand(1), /*IsScalar=*/true);

    return Builder.CreateExtractElement(Vec, Idx, Name);

  }

  case Instruction::InsertElement: {

    assert(State.VF.isVector() && "Can only insert elements into vectors");

    Value *Vec = State.get(getOperand(0), /*IsScalar=*/false);

    Value *Elt = State.get(getOperand(1), /*IsScalar=*/true);

    Value *Idx = State.get(getOperand(2), /*IsScalar=*/true);

    return Builder.CreateInsertElement(Vec, Elt, Idx, Name);

  }

  case Instruction::Freeze: {

    Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this));

    return Builder.CreateFreeze(Op, Name);

  }

  case Instruction::FCmp:

  case Instruction::ICmp: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);

    Value *B = State.get(getOperand(1), OnlyFirstLaneUsed);

    return Builder.CreateCmp(getPredicate(), A, B, Name);

  }

  case Instruction::PHI: {

    llvm_unreachable("should be handled by VPPhi::execute");

  }

  case Instruction::Select: {

    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);

    Value *Cond =

        State.get(getOperand(0),

                  OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0)));

    Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed);

    Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed);

    return Builder.CreateSelectFMF(Cond, Op1, Op2, getFastMathFlags(), Name);

  }

  case VPInstruction::ActiveLaneMask: {

    // Get first lane of vector induction variable.

    Value *VIVElem0 = State.get(getOperand(0), VPLane(0));

    // Get the original loop tripcount.

    Value *ScalarTC = State.get(getOperand(1), VPLane(0));


    // If this part of the active lane mask is scalar, generate the CMP directly

    // to avoid unnecessary extracts.

    if (State.VF.isScalar())

      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0, ScalarTC,

                               Name);


    ElementCount EC = State.VF.multiplyCoefficientBy(

        cast<VPConstantInt>(getOperand(2))->getZExtValue());

    auto *PredTy = VectorType::get(Builder.getInt1Ty(), EC);

    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,

                                   {PredTy, ScalarTC->getType()},

                                   {VIVElem0, ScalarTC}, nullptr, Name);

  }

  case VPInstruction::NumActiveLanes: {

    Value *Op = State.get(getOperand(0));

    auto *VecTy = cast<VectorType>(Op->getType());

    assert(VecTy->getScalarSizeInBits() == 1 &&

           "NumActiveLanes only implemented for i1 vectors");


    Type *Ty = getScalarType();

    Value *ZExt = Builder.CreateCast(

        Instruction::ZExt, Op, VectorType::get(Ty, VecTy->getElementCount()));

    Value *NumActive =

        Builder.CreateUnaryIntrinsic(Intrinsic::vector_reduce_add, ZExt);

    return NumActive;

  }

  case VPInstruction::FirstOrderRecurrenceSplice: {

    // Generate code to combine the previous and current values in vector v3.

    //

    //   vector.ph:

    //     v_init = vector(..., ..., ..., a[-1])

    //     br vector.body

    //

    //   vector.body

    //     i = phi [0, vector.ph], [i+4, vector.body]

    //     v1 = phi [v_init, vector.ph], [v2, vector.body]

    //     v2 = a[i, i+1, i+2, i+3];

    //     v3 = vector(v1(3), v2(0, 1, 2))


    auto *V1 = State.get(getOperand(0));

    if (!V1->getType()->isVectorTy())

      return V1;

    Value *V2 = State.get(getOperand(1));

    return Builder.CreateVectorSpliceRight(V1, V2, 1, Name);

  }

  case VPInstruction::CalculateTripCountMinusVF: {

    Value *ScalarTC = State.get(getOperand(0), VPLane(0));

    Value *VFxUF = State.get(getOperand(1), VPLane(0));

    Value *Sub = Builder.CreateSub(ScalarTC, VFxUF);

    Value *Cmp =

        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, VFxUF);

    Value *Zero = ConstantInt::getNullValue(ScalarTC->getType());

    return Builder.CreateSelect(Cmp, Sub, Zero);

  }

  case VPInstruction::ExplicitVectorLength: {

    // TODO: Restructure this code with an explicit remainder loop, vsetvli can

    // be outside of the main loop.

    Value *AVL = State.get(getOperand(0), /*IsScalar*/ true);

    // Compute EVL

    assert(AVL->getType()->isIntegerTy() &&

           "Requested vector length should be an integer.");


    assert(State.VF.isScalable() && "Expected scalable vector factor.");

    Value *VFArg = Builder.getInt32(State.VF.getKnownMinValue());


    Value *EVL = Builder.CreateIntrinsic(

        Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length,

        {AVL, VFArg, Builder.getTrue()});

    return EVL;

  }

  case VPInstruction::BranchOnCond: {

    Value *Cond = State.get(getOperand(0), VPLane(0));

    // Replace the temporary unreachable terminator with a new conditional

    // branch, hooking it up to backward destination for latch blocks now, and

    // to forward destination(s) later when they are created.

    // Second successor may be backwards - iff it is already in VPBB2IRBB.

    VPBasicBlock *SecondVPSucc =

        cast<VPBasicBlock>(getParent()->getSuccessors()[1]);

    BasicBlock *SecondIRSucc = State.CFG.VPBB2IRBB.lookup(SecondVPSucc);

    BasicBlock *IRBB = State.CFG.VPBB2IRBB[getParent()];

    auto *Br = Builder.CreateCondBr(Cond, IRBB, SecondIRSucc);

    // First successor is always forward, reset it to nullptr.

    Br->setSuccessor(0, nullptr);

    IRBB->getTerminator()->eraseFromParent();

    applyMetadata(*Br);

    return Br;

  }

  case VPInstruction::Broadcast: {

    return Builder.CreateVectorSplat(

        State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast");

  }

  case VPInstruction::BuildStructVector: {

    // For struct types, we need to build a new 'wide' struct type, where each

    // element is widened, i.e., we create a struct of vectors.

    auto *StructTy = cast<StructType>(getOperand(0)->getScalarType());

    Value *Res = PoisonValue::get(toVectorizedTy(StructTy, State.VF));

    for (const auto &[LaneIndex, Op] : enumerate(operands())) {

      for (unsigned FieldIndex = 0; FieldIndex != StructTy->getNumElements();

           FieldIndex++) {

        Value *ScalarValue =

            Builder.CreateExtractValue(State.get(Op, true), FieldIndex);

        Value *VectorValue = Builder.CreateExtractValue(Res, FieldIndex);

        VectorValue =

            Builder.CreateInsertElement(VectorValue, ScalarValue, LaneIndex);

        Res = Builder.CreateInsertValue(Res, VectorValue, FieldIndex);

      }

    }

    return Res;

  }

  case VPInstruction::BuildVector: {

    auto *ScalarTy = getOperand(0)->getScalarType();

    auto NumOfElements = ElementCount::getFixed(getNumOperands());

    Value *Res = PoisonValue::get(toVectorizedTy(ScalarTy, NumOfElements));

    for (const auto &[Idx, Op] : enumerate(operands()))

      Res = Builder.CreateInsertElement(Res, State.get(Op, true),

                                        Builder.getInt32(Idx));

    return Res;

  }

  case VPInstruction::ReductionStartVector: {

    if (State.VF.isScalar())

      return State.get(getOperand(0), true);

    IRBuilderBase::FastMathFlagGuard FMFG(Builder);

    Builder.setFastMathFlags(getFastMathFlags());

    // If this start vector is scaled then it should produce a vector with fewer

    // elements than the VF.

    ElementCount VF = State.VF.divideCoefficientBy(

        cast<VPConstantInt>(getOperand(2))->getZExtValue());

    auto *Iden = Builder.CreateVectorSplat(VF, State.get(getOperand(1), true));

    return Builder.CreateInsertElement(Iden, State.get(getOperand(0), true),

                                       Builder.getInt32(0));

  }

  case VPInstruction::ComputeReductionResult: {

    RecurKind RK = getRecurKind();

    bool IsOrdered = isReductionOrdered();

    bool IsInLoop = isReductionInLoop();

    assert(!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&

           "FindIV should use min/max reduction kinds");


    // The recipe may have multiple operands to be reduced together.

    unsigned NumOperandsToReduce = getNumOperands();

    VectorParts RdxParts(NumOperandsToReduce);

    for (unsigned Part = 0; Part < NumOperandsToReduce; ++Part)

      RdxParts[Part] = State.get(getOperand(Part), IsInLoop);


    IRBuilderBase::FastMathFlagGuard FMFG(Builder);

    Builder.setFastMathFlags(getFastMathFlags());


    // Reduce multiple operands into one.

    Value *ReducedPartRdx = RdxParts[0];

    if (IsOrdered) {

      ReducedPartRdx = RdxParts[NumOperandsToReduce - 1];

    } else {

      // Floating-point operations should have some FMF to enable the reduction.

      for (unsigned Part = 1; Part < NumOperandsToReduce; ++Part) {

        Value *RdxPart = RdxParts[Part];

        if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))

          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);

        else {

          // For sub-recurrences, each part's reduction variable is already

          // negative, we need to do: reduce.add(-acc_uf0 + -acc_uf1)

          Instruction::BinaryOps Opcode =

              RecurrenceDescriptor::isSubRecurrenceKind(RK)

                  ? getSubRecurOpcode(RK)

                  : (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(RK);

          ReducedPartRdx =

              Builder.CreateBinOp(Opcode, RdxPart, ReducedPartRdx, "bin.rdx");

        }

      }

    }


    // Create the reduction after the loop. Note that inloop reductions create

    // the target reduction in the loop using a Reduction recipe.

    if (State.VF.isVector() && !IsInLoop) {

      // TODO: Support in-order reductions based on the recurrence descriptor.

      // All ops in the reduction inherit fast-math-flags from the recurrence

      // descriptor.

      ReducedPartRdx = createSimpleReduction(Builder, ReducedPartRdx, RK);

    }


    return ReducedPartRdx;

  }

  case VPInstruction::ExtractLastLane:

  case VPInstruction::ExtractPenultimateElement: {

    unsigned Offset =

        getOpcode() == VPInstruction::ExtractPenultimateElement ? 2 : 1;

    Value *Res;

    if (State.VF.isVector()) {

      assert(Offset <= State.VF.getKnownMinValue() &&

             "invalid offset to extract from");

      // Extract lane VF - Offset from the operand.

      Res = State.get(getOperand(0), VPLane::getLaneFromEnd(State.VF, Offset));

    } else {

      // TODO: Remove ExtractLastLane for scalar VFs.

      assert(Offset <= 1 && "invalid offset to extract from");

      Res = State.get(getOperand(0));

    }

    if (isa<ExtractElementInst>(Res))

      Res->setName(Name);

    return Res;

  }

  case VPInstruction::LogicalAnd: {

    Value *A = State.get(getOperand(0));

    Value *B = State.get(getOperand(1));

    return Builder.CreateLogicalAnd(A, B, Name);

  }

  case VPInstruction::LogicalOr: {

    Value *A = State.get(getOperand(0));

    Value *B = State.get(getOperand(1));

    return Builder.CreateLogicalOr(A, B, Name);

  }

  case VPInstruction::PtrAdd: {

    assert((State.VF.isScalar() || vputils::onlyFirstLaneUsed(this)) &&

           "can only generate first lane for PtrAdd");

    Value *Ptr = State.get(getOperand(0), VPLane(0));

    Value *Addend = State.get(getOperand(1), VPLane(0));

    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());

  }

  case VPInstruction::WidePtrAdd: {

    Value *Ptr =

        State.get(getOperand(0), vputils::isSingleScalar(getOperand(0)));

    Value *Addend = State.get(getOperand(1));

    return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());

  }

  case VPInstruction::AnyOf: {

    Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));

    for (VPValue *Op : drop_begin(operands()))

      Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));

    return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);

  }

  case VPInstruction::ExtractLane: {

    assert(getNumOperands() != 2 && "ExtractLane from single source should be "

                                    "simplified to ExtractElement.");

    Value *LaneToExtract = State.get(getOperand(0), true);

    Type *IdxTy = getOperand(0)->getScalarType();

    Value *Res = nullptr;

    Value *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);


    for (unsigned Idx = 1; Idx != getNumOperands(); ++Idx) {

      Value *VectorStart =

          Builder.CreateMul(RuntimeVF, ConstantInt::get(IdxTy, Idx - 1));

      Value *VectorIdx = Idx == 1

                             ? LaneToExtract

                             : Builder.CreateSub(LaneToExtract, VectorStart);

      Value *Ext = State.VF.isScalar()

                       ? State.get(getOperand(Idx))

                       : Builder.CreateExtractElement(

                             State.get(getOperand(Idx)), VectorIdx);

      if (Res) {

        Value *Cmp = Builder.CreateICmpUGE(LaneToExtract, VectorStart);

        Res = Builder.CreateSelect(Cmp, Ext, Res);

      } else {

        Res = Ext;

      }

    }

    return Res;

  }

  case VPInstruction::FirstActiveLane: {

    Type *Ty = this->getScalarType();

    if (getNumOperands() == 1) {

      Value *Mask = State.get(getOperand(0));

      return Builder.CreateCountTrailingZeroElems(Ty, Mask,

                                                  /*ZeroIsPoison=*/false, Name);

    }

    // If there are multiple operands, create a chain of selects to pick the

    // first operand with an active lane and add the number of lanes of the

    // preceding operands.

    Value *RuntimeVF = getRuntimeVF(Builder, Ty, State.VF);

    unsigned LastOpIdx = getNumOperands() - 1;

    Value *Res = nullptr;

    for (int Idx = LastOpIdx; Idx >= 0; --Idx) {

      Value *TrailingZeros =

          State.VF.isScalar()

              ? Builder.CreateZExt(

                    Builder.CreateICmpEQ(State.get(getOperand(Idx)),

                                         Builder.getFalse()),

                    Ty)

              : Builder.CreateCountTrailingZeroElems(

                    Ty, State.get(getOperand(Idx)),

                    /*ZeroIsPoison=*/false, Name);

      Value *Current = Builder.CreateAdd(

          Builder.CreateMul(RuntimeVF, ConstantInt::get(Ty, Idx)),

          TrailingZeros);

      if (Res) {

        Value *Cmp = Builder.CreateICmpNE(TrailingZeros, RuntimeVF);

        Res = Builder.CreateSelect(Cmp, Current, Res);

      } else {

        Res = Current;

      }

    }


    return Res;

  }

  case VPInstruction::ResumeForEpilogue:

    return State.get(getOperand(0), true);

  case VPInstruction::Reverse:

    return Builder.CreateVectorReverse(State.get(getOperand(0)), "reverse");

  case VPInstruction::ExtractLastActive: {

    Value *Result = State.get(getOperand(0), /*IsScalar=*/true);

    for (unsigned Idx = 1; Idx < getNumOperands(); Idx += 2) {

      Value *Data = State.get(getOperand(Idx));

      Value *Mask = State.get(getOperand(Idx + 1));

      Type *VTy = Data->getType();


      if (State.VF.isScalar())

        Result = Builder.CreateSelect(Mask, Data, Result);

      else

        Result = Builder.CreateIntrinsic(

            Intrinsic::experimental_vector_extract_last_active, {VTy},

            {Data, Mask, Result});

    }


    return Result;

  }

  default:

    llvm_unreachable("Unsupported opcode for instruction");

  }

}


/// Return the TTI cost of this recipe when it is costed as the IR instruction
/// \p Opcode at vectorization factor \p VF.
///
/// The costed result type is the recipe's scalar type, widened to \p VF when
/// \p VF is a vector. Unary/binary arithmetic, freeze, extractvalue, compares,
/// casts and selects are handled; any other opcode reaches llvm_unreachable.
///
/// \param Opcode the IR opcode to cost the recipe as.
/// \param VF     the vectorization factor to cost for.
/// \param Ctx    cost context providing TTI, TLI, the cost kind and operand
///               info queries.
InstructionCost VPRecipeWithIRFlags::getCostForRecipeWithOpcode(
    unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const {
  // Type of the value being costed: scalar for a scalar VF, otherwise the
  // scalar type widened to VF.
  Type *ScalarTy = this->getScalarType();
  Type *ResultTy = VF.isVector() ? toVectorTy(ScalarTy, VF) : ScalarTy;
  switch (Opcode) {
  case Instruction::FNeg:
    return Ctx.TTI.getArithmeticInstrCost(Opcode, ResultTy, Ctx.CostKind);
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Certain instructions can be cheaper if they have a constant second
    // operand. One example of this is shifts on x86.
    VPValue *RHS = getOperand(1);
    TargetTransformInfo::OperandValueInfo RHSInfo = Ctx.getOperandInfo(RHS);

    // If nothing more specific is known about the RHS but it is defined
    // outside the loop regions, report it to TTI as a uniform value.
    if (RHSInfo.Kind == TargetTransformInfo::OK_AnyValue &&
        getOperand(1)->isDefinedOutsideLoopRegions())
      RHSInfo.Kind = TargetTransformInfo::OK_UniformValue;

    // When there is an underlying IR instruction, hand it and its operands to
    // TTI as additional context; otherwise the operand list stays empty.
    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
    SmallVector<const Value *, 4> Operands;
    if (CtxI)
      Operands.append(CtxI->value_op_begin(), CtxI->value_op_end());
    return Ctx.TTI.getArithmeticInstrCost(
        Opcode, ResultTy, Ctx.CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        RHSInfo, Operands, CtxI, &Ctx.TLI);
  }
  case Instruction::Freeze:
    // NOTE: The only way to ask for the cost is via getInstructionCost, which
    // requires the actual vector instruction. Instead, both here and in the
    // LoopVectorizationCostModel::getInstructionCost the costs mirror the
    // current behaviour in llvm/Analysis/TargetTransformInfoImpl.h to keep
    // them in sync.
    return TTI::TCC_Free;
  case Instruction::ExtractValue:
    return Ctx.TTI.getInsertExtractValueCost(Instruction::ExtractValue,
                                             Ctx.CostKind);
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Compares are costed on the type of their first operand; the result type
    // passed to TTI is derived from that operand type.
    Type *ScalarOpTy = getOperand(0)->getScalarType();
    Type *OpTy = VF.isVector() ? toVectorTy(ScalarOpTy, VF) : ScalarOpTy;
    Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
    return Ctx.TTI.getCmpSelInstrCost(
        Opcode, OpTy, CmpInst::makeCmpResultType(OpTy), getPredicate(),
        Ctx.CostKind, {TTI::OK_AnyValue, TTI::OP_None},
        {TTI::OK_AnyValue, TTI::OP_None}, CtxI);
  }
  case Instruction::BitCast: {
    // A bitcast whose result is a pointer is costed as free; every other
    // bitcast is costed like the remaining casts below.
    Type *ScalarTy = this->getScalarType();
    if (ScalarTy->isPointerTy())
      return 0;
    [[fallthrough]];
  }
  case Instruction::SExt:
  case Instruction::ZExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::PtrToAddr:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::AddrSpaceCast: {
    // Computes the CastContextHint from a recipe that may access memory.
    // Recipes that are neither interleave, replicate nor widen-memory recipes
    // yield CastContextHint::None.
    auto ComputeCCH = [&](const VPRecipeBase *R) -> TTI::CastContextHint {
      if (isa<VPInterleaveBase>(R))
        return TTI::CastContextHint::Interleave;
      if (const auto *ReplicateRecipe = dyn_cast<VPReplicateRecipe>(R)) {
        // Only compute CCH for memory operations, matching the legacy model
        // which only considers loads/stores for cast context hints.
        auto *UI = cast<Instruction>(ReplicateRecipe->getUnderlyingValue());
        if (!isa<LoadInst, StoreInst>(UI))
          return TTI::CastContextHint::None;
        return ReplicateRecipe->isPredicated() ? TTI::CastContextHint::Masked
                                               : TTI::CastContextHint::Normal;
      }
      const auto *WidenMemoryRecipe = dyn_cast<VPWidenMemoryRecipe>(R);
      if (WidenMemoryRecipe == nullptr)
        return TTI::CastContextHint::None;
      if (VF.isScalar())
        return TTI::CastContextHint::Normal;
      // Non-consecutive accesses take precedence over masking.
      if (!WidenMemoryRecipe->isConsecutive())
        return TTI::CastContextHint::GatherScatter;
      if (WidenMemoryRecipe->isMasked())
        return TTI::CastContextHint::Masked;
      return TTI::CastContextHint::Normal;
    };

    VPValue *Operand = getOperand(0);
    TTI::CastContextHint CCH = TTI::CastContextHint::None;
    // Set when a reverse sits between this cast and the recipe the context
    // hint is taken from.
    bool IsReverse = false;
    // For Trunc/FPTrunc, get the context from the only user.
    if (Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) {
      // Returns the single unique user of R as a recipe, or nullptr when R has
      // no users, more than one unique user, or a user that is not a recipe.
      auto GetOnlyUser = [](const VPSingleDefRecipe *R) -> VPRecipeBase * {
        if (R->getNumUsers() == 0 || R->hasMoreThanOneUniqueUser())
          return nullptr;
        return dyn_cast<VPRecipeBase>(*R->user_begin());
      };
      if (VPRecipeBase *Recipe = GetOnlyUser(this)) {
        // Look through a reverse to its own only user, if any.
        if (match(Recipe,
                  m_CombineOr(
                      m_Reverse(m_VPValue()),
                      m_Intrinsic<Intrinsic::experimental_vp_reverse>()))) {
          Recipe = GetOnlyUser(cast<VPSingleDefRecipe>(Recipe));
          IsReverse = true;
        }
        if (Recipe)
          CCH = ComputeCCH(Recipe);
      }
    }
    // For ZExt/SExt/FPExt, get the context from the operand.
    else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt ||
             Opcode == Instruction::FPExt) {
      if (auto *Recipe = Operand->getDefiningRecipe()) {
        // Look through a reverse to the recipe defining the reversed value.
        VPValue *ReverseOp;
        if (match(Recipe,
                  m_CombineOr(m_Reverse(m_VPValue(ReverseOp)),
                              m_Intrinsic<Intrinsic::experimental_vp_reverse>(
                                  m_VPValue(ReverseOp))))) {
          Recipe = ReverseOp->getDefiningRecipe();
          IsReverse = true;
        }
        if (Recipe)
          CCH = ComputeCCH(Recipe);
      }
    }
    // A reverse only changes the hint when a memory context was found at all.
    if (IsReverse && CCH != TTI::CastContextHint::None)
      CCH = TTI::CastContextHint::Reversed;

    auto *ScalarSrcTy = Operand->getScalarType();
    Type *SrcTy = VF.isVector() ? toVectorTy(ScalarSrcTy, VF) : ScalarSrcTy;
    // Arm TTI will use the underlying instruction to determine the cost.
    return Ctx.TTI.getCastInstrCost(
        Opcode, ResultTy, SrcTy, CCH, Ctx.CostKind,
        dyn_cast_if_present<Instruction>(getUnderlyingValue()));
  }
  case Instruction::Select: {
    SelectInst *SI = cast_or_null<SelectInst>(getUnderlyingValue());
    // The condition is treated as scalar when it is defined outside the loop
    // regions.
    bool IsScalarCond = getOperand(0)->isDefinedOutsideLoopRegions();
    Type *ScalarTy = this->getScalarType();

    // Op0/Op1 are written by the matchers below and are only read when at
    // least one of the logical and/or patterns matched.
    VPValue *Op0, *Op1;
    bool IsLogicalAnd =
        match(this, m_c_LogicalAnd(m_VPValue(Op0), m_VPValue(Op1)));
    bool IsLogicalOr =
        match(this, m_c_LogicalOr(m_VPValue(Op0), m_VPValue(Op1)));
    // Also match the inverted forms:
    // select x, false, y --> !x & y (still AND)
    // select x, y, true --> !x | y (still OR)
    IsLogicalAnd |=
        match(this, m_Select(m_VPValue(Op0), m_False(), m_VPValue(Op1)));
    IsLogicalOr |=
        match(this, m_Select(m_VPValue(Op0), m_VPValue(Op1), m_True()));

    // Selects on 1-bit values that implement a logical and/or are costed as
    // the corresponding bitwise instruction rather than as a select.
    if (!IsScalarCond && ScalarTy->getScalarSizeInBits() == 1 &&
        (IsLogicalAnd || IsLogicalOr)) {
      // select x, y, false --> x & y
      // select x, true, y --> x | y
      const auto [Op1VK, Op1VP] = Ctx.getOperandInfo(Op0);
      const auto [Op2VK, Op2VP] = Ctx.getOperandInfo(Op1);

      // Only pass the IR operands as context when there is an underlying
      // select and every recipe operand has an underlying IR value.
      SmallVector<const Value *, 2> Operands;
      if (SI && all_of(operands(),
                       [](VPValue *Op) { return Op->getUnderlyingValue(); }))
        append_range(Operands, SI->operands());
      return Ctx.TTI.getArithmeticInstrCost(
          IsLogicalOr ? Instruction::Or : Instruction::And, ResultTy,
          Ctx.CostKind, {Op1VK, Op1VP}, {Op2VK, Op2VP}, Operands, SI);
    }

    // The condition type is widened only for a vector VF and a condition that
    // is not defined outside the loop regions.
    Type *CondTy = getOperand(0)->getScalarType();
    if (!IsScalarCond && VF.isVector())
      CondTy = VectorType::get(CondTy, VF);

    // Take the predicate from a compare recipe feeding the condition; failing
    // that, from an IR compare wrapped in a VPIRValue. If neither applies,
    // Pred keeps its default-constructed value.
    llvm::CmpPredicate Pred;
    if (!match(getOperand(0), m_Cmp(Pred, m_VPValue(), m_VPValue())))
      if (auto *CondIRV = dyn_cast<VPIRValue>(getOperand(0)))
        if (auto *Cmp = dyn_cast<CmpInst>(CondIRV->getValue()))
          Pred = Cmp->getPredicate();
    Type *VectorTy = toVectorTy(this->getScalarType(), VF);
    return Ctx.TTI.getCmpSelInstrCost(
        Instruction::Select, VectorTy, CondTy, Pred, Ctx.CostKind,
        {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, SI);
  }
  }
  llvm_unreachable("called for unsupported opcode");
}


/// Compute the cost of this VPInstruction at vectorization factor \p VF.
///
/// Binary operators are forwarded to getCostForRecipeWithOpcode (or costed as
/// 0 when there is no underlying value and the opcode is not FMul). A fixed
/// set of other opcodes is costed through TTI queries; every remaining opcode
/// is costed as 0 for now and is expected to have no underlying value.
///
/// \param VF  the vectorization factor to cost for.
/// \param Ctx cost context providing TTI, the cost kind and the LLVMContext.
/// \returns the computed cost, or an invalid cost for
///          ExtractPenultimateElement with a scalable VF of 1.
InstructionCost VPInstruction::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  if (Instruction::isBinaryOp(getOpcode())) {
    if (!getUnderlyingValue() && getOpcode() != Instruction::FMul) {
      // TODO: Compute cost for VPInstructions without underlying values once
      // the legacy cost model has been retired.
      return 0;
    }

    assert(!doesGeneratePerAllLanes() &&
           "Should only generate a vector value or single scalar, not scalars "
           "for all lanes.");
    // Cost as a scalar operation when only the first lane is used.
    return getCostForRecipeWithOpcode(
        getOpcode(),
        vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);
  }

  switch (getOpcode()) {
  case Instruction::Select: {
    // Pred stays BAD_ICMP_PREDICATE unless the condition is a compare.
    llvm::CmpPredicate Pred = CmpInst::BAD_ICMP_PREDICATE;
    match(getOperand(0), m_Cmp(Pred, m_VPValue(), m_VPValue()));
    auto *CondTy = getOperand(0)->getScalarType();
    auto *VecTy = getOperand(1)->getScalarType();
    // Widen both types unless only the first lane is used.
    if (!vputils::onlyFirstLaneUsed(this)) {
      CondTy = toVectorTy(CondTy, VF);
      VecTy = toVectorTy(VecTy, VF);
    }
    return Ctx.TTI.getCmpSelInstrCost(Instruction::Select, VecTy, CondTy, Pred,
                                      Ctx.CostKind);
  }
  case Instruction::ExtractElement:
  case VPInstruction::ExtractLane: {
    if (VF.isScalar()) {
      // ExtractLane with VF=1 takes care of handling extracting across multiple
      // parts.
      return 0;
    }

    // Add on the cost of extracting the element.
    auto *VecTy = toVectorTy(getOperand(0)->getScalarType(), VF);
    return Ctx.TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                      Ctx.CostKind);
  }
  case VPInstruction::AnyOf: {
    // Costed as an or-reduction over the widened result type.
    auto *VecTy = toVectorTy(this->getScalarType(), VF);
    return Ctx.TTI.getArithmeticReductionCost(
        Instruction::Or, cast<VectorType>(VecTy), std::nullopt, Ctx.CostKind);
  }
  case VPInstruction::FirstActiveLane: {
    Type *Ty = this->getScalarType();
    Type *ScalarTy = getOperand(0)->getScalarType();
    // With a scalar VF this is costed as a single compare of the operand.
    if (VF.isScalar())
      return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                        CmpInst::makeCmpResultType(ScalarTy),
                                        CmpInst::ICMP_EQ, Ctx.CostKind);
    // Calculate the cost of determining the lane index.
    auto *PredTy = toVectorTy(ScalarTy, VF);
    IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts, Ty,
                                  {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  }
  case VPInstruction::LastActiveLane: {
    Type *Ty = this->getScalarType();
    Type *ScalarTy = getOperand(0)->getScalarType();
    // With a scalar VF this is costed as a single compare of the operand.
    if (VF.isScalar())
      return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
                                        CmpInst::makeCmpResultType(ScalarTy),
                                        CmpInst::ICMP_EQ, Ctx.CostKind);
    // Calculate the cost of determining the lane index: NOT + cttz_elts + SUB.
    auto *PredTy = toVectorTy(ScalarTy, VF);
    IntrinsicCostAttributes Attrs(Intrinsic::experimental_cttz_elts, Ty,
                                  {PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
    InstructionCost Cost = Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
    // Add cost of NOT operation on the predicate.
    Cost += Ctx.TTI.getArithmeticInstrCost(
        Instruction::Xor, PredTy, Ctx.CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        {TargetTransformInfo::OK_UniformConstantValue,
         TargetTransformInfo::OP_None});
    // Add cost of SUB operation on the index.
    Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Sub, Ty, Ctx.CostKind);
    return Cost;
  }
  case VPInstruction::ExtractLastActive: {
    // Costed as one extract_last_active intrinsic call on (data, mask,
    // scalar) arguments.
    Type *ScalarTy = this->getScalarType();
    Type *VecTy = toVectorTy(ScalarTy, VF);
    Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF);
    IntrinsicCostAttributes ICA(
        Intrinsic::experimental_vector_extract_last_active, ScalarTy,
        {VecTy, MaskTy, ScalarTy});
    return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
    // Costed as a splice shuffle with index -1 and no explicit mask.
    Type *VectorTy = toVectorTy(this->getScalarType(), VF);
    return Ctx.TTI.getShuffleCost(
        TargetTransformInfo::SK_Splice, cast<VectorType>(VectorTy),
        cast<VectorType>(VectorTy), {}, Ctx.CostKind, -1);
  }
  case VPInstruction::ActiveLaneMask: {
    Type *ArgTy = getOperand(0)->getScalarType();
    // Operand 2 is a constant that scales the number of lanes in the mask.
    unsigned Multiplier = cast<VPConstantInt>(getOperand(2))->getZExtValue();
    Type *RetTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF * Multiplier);
    IntrinsicCostAttributes Attrs(Intrinsic::get_active_lane_mask, RetTy,
                                  {ArgTy, ArgTy});
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  }
  case VPInstruction::ExplicitVectorLength: {
    // Costed as an experimental_get_vector_length call returning i32.
    Type *Arg0Ty = getOperand(0)->getScalarType();
    Type *I32Ty = Type::getInt32Ty(Ctx.LLVMCtx);
    Type *I1Ty = Type::getInt1Ty(Ctx.LLVMCtx);
    IntrinsicCostAttributes Attrs(Intrinsic::experimental_get_vector_length,
                                  I32Ty, {Arg0Ty, I32Ty, I1Ty});
    return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
  }
  case VPInstruction::Reverse: {
    assert(VF.isVector() && "Reverse operation must be vector type");
    Type *EltTy = this->getScalarType();
    // Skip the reverse operation cost for the mask.
    // FIXME: Remove this once redundant mask reverse operations can be
    // eliminated by VPlanTransforms::cse before cost computation.
    if (EltTy->isIntegerTy(1))
      return 0;
    auto *VectorTy = cast<VectorType>(toVectorTy(EltTy, VF));
    return Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy,
                                  VectorTy, /*Mask=*/{}, Ctx.CostKind,
                                  /*Index=*/0);
  }
  case VPInstruction::ExtractLastLane: {
    // Add on the cost of extracting the element.
    auto *VecTy = toVectorTy(getOperand(0)->getScalarType(), VF);
    return Ctx.TTI.getIndexedVectorInstrCostFromEnd(Instruction::ExtractElement,
                                                    VecTy, Ctx.CostKind, 0);
  }
  case VPInstruction::Not: {
    Type *ValTy = this->getScalarType();
    // InstCombine will fold `xor` to the conditional branch.
    if (auto *U = const_cast<VPUser *>(getSingleUser()))
      if (match(U, m_BranchOnCond(m_VPValue())))
        return 0;
    // Otherwise cost as an xor, widened unless only the first lane is used.
    if (!vputils::onlyFirstLaneUsed(this))
      ValTy = toVectorTy(ValTy, VF);
    return Ctx.TTI.getArithmeticInstrCost(Instruction::Xor, ValTy,
                                          Ctx.CostKind);
  }
  case VPInstruction::BranchOnCount: {
    // If TC <= VF then this is just a branch.
    // FIXME: Removing the branch happens in simplifyBranchConditionForVFAndUF
    // where it checks TC <= VF * UF, but we don't know UF yet. This means in
    // some cases we get a cost that's too high due to counting a cmp that
    // later gets removed.
    // FIXME: The compare could also be removed if TC = M * vscale,
    // VF = N * vscale, and M <= N. Detecting that would require having the
    // trip count as a SCEV though.
    Value *TC = getParent()->getPlan()->getTripCount()->getUnderlyingValue();
    ConstantInt *TCConst = dyn_cast_if_present<ConstantInt>(TC);
    if (TCConst && TCConst->getValue().ule(VF.getKnownMinValue()))
      return 0;
    // Otherwise BranchOnCount generates ICmpEQ followed by a branch.
    Type *ValTy = getOperand(0)->getScalarType();
    return Ctx.TTI.getCmpSelInstrCost(Instruction::ICmp, ValTy,
                                      CmpInst::makeCmpResultType(ValTy),
                                      CmpInst::ICMP_EQ, Ctx.CostKind);
  }
  case Instruction::FCmp:
  case Instruction::ICmp:
    // Cost as a scalar compare when only the first lane is used.
    return getCostForRecipeWithOpcode(
        getOpcode(),
        vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF, Ctx);
  case VPInstruction::ExtractPenultimateElement:
    // A scalable VF of 1 gets an invalid cost; all other VFs share the
    // default handling below.
    if (VF == ElementCount::getScalable(1))
      return InstructionCost::getInvalid();
    [[fallthrough]];
  default:
    // TODO: Compute cost other VPInstructions once the legacy cost model has
    // been retired.
    assert(!getUnderlyingValue() &&
           "unexpected VPInstruction with underlying value");
    return 0;
  }
}


/// Return true if this instruction's opcode is one of the opcodes classified
/// here as vector-to-scalar: the element/lane extracts, the active-lane
/// queries, ComputeReductionResult, AnyOf and NumActiveLanes.
bool VPInstruction::isVectorToScalar() const {
  switch (getOpcode()) {
  case Instruction::ExtractElement:
  case VPInstruction::AnyOf:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::ExtractLane:
  case VPInstruction::ExtractLastActive:
  case VPInstruction::ExtractLastLane:
  case VPInstruction::ExtractPenultimateElement:
  case VPInstruction::FirstActiveLane:
  case VPInstruction::LastActiveLane:
  case VPInstruction::NumActiveLanes:
    return true;
  default:
    return false;
  }
}


/// Return true if this instruction's opcode is classified here as producing a
/// single scalar: any cast opcode, Load, PHI, ExplicitVectorLength,
/// ResumeForEpilogue or VScale.
bool VPInstruction::isSingleScalar() const {
  const unsigned Opc = getOpcode();
  // All cast opcodes qualify.
  if (Instruction::isCast(Opc))
    return true;
  return Opc == Instruction::Load || Opc == Instruction::PHI ||
         Opc == VPInstruction::ExplicitVectorLength ||
         Opc == VPInstruction::ResumeForEpilogue ||
         Opc == VPInstruction::VScale;
}


/// Append \p Op to this instruction's operand list.
///
/// In builds with assertions enabled, the scalar type of \p Op is first
/// checked against the existing operands for the opcodes that support growing
/// their operand list; any other opcode is reported as unreachable.
void VPInstruction::addOperand(VPValue *Op) {
#ifndef NDEBUG
  Type *AppendedTy = Op->getScalarType();
  const unsigned Opc = getOpcode();
  if (Opc == VPInstruction::AnyOf || Opc == VPInstruction::FirstActiveLane ||
      Opc == VPInstruction::LastActiveLane) {
    assert(AppendedTy == getOperand(0)->getScalarType() &&
           "types of operand 0 and new operand must match");
  } else if (Opc == VPInstruction::ComputeReductionResult ||
             Opc == VPInstruction::BuildVector ||
             Opc == VPInstruction::BuildStructVector) {
    assert(AppendedTy == getOperand(0)->getScalarType() &&
           "appended operand must match operand 0's scalar type");
  } else if (Opc == VPInstruction::ExtractLane) {
    // New operands are compared against operand 1, not operand 0.
    assert(AppendedTy == getOperand(1)->getScalarType() &&
           "appended operand must match operand 1's scalar type");
  } else if (Opc == VPInstruction::ExtractLastActive) {
    // The recipe is constructed with 3 operands (result, data, mask). Extra
    // operands beyond that are appended in (data, mask) pairs.
    constexpr unsigned NumInitialOperands = 3;
    const unsigned NumExistingOps = getNumOperands();
    assert(NumExistingOps >= NumInitialOperands &&
           "ExtractLastActive must have at least the initial 3 operands");
    // An odd number of extra operands means a data operand is still waiting
    // for its mask, so the operand being appended must be that mask.
    const bool IsMaskSlot = (NumExistingOps - NumInitialOperands) % 2 != 0;
    const bool HasExpectedTy =
        IsMaskSlot ? AppendedTy->isIntegerTy(1)
                   : AppendedTy == getOperand(1)->getScalarType();
    assert(HasExpectedTy &&
           "ExtractLastActive expects alternating data/mask operands "
           "matching operand 1's type and i1, respectively");
  } else {
    llvm_unreachable("opcode does not support growing the operand list "
                     "outside of construction");
  }
#endif
  VPUser::addOperand(Op);
}


/// Generate the IR for this instruction and, if it has a result, record the
/// generated value in \p State.
///
/// The builder's fast-math flags are set from this instruction for the
/// duration of the call when it carries fast-math flags. The value is recorded
/// as a scalar when only a first-lane value is generated.
void VPInstruction::execute(VPTransformState &State) {
  assert(!isMasked() && "cannot execute masked VPInstruction");
  // Restores the builder's fast-math flags when this function returns.
  IRBuilderBase::FastMathFlagGuard RestoreFMF(State.Builder);
  assert(flagsValidForOpcode(getOpcode()) &&
         "Set flags not supported for the provided opcode");
  assert(hasRequiredFlagsForOpcode(getOpcode()) &&
         "Opcode requires specific flags to be set");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  Value *Generated = generate(State);
  if (!hasResult())
    return;
  assert(Generated && "generate must produce a value");

  // Only a first-lane value is produced when the opcode can generate one and
  // either just the first lane is used or the result is scalar by nature.
  bool FirstLaneOnly = false;
  if (canGenerateScalarForFirstLane())
    FirstLaneOnly = vputils::onlyFirstLaneUsed(this) || isVectorToScalar() ||
                    isSingleScalar();

  // For a vector VF, the generated value must be a vector or struct exactly
  // when more than the first lane is generated.
  assert((((Generated->getType()->isVectorTy() ||
            Generated->getType()->isStructTy()) != FirstLaneOnly) ||
          State.VF.isScalar()) &&
         "scalar value but not only first lane defined");
  State.set(this, Generated, /*IsScalar*/ FirstLaneOnly);

  if (getOpcode() != VPInstruction::ResumeForEpilogue)
    return;
  // FIXME: This is a workaround to enable reliable updates of the scalar loop
  // resume phis, when vectorizing the epilogue. Must be removed once epilogue
  // vectorization explicitly connects VPlans.
  setUnderlyingValue(Generated);
}


bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {

  if (Instruction::isBinaryOp(getOpcode()) ||

      Instruction::isUnaryOp(getOpcode()) || Instruction::isCast(getOpcode()))

    return false;

  switch (getOpcode()) {

  case Instruction::ExtractValue:

  case Instruction::InsertValue:

  case Instruction::GetElementPtr:

  case Instruction::ExtractElement:

  case Instruction::InsertElement:

  case Instruction::Freeze:

  case Instruction::FCmp:

  case Instruction::ICmp:

  case Instruction::Select:

  case Instruction::PHI:

  case VPInstruction::AnyOf:

  case VPInstruction::BranchOnCond:

  case VPInstruction::BranchOnTwoConds:

  case VPInstruction::BranchOnCount:

  case VPInstruction::Broadcast:

  case VPInstruction::BuildStructVector:

  case VPInstruction::BuildVector:

  case VPInstruction::CalculateTripCountMinusVF:

  case VPInstruction::CanonicalIVIncrementForPart:

  case VPInstruction::ExtractLane:

  case VPInstruction::ExtractLastLane:

  case VPInstruction::ExtractLastPart:

  case VPInstruction::ExtractPenultimateElement:

  case VPInstruction::ActiveLaneMask:

  case VPInstruction::IncomingAliasMask:

  case VPInstruction::ExitingIVValue:

  case VPInstruction::ExplicitVectorLength:

  case VPInstruction::FirstActiveLane:

  case VPInstruction::LastActiveLane:

  case VPInstruction::ExtractLastActive:

  case VPInstruction::FirstOrderRecurrenceSplice:

  case VPInstruction::LogicalAnd:

  case VPInstruction::LogicalOr:

  case VPInstruction::MaskedCond:

  case VPInstruction::Not:

  case VPInstruction::PtrAdd:

  case VPInstruction::WideIVStep:

  case VPInstruction::WidePtrAdd:

  case VPInstruction::StepVector:

  case VPInstruction::ReductionStartVector:

  case VPInstruction::Reverse:

  case VPInstruction::VScale:

  case VPInstruction::Unpack:

    return false;

  case Instruction::Call:

    return !getCalledFunction(ArrayRef<VPValue *>(op_begin(), op_end()))

                ->doesNotAccessMemory();

  default:

    return true;

  }

}


/// Returns true if this VPInstruction only uses the first lane of operand
/// \p Op. \p Op must be an operand of this recipe (checked by an assertion).
bool VPInstruction::usesFirstLaneOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  // Binary operators and casts use only the first lane of their operands iff
  // only the first lane of their own result is used. Note that this already
  // covers Instruction::Or, so it needs no dedicated case in the switch below.
  if (Instruction::isBinaryOp(getOpcode()) || Instruction::isCast(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ExtractElement:
    return Op == getOperand(1);
  case Instruction::InsertElement:
    return Op == getOperand(1) || Op == getOperand(2);
  case Instruction::PHI:
    return true;
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
  case Instruction::Freeze:
  case VPInstruction::Not:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case Instruction::Load:
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::Broadcast:
  case VPInstruction::ReductionStartVector:
    return true;
  case VPInstruction::BuildStructVector:
  case VPInstruction::BuildVector:
    // Before replicating by VF, Build(Struct)Vector uses all lanes of the
    // operand, after replicating its operands only the first lane is used.
    // Before replicating, it will have only a single operand.
    return getNumOperands() > 1;
  case VPInstruction::PtrAdd:
    return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this);
  case VPInstruction::WidePtrAdd:
    // WidePtrAdd supports scalar and vector base addresses.
    return false;
  case VPInstruction::ExitingIVValue:
  case VPInstruction::ExtractLane:
    return Op == getOperand(0);
  }
  llvm_unreachable("switch should return");
}


/// Returns true if this VPInstruction only uses the first part of operand
/// \p Op. \p Op must be an operand of this recipe (checked by an assertion).
bool VPInstruction::usesFirstPartOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  const unsigned Opc = getOpcode();
  // Binary operators defer to how this recipe's own result is used.
  if (Instruction::isBinaryOp(Opc))
    return vputils::onlyFirstPartUsed(this);

  switch (Opc) {
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    // Compares and selects likewise defer to the users of the result.
    return vputils::onlyFirstPartUsed(this);

  case VPInstruction::BranchOnCount:
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnTwoConds:
  case VPInstruction::CanonicalIVIncrementForPart:
    return true;

  default:
    return false;
  }
  llvm_unreachable("switch should return");
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


void VPInstruction::dump() const {

  VPSlotTracker SlotTracker(getParent()->getPlan());

  printRecipe(dbgs(), "", SlotTracker);

}


/// Prints this VPInstruction to \p O, prefixed by \p Indent: the "EMIT" /
/// "EMIT-SCALAR" marker, the defined value (if the instruction has a result),
/// a textual opcode name, the flags and finally the operands.
void VPInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  // Maps the opcode to its printed name. VPlan-specific opcodes have dedicated
  // names; everything else falls back to the IR opcode name.
  auto GetOpcodeName = [this]() -> const char * {
    switch (getOpcode()) {
    case VPInstruction::Not:
      return "not";
    case VPInstruction::ActiveLaneMask:
      return "active lane mask";
    case VPInstruction::IncomingAliasMask:
      return "incoming-alias-mask";
    case VPInstruction::ExplicitVectorLength:
      return "EXPLICIT-VECTOR-LENGTH";
    case VPInstruction::FirstOrderRecurrenceSplice:
      return "first-order splice";
    case VPInstruction::BranchOnCond:
      return "branch-on-cond";
    case VPInstruction::BranchOnTwoConds:
      return "branch-on-two-conds";
    case VPInstruction::CalculateTripCountMinusVF:
      return "TC > VF ? TC - VF : 0";
    case VPInstruction::CanonicalIVIncrementForPart:
      return "VF * Part +";
    case VPInstruction::BranchOnCount:
      return "branch-on-count";
    case VPInstruction::Broadcast:
      return "broadcast";
    case VPInstruction::BuildStructVector:
      return "buildstructvector";
    case VPInstruction::BuildVector:
      return "buildvector";
    case VPInstruction::ExitingIVValue:
      return "exiting-iv-value";
    case VPInstruction::MaskedCond:
      return "masked-cond";
    case VPInstruction::ExtractLane:
      return "extract-lane";
    case VPInstruction::ExtractLastLane:
      return "extract-last-lane";
    case VPInstruction::ExtractLastPart:
      return "extract-last-part";
    case VPInstruction::ExtractPenultimateElement:
      return "extract-penultimate-element";
    case VPInstruction::ComputeReductionResult:
      return "compute-reduction-result";
    case VPInstruction::LogicalAnd:
      return "logical-and";
    case VPInstruction::LogicalOr:
      return "logical-or";
    case VPInstruction::PtrAdd:
      return "ptradd";
    case VPInstruction::WidePtrAdd:
      return "wide-ptradd";
    case VPInstruction::AnyOf:
      return "any-of";
    case VPInstruction::FirstActiveLane:
      return "first-active-lane";
    case VPInstruction::LastActiveLane:
      return "last-active-lane";
    case VPInstruction::ReductionStartVector:
      return "reduction-start-vector";
    case VPInstruction::ResumeForEpilogue:
      return "resume-for-epilogue";
    case VPInstruction::Reverse:
      return "reverse";
    case VPInstruction::Unpack:
      return "unpack";
    case VPInstruction::ExtractLastActive:
      return "extract-last-active";
    case VPInstruction::NumActiveLanes:
      return "num-active-lanes";
    default:
      return Instruction::getOpcodeName(getOpcode());
    }
  };
  O << GetOpcodeName();

  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


/// Generates IR for a typed VPInstruction. Casts are created for lane 0 only;
/// StepVector and VScale are the only other opcodes implemented here.
void VPInstructionWithType::execute(VPTransformState &State) {
  const unsigned Opc = getOpcode();
  Type *Ty = getResultType();

  if (Instruction::isCast(Opc)) {
    Value *Src = State.get(getOperand(0), VPLane(0));
    Value *Result = State.Builder.CreateCast(
        static_cast<Instruction::CastOps>(Opc), Src, Ty);
    // Flags and metadata are only applied if the builder produced an
    // instruction.
    if (auto *CastI = dyn_cast<Instruction>(Result)) {
      applyFlags(*CastI);
      applyMetadata(*CastI);
    }
    State.set(this, Result, VPLane(0));
    return;
  }

  switch (Opc) {
  case VPInstruction::StepVector: {
    // Step vector with element type Ty and State.VF elements.
    auto *VecTy = VectorType::get(Ty, State.VF);
    State.set(this, State.Builder.CreateStepVector(VecTy));
    break;
  }
  case VPInstruction::VScale: {
    Value *VScaleV = State.Builder.CreateVScale(Ty);
    State.set(this, VScaleV, /*IsScalar=*/true);
    break;
  }
  default:
    llvm_unreachable("opcode not implemented yet");
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this typed VPInstruction to \p O, prefixed by \p Indent. Opcodes
/// without a dedicated case must be casts and are printed as
/// "<opcode><flags><operands> to <result type>".
void VPInstructionWithType::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = ";

  Type *Ty = getResultType();
  const unsigned Opc = getOpcode();
  switch (Opc) {
  case VPInstruction::StepVector:
    O << "step-vector " << *Ty;
    break;
  case VPInstruction::VScale:
    O << "vscale " << *Ty;
    break;
  case VPInstruction::WideIVStep:
    O << "wide-iv-step ";
    printOperands(O, SlotTracker);
    break;
  case Instruction::Load:
    O << "load ";
    printOperands(O, SlotTracker);
    break;
  default:
    assert(Instruction::isCast(Opc) && "unhandled opcode");
    O << Instruction::getOpcodeName(Opc);
    printFlags(O);
    printOperands(O, SlotTracker);
    O << " to " << *Ty;
  }
}


#endif


/// Creates the IR phi for this VPPhi and adds the incoming values that are
/// already available, using lane 0 of each incoming value.
void VPPhi::execute(VPTransformState &State) {
  PHINode *IRPhi = State.Builder.CreatePHI(getScalarType(), 2, getName());

  // Detect header phis: the parent block dominates its second incoming block
  // (the latch). Those IR incoming values have not been generated yet and need
  // to be added after they have been executed, so only the first incoming
  // value is added here.
  const unsigned TotalIncoming = getNumIncoming();
  const bool IsHeaderPhi =
      TotalIncoming == 2 &&
      State.VPDT.dominates(getParent(), getIncomingBlock(1));
  const unsigned NumToAdd = IsHeaderPhi ? 1 : TotalIncoming;

  for (unsigned I = 0; I < NumToAdd; ++I) {
    Value *IncomingV = State.get(getIncomingValue(I), VPLane(0));
    BasicBlock *IncomingBB = State.CFG.VPBB2IRBB.at(getIncomingBlock(I));
    IRPhi->addIncoming(IncomingV, IncomingBB);
  }

  State.set(this, IRPhi, VPLane(0));
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this VPPhi to \p O, prefixed by \p Indent, as
/// "EMIT[-SCALAR] <def> = phi<flags><incoming values and blocks>".
void VPPhi::printRecipe(raw_ostream &O, const Twine &Indent,
                        VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT";
  if (isSingleScalar())
    O << "-SCALAR";
  O << " ";

  printAsOperand(O, SlotTracker);
  O << " = phi";

  printFlags(O);
  printPhiOperands(O, SlotTracker);
}


#endif


/// Allocates a recipe wrapping the IR instruction \p I: a VPIRPhi if \p I is
/// a PHINode, a plain VPIRInstruction otherwise. The recipe is created with
/// `new` and returned as a raw pointer.
VPIRInstruction *VPIRInstruction::create(Instruction &I) {
  auto *Phi = dyn_cast<PHINode>(&I);
  if (!Phi)
    return new VPIRInstruction(I);
  return new VPIRPhi(*Phi);
}


/// Executes a wrapped non-phi IR instruction. No IR is generated; only the
/// builder's insert point is moved.
void VPIRInstruction::execute(VPTransformState &State) {
  assert(!isa<VPIRPhi>(this) && getNumOperands() == 0 &&
         "PHINodes must be handled by VPIRPhi");

  // Advance the insert point after the wrapped IR instruction. This allows
  // interleaving VPIRInstructions and other recipes.
  auto AfterWrapped = std::next(I.getIterator());
  State.Builder.SetInsertPoint(I.getParent(), AfterWrapped);
}


/// Returns the cost of this recipe, which is always zero; \p VF and \p Ctx are
/// not consulted.
InstructionCost VPIRInstruction::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
  // The recipe wraps an existing IR instruction on the border of VPlan's scope,
  // hence it does not contribute to the cost-modeling for the VPlan.
  return InstructionCost(0);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints the wrapped IR instruction to \p O as "IR <instruction>", prefixed
/// by \p Indent. \p SlotTracker is not used.
void VPIRInstruction::printRecipe(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "IR " << I;
}


#endif


/// Updates the wrapped IR phi with one incoming value per operand of this
/// recipe. The operand at index I is associated with the I-th predecessor of
/// the parent block.
void VPIRPhi::execute(VPTransformState &State) {
  PHINode *Phi = &getIRPhi();

  for (const auto &Incoming : enumerate(operands())) {
    VPValue *ExitValue = Incoming.value();

    // Single scalars are read from the first lane, everything else from the
    // last lane for the current VF.
    VPLane Lane = vputils::isSingleScalar(ExitValue)
                      ? VPLane::getFirstLane()
                      : VPLane::getLastLaneForVF(State.VF);

    // Map the matching VPlan predecessor to its exiting IR basic block.
    VPBlockBase *Pred = getParent()->getPredecessors()[Incoming.index()];
    auto *PredVPBB = Pred->getExitingBasicBlock();
    BasicBlock *PredBB = State.CFG.VPBB2IRBB[PredVPBB];

    // Set insertion point in PredBB in case an extract needs to be generated.
    // TODO: Model extracts explicitly.
    State.Builder.SetInsertPoint(PredBB->getTerminator());
    Value *V = State.get(ExitValue, Lane);

    // If the phi already has an entry for PredBB, update it. Otherwise add a
    // new incoming value for PredBB.
    if (Phi->getBasicBlockIndex(PredBB) != -1)
      Phi->setIncomingValueForBlock(PredBB, V);
    else
      Phi->addIncoming(V, PredBB);
  }

  // Advance the insert point after the wrapped IR instruction. This allows
  // interleaving VPIRInstructions and other recipes.
  auto AfterPhi = std::next(Phi->getIterator());
  State.Builder.SetInsertPoint(Phi->getParent(), AfterPhi);
}


/// Removes the operand of the underlying phi recipe that corresponds to the
/// predecessor \p IncomingBlock of the recipe's parent block.
void VPPhiAccessors::removeIncomingValueFor(VPBlockBase *IncomingBlock) const {
  // The accessor is const, but removing an operand mutates the recipe.
  auto *PhiR = const_cast<VPRecipeBase *>(getAsRecipe());
  auto *Parent = PhiR->getParent();
  assert(PhiR->getNumOperands() == Parent->getNumPredecessors() &&
         "Number of phi operands must match number of predecessors");

  // Operand positions mirror predecessor positions.
  const unsigned PredIdx = Parent->getIndexForPredecessor(IncomingBlock);
  PhiR->removeOperand(PredIdx);
}


/// Returns the incoming value whose position equals the index of \p VPBB
/// among the predecessors of the phi recipe's parent block.
VPValue *
VPPhiAccessors::getIncomingValueForBlock(const VPBasicBlock *VPBB) const {
  auto *PhiR = const_cast<VPRecipeBase *>(getAsRecipe());
  const unsigned PredIdx = PhiR->getParent()->getIndexForPredecessor(VPBB);
  return getIncomingValue(PredIdx);
}


/// Replaces, with \p V, the operand whose position equals the index of
/// \p VPBB among the predecessors of the phi recipe's parent block.
void VPPhiAccessors::setIncomingValueForBlock(const VPBasicBlock *VPBB,
                                              VPValue *V) const {
  // The accessor is const, but setting an operand mutates the recipe.
  auto *PhiR = const_cast<VPRecipeBase *>(getAsRecipe());
  const unsigned PredIdx = PhiR->getParent()->getIndexForPredecessor(VPBB);
  PhiR->setOperand(PredIdx, V);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints the phi's operands to \p O as a comma-separated list of
/// "[ <value>, <incoming block> ]" entries.
void VPPhiAccessors::printPhiOperands(raw_ostream &O,
                                      VPSlotTracker &SlotTracker) const {
  // Prints a single entry; the operand index selects the incoming block.
  auto PrintIncoming = [&](auto Incoming) {
    O << "[ ";
    Incoming.value()->printAsOperand(O, SlotTracker);
    O << ", ";
    getIncomingBlock(Incoming.index())->printAsOperand(O);
    O << " ]";
  };
  interleaveComma(enumerate(getAsRecipe()->operands()), O, PrintIncoming);
}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints the wrapped IR phi via VPIRInstruction::printRecipe and, if the
/// recipe has operands, appends them as "(extra operand[s]: <value> from
/// <block>, ...)".
void VPIRPhi::printRecipe(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  VPIRInstruction::printRecipe(O, Indent, SlotTracker);

  const unsigned NumExtra = getNumOperands();
  if (NumExtra == 0)
    return;

  // Pluralize the label when there is more than one operand.
  O << " (extra operand" << (NumExtra > 1 ? "s" : "") << ": ";
  auto PrintIncoming = [&O, &SlotTracker](auto ValueAndBlock) {
    std::get<0>(ValueAndBlock)->printAsOperand(O, SlotTracker);
    O << " from ";
    std::get<1>(ValueAndBlock)->printAsOperand(O);
  };
  interleaveComma(incoming_values_and_blocks(), O, PrintIncoming);
  O << ")";
}


#endif


/// Sets every (kind, node) pair stored in this object as metadata on \p I.
void VPIRMetadata::applyMetadata(Instruction &I) const {
  for (const auto &KindAndNode : Metadata)
    I.setMetadata(KindAndNode.first, KindAndNode.second);
}


void VPIRMetadata::intersect(const VPIRMetadata &Other) {

  SmallVector<std::pair<unsigned, MDNode *>> MetadataIntersection;

  for (const auto &[KindA, MDA] : Metadata) {

    for (const auto &[KindB, MDB] : Other.Metadata) {

      if (KindA == KindB && MDA == MDB) {

        MetadataIntersection.emplace_back(KindA, MDA);

        break;

      }

    }

  }

  Metadata = std::move(MetadataIntersection);

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints the stored metadata to \p O as " (!<kind name> <node>, ...)".
/// Nothing is printed if there is no metadata or if \p SlotTracker has no
/// module.
void VPIRMetadata::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  const Module *M = SlotTracker.getModule();
  if (!M || Metadata.empty())
    return;

  // Metadata kind names, indexed by kind ID.
  ArrayRef<StringRef> MDNames = SlotTracker.getMDNames();
  auto PrintEntry = [&](const auto &Entry) {
    const unsigned Kind = Entry.first;
    MDNode *Node = Entry.second;
    assert(Kind < MDNames.size() && !MDNames[Kind].empty() &&
           "Unexpected unnamed metadata kind");
    O << "!" << MDNames[Kind] << " ";
    Node->printAsOperand(O, M);
  };

  O << " (";
  interleaveComma(Metadata, O, PrintEntry);
  O << ")";
}


#endif


/// Emits a call to the vector function variant with the recipe's arguments
/// and records the result unless the call returns void.
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  assert(Variant != nullptr && "Can't create vector function.");

  FunctionType *VFTy = Variant->getFunctionType();
  SmallVector<Value *, 4> Args;
  for (const auto &[ArgIdx, ArgVPV] : enumerate(args())) {
    // Some vectorized function variants may also take a scalar argument,
    // e.g. linear parameters for pointers. This needs to be the scalar value
    // from the start of the respective part when interleaving.
    const bool IsVectorParam = VFTy->getParamType(ArgIdx)->isVectorTy();
    Value *Arg = IsVectorParam
                     ? State.get(ArgVPV, usesFirstLaneOnly(ArgVPV))
                     : State.get(ArgVPV, VPLane(0));
    Args.push_back(Arg);
  }

  // Carry over the operand bundles of the underlying call, if there is one.
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (auto *UnderlyingCall = cast_or_null<CallInst>(getUnderlyingValue()))
    UnderlyingCall->getOperandBundlesAsDefs(OpBundles);

  CallInst *NewCall = State.Builder.CreateCall(Variant, Args, OpBundles);
  applyFlags(*NewCall);
  applyMetadata(*NewCall);
  NewCall->setCallingConv(Variant->getCallingConv());

  if (NewCall->getType()->isVoidTy())
    return;
  State.set(this, NewCall);
}


/// Returns the cost of calling the vector variant. \p VF is only used to
/// assert that it matches the VF of the variant's return type.
InstructionCost VPWidenCallRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  assert(getVectorizedTypeVF(Variant->getReturnType()) == VF &&
         "Variant return type must match VF");
  InstructionCost CallCost = computeCallCost(Variant, Ctx);
  return CallCost;
}


/// Queries TTI for the cost of a call with the return type and parameter
/// types of \p Variant. No function is passed to the TTI query itself.
InstructionCost VPWidenCallRecipe::computeCallCost(Function *Variant,
                                                   VPCostContext &Ctx) {
  Type *RetTy = Variant->getReturnType();
  ArrayRef<Type *> ParamTys = Variant->getFunctionType()->params();
  return Ctx.TTI.getCallInstrCost(nullptr, RetTy, ParamTys, Ctx.CostKind);
}


bool VPWidenCallRecipe::usesFirstLaneOnly(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  assert(Variant && "Variant not set");

  FunctionType *VFTy = Variant->getFunctionType();

  return all_of(enumerate(args()), [VFTy, &Op](const auto &Arg) {

    auto [Idx, V] = Arg;

    Type *ArgTy = VFTy->getParamType(Idx);

    return V != Op || ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy() ||

           ArgTy->isPointerTy() || ArgTy->isByteTy();

  });

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O, prefixed by \p Indent: the defined value (or
/// "void"), the scalar callee with its arguments, and the vector library
/// function that is used.
void VPWidenCallRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  Function *CalledFn = getCalledScalarFunction();
  if (!CalledFn->getReturnType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  } else {
    O << "void ";
  }

  O << "call";
  printFlags(O);
  O << " @" << CalledFn->getName() << "(";
  auto PrintArg = [&O, &SlotTracker](VPValue *Arg) {
    Arg->printAsOperand(O, SlotTracker);
  };
  interleaveComma(args(), O, PrintArg);
  O << ")";

  // The variant's name is only appended if it has one.
  O << " (using library function";
  if (Variant->hasName()) {
    O << ": ";
    O << Variant->getName();
  }
  O << ")";
}


#endif


/// Creates and returns the call to the vector intrinsic: collects the overload
/// types, gathers the arguments, gets or inserts the intrinsic declaration and
/// emits the call with flags, metadata and the underlying call's operand
/// bundles applied.
CallInst *VPWidenIntrinsicRecipe::createVectorCall(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");

  // Types needed to select the intrinsic declaration: return-type overloads
  // first, then argument overloads in operand order.
  SmallVector<Type *, 2> OverloadTys;

  // Add return type if intrinsic is overloaded on it.
  const bool IsOverloadedOnReturn =
      isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI);
  if (IsOverloadedOnReturn) {
    Type *VecRetTy = toVectorizedTy(getScalarType(), State.VF);
    ArrayRef<Type *> FieldTys = getContainedTypes(VecRetTy);
    for (auto [FieldIdx, FieldTy] : enumerate(FieldTys)) {
      if (isVectorIntrinsicWithStructReturnOverloadAtField(VectorIntrinsicID,
                                                           FieldIdx, State.TTI))
        OverloadTys.push_back(FieldTy);
    }
  }

  SmallVector<Value *, 4> Args;
  for (const auto &[ArgIdx, Operand] : enumerate(operands())) {
    // Some intrinsics have a scalar argument - don't replace it with a
    // vector.
    const bool WantsScalar = isVectorIntrinsicWithScalarOpAtArg(
        VectorIntrinsicID, ArgIdx, State.TTI);
    Value *Arg = WantsScalar
                     ? State.get(Operand, VPLane(0))
                     : State.get(Operand, usesFirstLaneOnly(Operand));
    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, ArgIdx,
                                               State.TTI))
      OverloadTys.push_back(Arg->getType());
    Args.push_back(Arg);
  }

  // Use vector version of the intrinsic.
  Module *M = State.Builder.GetInsertBlock()->getModule();
  Function *VectorF =
      Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, OverloadTys);
  assert(VectorF &&
         "Can't retrieve vector intrinsic or vector-predication intrinsics.");

  // Carry over the operand bundles of the underlying call, if there is one.
  SmallVector<OperandBundleDef, 1> OpBundles;
  if (auto *UnderlyingCall = cast_or_null<CallInst>(getUnderlyingValue()))
    UnderlyingCall->getOperandBundlesAsDefs(OpBundles);

  CallInst *NewCall = State.Builder.CreateCall(VectorF, Args, OpBundles);
  applyFlags(*NewCall);
  applyMetadata(*NewCall);
  return NewCall;
}


/// Emits the vector intrinsic call and records its result unless the call
/// returns void.
void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
  CallInst *NewCall = createVectorCall(State);
  if (NewCall->getType()->isVoidTy())
    return;
  State.set(this, NewCall);
}


/// Computes the TTI cost of calling intrinsic \p ID with \p Operands at
/// \p VF. \p R supplies the scalar result type, the fast-math flags and the
/// underlying IR value (if any) used to build the cost query.
InstructionCost VPWidenIntrinsicRecipe::computeCallCost(
    Intrinsic::ID ID, ArrayRef<const VPValue *> Operands,
    const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx) {
  Type *ScalarRetTy = R.getScalarType();

  // Skip the reverse operation cost for the mask.
  // FIXME: Remove this once redundant mask reverse operations can be eliminated
  // by VPlanTransforms::cse before cost computation.
  const bool IsMaskReverse =
      ID == Intrinsic::experimental_vp_reverse && ScalarRetTy->isIntegerTy(1);
  if (IsMaskReverse)
    return InstructionCost(0);

  // Some backends analyze intrinsic arguments to determine cost. Use the
  // underlying value for the operand if it has one. Otherwise try to use the
  // operand of the underlying call instruction, if there is one. Otherwise
  // clear Arguments.
  // TODO: Rework TTI interface to be independent of concrete IR values.
  SmallVector<const Value *> Arguments;
  for (const auto &[Idx, Op] : enumerate(Operands)) {
    if (auto *UV = Op->getUnderlyingValue()) {
      Arguments.push_back(UV);
      continue;
    }
    auto *UnderlyingCall = dyn_cast_or_null<CallBase>(R.getUnderlyingValue());
    if (!UnderlyingCall) {
      // No IR value is available for this operand: give up on arguments.
      Arguments.clear();
      break;
    }
    Arguments.push_back(UnderlyingCall->getArgOperand(Idx));
  }

  Type *RetTy = ScalarRetTy;
  if (VF.isVector())
    RetTy = toVectorizedTy(ScalarRetTy, VF);

  auto ToParamTy = [&](const VPValue *Op) {
    return toVectorTy(Op->getScalarType(), VF);
  };
  SmallVector<Type *> ParamTys = map_to_vector(Operands, ToParamTy);

  // TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
  IntrinsicCostAttributes CostAttrs(
      ID, RetTy, Arguments, ParamTys, R.getFastMathFlags(),
      dyn_cast_or_null<IntrinsicInst>(R.getUnderlyingValue()),
      InstructionCost::getInvalid());
  return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, Ctx.CostKind);
}


/// Returns the cost of this recipe at \p VF by forwarding its intrinsic ID and
/// all of its operands to computeCallCost.
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
                                                    VPCostContext &Ctx) const {
  SmallVector<const VPValue *> AllOperands(operands());
  InstructionCost Cost =
      computeCallCost(VectorIntrinsicID, AllOperands, *this, VF, Ctx);
  return Cost;
}


/// Returns the base name of the vector intrinsic, as reported by
/// Intrinsic::getBaseName.
StringRef VPWidenIntrinsicRecipe::getIntrinsicName() const {
  StringRef BaseName = Intrinsic::getBaseName(VectorIntrinsicID);
  return BaseName;
}


bool VPWidenIntrinsicRecipe::usesFirstLaneOnly(const VPValue *Op) const {

  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");

  return all_of(enumerate(operands()), [this, &Op](const auto &X) {

    auto [Idx, V] = X;

    return V != Op || isVectorIntrinsicWithScalarOpAtArg(getVectorIntrinsicID(),

                                                         Idx, nullptr);

  });

}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O, prefixed by \p Indent: the defined value (or
/// "void"), the flags, the intrinsic name and the operand list.
void VPWidenIntrinsicRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                         VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INTRINSIC ";

  if (!getScalarType()->isVoidTy()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  } else {
    O << "void ";
  }

  O << "call";
  printFlags(O);
  O << getIntrinsicName() << "(";

  auto PrintOperand = [&O, &SlotTracker](VPValue *Operand) {
    Operand->printAsOperand(O, SlotTracker);
  };
  interleaveComma(operands(), O, PrintOperand);
  O << ")";
}


#endif


/// Emits the vector memory intrinsic call, attaches the recipe's alignment as
/// a parameter attribute on argument 0 and records the call as this recipe's
/// value.
void VPWidenMemIntrinsicRecipe::execute(VPTransformState &State) {
  CallInst *Call = createVectorCall(State);
  Attribute AlignAttr =
      Attribute::getWithAlignment(Call->getContext(), Alignment);
  Call->addParamAttr(/*ArgNo=*/0, AlignAttr);
  State.set(this, Call);
}


/// Queries TTI for the cost of memory intrinsic \p IID operating on type
/// \p Ty with the given masking and alignment. No pointer value is passed to
/// the query.
InstructionCost VPWidenMemIntrinsicRecipe::computeMemIntrinsicCost(
    Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment,
    VPCostContext &Ctx) {
  MemIntrinsicCostAttributes Attrs(IID, Ty, /*Ptr=*/nullptr, IsMasked,
                                   Alignment);
  return Ctx.TTI.getMemIntrinsicInstrCost(Attrs, Ctx.CostKind);
}


/// Returns the cost of this memory intrinsic at \p VF. The access counts as
/// masked unless operand 2 matches an all-true constant.
InstructionCost
VPWidenMemIntrinsicRecipe::computeCost(ElementCount VF,
                                       VPCostContext &Ctx) const {
  Type *VecTy = toVectorTy(getScalarType(), VF);
  const bool IsMasked = !match(getOperand(2), m_True());
  return computeMemIntrinsicCost(getVectorIntrinsicID(), VecTy, IsMasked,
                                 Alignment, Ctx);
}


/// Emits a call to the experimental vector histogram-add intrinsic for the
/// bucket addresses (operand 0), the scalar increment (operand 1) and the
/// mask, then applies the recipe's metadata to the call.
void VPHistogramRecipe::execute(VPTransformState &State) {
  IRBuilderBase &Builder = State.Builder;

  Value *Address = State.get(getOperand(0));
  Value *IncAmt = State.get(getOperand(1), /*IsScalar=*/true);
  VectorType *VTy = cast<VectorType>(Address->getType());

  // The histogram intrinsic requires a mask even if the recipe doesn't;
  // if the mask operand was omitted then all lanes should be executed and
  // we just need to synthesize an all-true mask.
  VPValue *VPMask = getMask();
  Value *Mask = VPMask ? State.get(VPMask)
                       : Builder.CreateVectorSplat(VTy->getElementCount(),
                                                   Builder.getInt1(1));

  assert((Opcode == Instruction::Add || Opcode == Instruction::Sub) &&
         "only add or sub supported for now");
  // If this is a subtract, we want to invert the increment amount. We may
  // add a separate intrinsic in future, but for now we'll try this.
  if (Opcode == Instruction::Sub)
    IncAmt = Builder.CreateNeg(IncAmt);

  auto *HistogramInst = Builder.CreateIntrinsic(
      Intrinsic::experimental_vector_histogram_add, {VTy, IncAmt->getType()},
      {Address, IncAmt, Mask});
  applyMetadata(*HistogramInst);
}


/// Returns the cost of the histogram update at \p VF as the sum of the
/// histogram intrinsic cost, a multiply (free if the increment is the constant
/// one) and the add/sub itself.
InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  // FIXME: Take the gather and scatter into account as well. For now we're
  //        generating the same cost as the fallback path, but we'll likely
  //        need to create a new TTI method for determining the cost, including
  //        whether we can use base + vec-of-smaller-indices or just
  //        vec-of-pointers.
  assert(VF.isVector() && "Invalid VF for histogram cost");

  Type *AddressTy = getOperand(0)->getScalarType();
  VPValue *IncAmt = getOperand(1);
  Type *IncTy = IncAmt->getScalarType();
  VectorType *IncVecTy = VectorType::get(IncTy, VF);

  // Assume that a non-constant update value (or a constant != 1) requires
  // a multiply, and add that into the cost.
  InstructionCost MulCost =
      Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, IncVecTy, Ctx.CostKind);
  if (match(IncAmt, m_One()))
    MulCost = TTI::TCC_Free;

  // Find the cost of the histogram operation itself.
  Type *PtrVecTy = VectorType::get(AddressTy, VF);
  Type *MaskVecTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
  IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
                              Type::getVoidTy(Ctx.LLVMCtx),
                              {PtrVecTy, IncTy, MaskVecTy});
  InstructionCost HistogramCost =
      Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);

  // Add the costs together with the add/sub operation.
  InstructionCost UpdateCost =
      Ctx.TTI.getArithmeticInstrCost(Opcode, IncVecTy, Ctx.CostKind);
  return HistogramCost + MulCost + UpdateCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Prints this recipe to \p O, prefixed by \p Indent: the bucket operand, the
/// increment ("inc") or decrement ("dec") operand and, if present, the mask.
void VPHistogramRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-HISTOGRAM buckets: ";
  getOperand(0)->printAsOperand(O, SlotTracker);

  if (Opcode != Instruction::Sub) {
    assert(Opcode == Instruction::Add);
    O << ", inc: ";
  } else {
    O << ", dec: ";
  }
  getOperand(1)->printAsOperand(O, SlotTracker);

  VPValue *Mask = getMask();
  if (!Mask)
    return;
  O << ", mask: ";
  Mask->printAsOperand(O, SlotTracker);
}


#endif


/// Initializes each VPlan fast-math flag from the corresponding query on the
/// IR fast-math flags \p FMF.
VPIRFlags::FastMathFlagsTy::FastMathFlagsTy(const FastMathFlags &FMF) {
  // Value-class assumptions.
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();

  // Permitted transformations.
  AllowReassoc = FMF.allowReassoc();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}


/// Returns the default (all-cleared) flags of the flag kind associated with
/// \p Opcode. Opcodes without a dedicated case get a default-constructed
/// VPIRFlags. Must not be called for ICmp, FCmp or ComputeReductionResult,
/// which require explicit flags.
VPIRFlags VPIRFlags::getDefaultFlags(unsigned Opcode) {
  switch (Opcode) {
  // Fast-math flags.
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::FNeg:
  case Instruction::FPExt:
  case Instruction::FPTrunc:
    return FastMathFlags();

  // Wrapping flags.
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case VPInstruction::CanonicalIVIncrementForPart:
    return WrapFlagsTy(false, false);

  // Exact flag.
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::UDiv:
  case Instruction::SDiv:
    return ExactFlagsTy(false);

  // GEP no-wrap flags.
  case Instruction::GetElementPtr:
  case VPInstruction::PtrAdd:
  case VPInstruction::WidePtrAdd:
    return GEPNoWrapFlags::none();

  // Non-negative flag.
  case Instruction::ZExt:
  case Instruction::UIToFP:
    return NonNegFlagsTy(false);

  case Instruction::Trunc:
    return TruncFlagsTy(false, false);

  case Instruction::Or:
    return DisjointFlagsTy(false);

  case Instruction::ICmp:
  case Instruction::FCmp:
  case VPInstruction::ComputeReductionResult:
    llvm_unreachable("opcode requires explicit flags");

  default:
    return VPIRFlags();
  }
}


#if !defined(NDEBUG)


bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {

  switch (OpType) {

  case OperationType::OverflowingBinOp:

    return Opcode == Instruction::Add || Opcode == Instruction::Sub ||

           Opcode == Instruction::Mul || Opcode == Instruction::Shl ||

           Opcode == VPInstruction::VPInstruction::CanonicalIVIncrementForPart;

  case OperationType::Trunc:

    return Opcode == Instruction::Trunc;

  case OperationType::DisjointOp:

    return Opcode == Instruction::Or;

  case OperationType::PossiblyExactOp:

    return Opcode == Instruction::AShr || Opcode == Instruction::LShr ||

           Opcode == Instruction::UDiv || Opcode == Instruction::SDiv;

  case OperationType::GEPOp:

    return Opcode == Instruction::GetElementPtr ||

           Opcode == VPInstruction::PtrAdd ||

           Opcode == VPInstruction::WidePtrAdd;

  case OperationType::FPMathOp:

    return Opcode == Instruction::Call || Opcode == Instruction::FAdd ||

           Opcode == Instruction::FMul || Opcode == Instruction::FSub ||

           Opcode == Instruction::FNeg || Opcode == Instruction::FDiv ||

           Opcode == Instruction::FRem || Opcode == Instruction::FPExt ||

           Opcode == Instruction::FPTrunc || Opcode == Instruction::PHI ||

           Opcode == Instruction::Select || Opcode == Instruction::SIToFP ||

           Opcode == Instruction::UIToFP ||

           Opcode == VPInstruction::WideIVStep ||

           Opcode == VPInstruction::ReductionStartVector;

  case OperationType::FCmp:

    return Opcode == Instruction::FCmp;

  case OperationType::NonNegOp:

    return Opcode == Instruction::ZExt || Opcode == Instruction::UIToFP;

  case OperationType::Cmp:

    return Opcode == Instruction::FCmp || Opcode == Instruction::ICmp;

  case OperationType::ReductionOp:

    return Opcode == VPInstruction::ComputeReductionResult;

  case OperationType::Other:

    return true;

  }

  llvm_unreachable("Unknown OperationType enum");

}


bool VPIRFlags::hasRequiredFlagsForOpcode(unsigned Opcode) const {

  // Handle opcodes without default flags.

  if (Opcode == Instruction::ICmp)

    return OpType == OperationType::Cmp;

  if (Opcode == Instruction::FCmp)

    return OpType == OperationType::FCmp;

  if (Opcode == VPInstruction::ComputeReductionResult)

    return OpType == OperationType::ReductionOp;


  OperationType Required = getDefaultFlags(Opcode).OpType;

  return Required == OperationType::Other || Required == OpType;

}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Write the lower-case textual name of the recurrence kind \p Kind to \p OS.
/// Nothing is written if \p Kind matches none of the enumerators listed below.
static void printRecurrenceKind(raw_ostream &OS, const RecurKind &Kind) {
  const char *Name = nullptr;
  switch (Kind) {
  case RecurKind::None:
    Name = "none";
    break;
  case RecurKind::Add:
    Name = "add";
    break;
  case RecurKind::Sub:
    Name = "sub";
    break;
  case RecurKind::AddChainWithSubs:
    Name = "add-chain-with-subs";
    break;
  case RecurKind::Mul:
    Name = "mul";
    break;
  case RecurKind::Or:
    Name = "or";
    break;
  case RecurKind::And:
    Name = "and";
    break;
  case RecurKind::Xor:
    Name = "xor";
    break;
  case RecurKind::SMin:
    Name = "smin";
    break;
  case RecurKind::SMax:
    Name = "smax";
    break;
  case RecurKind::UMin:
    Name = "umin";
    break;
  case RecurKind::UMax:
    Name = "umax";
    break;
  case RecurKind::FAdd:
    Name = "fadd";
    break;
  case RecurKind::FAddChainWithSubs:
    Name = "fadd-chain-with-subs";
    break;
  case RecurKind::FSub:
    Name = "fsub";
    break;
  case RecurKind::FMul:
    Name = "fmul";
    break;
  case RecurKind::FMin:
    Name = "fmin";
    break;
  case RecurKind::FMax:
    Name = "fmax";
    break;
  case RecurKind::FMinNum:
    Name = "fminnum";
    break;
  case RecurKind::FMaxNum:
    Name = "fmaxnum";
    break;
  case RecurKind::FMinimum:
    Name = "fminimum";
    break;
  case RecurKind::FMaximum:
    Name = "fmaximum";
    break;
  case RecurKind::FMinimumNum:
    Name = "fminimumnum";
    break;
  case RecurKind::FMaximumNum:
    Name = "fmaximumnum";
    break;
  case RecurKind::FMulAdd:
    Name = "fmuladd";
    break;
  case RecurKind::AnyOf:
    Name = "any-of";
    break;
  case RecurKind::FindIV:
    Name = "find-iv";
    break;
  case RecurKind::FindLast:
    Name = "find-last";
    break;
  }

  // Emit the selected name in one place; leave the stream untouched if no
  // case matched.
  if (Name)
    OS << Name;
}


/// Print the flags held for the current operation type to \p O, each keyword
/// preceded by a space, and always finish with one trailing space.
void VPIRFlags::printFlags(raw_ostream &O) const {
  // Overflowing binops and truncs print the same two keywords.
  auto PrintWrapKeywords = [&O](bool HasNUW, bool HasNSW) {
    if (HasNUW)
      O << " nuw";
    if (HasNSW)
      O << " nsw";
  };

  switch (OpType) {
  case OperationType::Cmp:
  case OperationType::FCmp:
    // Both compare kinds print their predicate; only FCmp also carries
    // fast-math flags.
    O << " " << CmpInst::getPredicateName(getPredicate());
    if (OpType == OperationType::FCmp)
      getFastMathFlags().print(O);
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::OverflowingBinOp:
    PrintWrapKeywords(WrapFlags.HasNUW, WrapFlags.HasNSW);
    break;
  case OperationType::Trunc:
    PrintWrapKeywords(TruncFlags.HasNUW, TruncFlags.HasNSW);
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::GEPOp: {
    const GEPNoWrapFlags NoWrap = getGEPNoWrapFlags();
    // 'nusw' is only printed when 'inbounds' is not.
    if (NoWrap.isInBounds())
      O << " inbounds";
    else if (NoWrap.hasNoUnsignedSignedWrap())
      O << " nusw";
    if (NoWrap.hasNoUnsignedWrap())
      O << " nuw";
    break;
  }
  case OperationType::ReductionOp: {
    // Printed as "(<kind>[, in-loop][, ordered])" followed by fast-math flags.
    O << " (";
    printRecurrenceKind(O, getRecurKind());
    if (isReductionInLoop())
      O << ", in-loop";
    if (isReductionOrdered())
      O << ", ordered";
    O << ")";
    getFastMathFlags().print(O);
    break;
  }
  case OperationType::Other:
    break;
  }

  O << " ";
}


#endif


/// Generate the widened IR for this recipe's opcode and register the result
/// in \p State.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &IRB = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::UncondBr:
  case Instruction::CondBr:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Unary and binary operators: fetch every operand and emit one n-ary op.
    SmallVector<Value *, 2> WideOperands;
    for (VPValue *Operand : operands())
      WideOperands.push_back(State.get(Operand));

    Value *Result = IRB.CreateNAryOp(Opcode, WideOperands);

    // Flags and metadata can only be attached if the builder returned an
    // instruction.
    if (auto *ResultInst = dyn_cast<Instruction>(Result)) {
      applyFlags(*ResultInst);
      applyMetadata(*ResultInst);
    }

    // All users of this recipe see the newly generated value.
    State.set(this, Result);
    break;
  }
  case Instruction::ExtractValue: {
    assert(getNumOperands() == 2 && "expected single level extractvalue");
    Value *Aggregate = State.get(getOperand(0));
    Value *Member = IRB.CreateExtractValue(
        Aggregate, cast<VPConstantInt>(getOperand(1))->getZExtValue());
    State.set(this, Member);
    break;
  }
  case Instruction::Freeze: {
    Value *Frozen = IRB.CreateFreeze(State.get(getOperand(0)));
    State.set(this, Frozen);
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Compares: pick the floating-point or integer builder entry point.
    Value *LHS = State.get(getOperand(0));
    Value *RHS = State.get(getOperand(1));
    Value *Cmp = Opcode == Instruction::FCmp
                     ? IRB.CreateFCmp(getPredicate(), LHS, RHS)
                     : IRB.CreateICmp(getPredicate(), LHS, RHS);
    if (auto *CmpInstr = dyn_cast<Instruction>(Cmp)) {
      applyFlags(*CmpInstr);
      applyMetadata(*CmpInstr);
    }
    State.set(this, Cmp);
    break;
  }
  case Instruction::Select: {
    // The condition is fetched as a scalar when it is a single scalar.
    VPValue *CondVPV = getOperand(0);
    Value *Cond = State.get(CondVPV, vputils::isSingleScalar(CondVPV));
    Value *TrueVal = State.get(getOperand(1));
    Value *FalseVal = State.get(getOperand(2));
    Value *Sel = IRB.CreateSelect(Cond, TrueVal, FalseVal);
    State.set(this, Sel);
    if (auto *SelInst = dyn_cast<Instruction>(Sel)) {
      // Flags are applied only when the select is a floating-point math
      // operator; metadata is always applied.
      if (isa<FPMathOperator>(SelInst))
        applyFlags(*SelInst);
      applyMetadata(*SelInst);
    }
    break;
  }
  default:
    // Simple widening does not cover this opcode.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Cross-check the type inferred by VPlan against the type of the value that
  // was actually generated.
  assert(VectorType::get(this->getScalarType(), State.VF) ==
             State.get(this)->getType() &&
         "inferred type and type from generated instructions do not match");
#endif
}


/// Return the cost of this widened operation for \p VF. Every supported
/// opcode is costed through getCostForRecipeWithOpcode.
InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  switch (Opcode) {
  // Integer division and remainder. If such an operation isn't safe to
  // speculate and requires predication, a vplan can only be created by
  // inserting a select on the second input operand so the inactive lanes use
  // the value 1. That select is costed separately.
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem:
  // Integer arithmetic, shifts and bitwise logic.
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  // Floating-point arithmetic.
  case Instruction::FNeg:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  // Compares, selects and the remaining supported opcodes.
  case Instruction::ICmp:
  case Instruction::FCmp:
  case Instruction::Select:
  case Instruction::Freeze:
  case Instruction::ExtractValue: {
    const auto WidenedOpcode = getOpcode();
    return getCostForRecipeWithOpcode(WidenedOpcode, VF, Ctx);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent>WIDEN <result> = <opcode name><flags><operands>".
void VPWidenRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  const char *OpcodeName = Instruction::getOpcodeName(Opcode);

  O << Indent << "WIDEN ";
  this->printAsOperand(O, SlotTracker);
  O << " = " << OpcodeName;
  this->printFlags(O);
  this->printOperands(O, SlotTracker);
}


#endif


/// Emit a cast of the (single) operand to the recipe's scalar type widened
/// to State.VF lanes, and register the result in \p State.
void VPWidenCastRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "Not vectorizing?");

  Type *WideDestTy = VectorType::get(getScalarType(), State.VF);
  Value *Source = State.get(getOperand(0));
  Value *Widened = State.Builder.CreateCast(Instruction::CastOps(Opcode),
                                            Source, WideDestTy);
  State.set(this, Widened);

  // Flags and metadata can only be attached if the builder returned an
  // instruction.
  auto *CastInstr = dyn_cast<Instruction>(Widened);
  if (!CastInstr)
    return;
  applyFlags(*CastInstr);
  applyMetadata(*CastInstr);
}


/// Return the cost of this widened cast for \p VF, as computed by
/// getCostForRecipeWithOpcode for the recipe's opcode.
InstructionCost VPWidenCastRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  const auto CastOpcode = getOpcode();
  return getCostForRecipeWithOpcode(CastOpcode, VF, Ctx);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as
/// "<Indent>WIDEN-CAST <result> = <opcode name><flags><operands> to <type>".
void VPWidenCastRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  const char *OpcodeName = Instruction::getOpcodeName(Opcode);

  O << Indent << "WIDEN-CAST ";
  this->printAsOperand(O, SlotTracker);
  O << " = " << OpcodeName;
  this->printFlags(O);
  this->printOperands(O, SlotTracker);
  // The destination is reported as the recipe's scalar result type.
  O << " to " << *getScalarType();
}


#endif


/// Return the target's control-flow cost of a PHI instruction. \p VF does not
/// influence the result.
InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  InstructionCost PhiCost =
      Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  return PhiCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent><result> = WIDEN-INDUCTION<flags><operands>", followed
/// by " (truncated to <type>)" when the recipe has a truncate instruction.
void VPWidenIntOrFpInductionRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  O << Indent;
  this->printAsOperand(O, SlotTracker);
  O << " = WIDEN-INDUCTION";
  this->printFlags(O);
  this->printOperands(O, SlotTracker);

  const auto *Trunc = getTruncInst();
  if (!Trunc)
    return;
  O << " (truncated to " << *Trunc->getType() << ")";
}


#endif


/// Return true if this induction starts at zero, steps by one and has the
/// same scalar type as the canonical IV of its region.
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  if (!match(getStartValue(), m_ZeroInt()))
    return false;

  // A step may in general be defined by a preheader recipe (e.g. when it
  // needs SCEV expansion), but the canonical induction requires a step of 1,
  // which is represented as live-in.
  if (!match(getStepValue(), m_One()))
    return false;

  return getScalarType() == getRegion()->getCanonicalIVType();
}


/// Estimate the cost of computing the derived IV. The model follows
/// expandVPDerivedIV in VPlanTransforms.cpp and, to avoid overly pessimistic
/// costs that could hurt vectorization, accounts for the simplifications
/// expected from simplifyRecipe. Returns 0 for every case that is not
/// modelled.
InstructionCost VPDerivedIVRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  // TODO: Compute cost for remaining kinds.
  if (getInductionKind() != InductionDescriptor::IK_IntInduction)
    return 0;

  // No test currently exposes a path where all lanes are used, so bail out
  // for that case for now.
  if (!vputils::onlyFirstLaneUsed(this))
    return 0;

  // Returns the IR ConstantInt wrapped by V, or null if V is not a VPIRValue
  // holding one.
  auto AsConstantInt = [](auto *V) -> const ConstantInt * {
    auto *IRV = dyn_cast<VPIRValue>(V);
    return IRV ? dyn_cast<ConstantInt>(IRV->getValue()) : nullptr;
  };

  // Assume both a multiply and an add are required, then refine.
  bool NeedsMul = true;
  bool NeedsAdd = true;
  bool NeedsShl = false;

  // A zero start value lets the add fold away.
  if (const ConstantInt *StartC = AsConstantInt(getStartValue()))
    NeedsAdd = !StartC->isZero();

  // Particular constant steps change the arithmetic:
  //  1. A step of 1 requires no operation.
  //  2. A step of -1 requires a negate.
  //  3. A power-of-2 step will use a shl, instead of a mul.
  Type *StepTy = getStepValue()->getScalarType();
  InstructionCost Cost(0);
  if (const ConstantInt *StepC = AsConstantInt(getStepValue())) {
    if (StepC->isOne()) {
      NeedsMul = false;
    } else if (StepC->isMinusOne()) {
      // Most likely becomes a negate in simplifyRecipe, which is then
      // combined with the add to form a sub.
      // NOTE: This is perhaps an invalid assumption that the cost of an
      // 'add' is the same as a 'sub'.
      NeedsMul = false;
      NeedsAdd = true;
    } else if (StepC->getValue().isPowerOf2()) {
      // Most likely becomes a shift-left in simplifyRecipe.
      NeedsMul = false;
      NeedsShl = true;
    }
  }

  // If the index is used at all and its width differs from the step type,
  // account for converting it to the step type.
  Type *IndexTy = getIndex()->getScalarType();
  const unsigned StepBits = StepTy->getScalarSizeInBits();
  const unsigned IndexBits = IndexTy->getScalarSizeInBits();
  const bool IndexIsUsed = NeedsAdd || NeedsMul || NeedsShl;
  if (IndexIsUsed && StepBits != IndexBits) {
    unsigned ConvertOpc = Instruction::SExt;
    if (StepBits < IndexBits)
      ConvertOpc = Instruction::Trunc;
    Cost += Ctx.TTI.getCastInstrCost(ConvertOpc, StepTy, IndexTy,
                                     TTI::CastContextHint::None, Ctx.CostKind);
  }

  if (NeedsMul)
    Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, StepTy,
                                           Ctx.CostKind);
  if (NeedsShl) {
    // The shift amount is costed as a uniform constant operand.
    Cost += Ctx.TTI.getArithmeticInstrCost(
        Instruction::Shl, StepTy, Ctx.CostKind,
        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
        {TargetTransformInfo::OK_UniformConstantValue,
         TargetTransformInfo::OP_None});
  }
  if (NeedsAdd)
    Cost += Ctx.TTI.getArithmeticInstrCost(Instruction::Add, StepTy,
                                           Ctx.CostKind);
  return Cost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent><result> = DERIVED-IV <start> + <operand 1> * <step>".
void VPDerivedIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  auto PrintValue = [&O, &SlotTracker](auto *V) {
    V->printAsOperand(O, SlotTracker);
  };

  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  PrintValue(getStartValue());
  O << " + ";
  PrintValue(getOperand(1));
  O << " * ";
  PrintValue(getStepValue());
}


#endif


/// Generate one scalar value per required lane, computed as
/// BaseIV + (StartIdx0 + Lane) * Step, and register each in \p State.
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  State.Builder.setFastMathFlags(getFastMathFlags());

  // Lane 0 of operand 0 is the base IV; lane 0 of the step operand is the
  // step size.
  Value *BaseIV = State.get(getOperand(0), VPLane(0));
  Value *Step = State.get(getStepValue(), VPLane(0));
  IRBuilderBase &Builder = State.Builder;

  // The step must already have the scalar type of the base IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // Scalar steps are built for both integer and floating-point inductions;
  // choose the matching arithmetic here.
  const bool IsIntegerIV = BaseIVTy->isIntegerTy();
  const Instruction::BinaryOps AddOp =
      IsIntegerIV ? Instruction::Add : InductionOpcode;
  const Instruction::BinaryOps MulOp =
      IsIntegerIV ? Instruction::Mul : Instruction::FMul;

  // Only lane 0 is generated when no user needs any other lane.
  const bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  const unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();

  // Index of lane 0: the start-index operand if present, otherwise zero.
  auto *StartIndexVPV = getStartIndex();
  Value *StartIdx0 = StartIndexVPV ? State.get(StartIndexVPV, true)
                                   : Constant::getNullValue(BaseIVTy);

  for (unsigned Lane = 0; Lane < EndLane; ++Lane) {
    // It is okay if the induction variable type cannot hold the lane number,
    // we expect truncation in this case.
    Constant *LaneValue;
    if (IsIntegerIV)
      LaneValue = ConstantInt::get(BaseIVTy, Lane, /*IsSigned=*/false,
                                   /*ImplicitTrunc=*/true);
    else
      LaneValue = ConstantFP::get(BaseIVTy, Lane);

    Value *StartIdx = Builder.CreateBinOp(AddOp, StartIdx0, LaneValue);
    assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
           "Expected StartIdx to be folded to a constant when VF is not "
           "scalable");
    auto *ScaledIdx = Builder.CreateBinOp(MulOp, StartIdx, Step);
    auto *LaneIV = Builder.CreateBinOp(AddOp, BaseIV, ScaledIdx);
    State.set(this, LaneIV, VPLane(Lane));
  }
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent><result> = SCALAR-STEPS <operands>".
void VPScalarIVStepsRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent;
  this->printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  this->printOperands(O, SlotTracker);
}


#endif


/// Return true if \p Op, which must be an operand of this recipe, is a single
/// scalar.
bool VPWidenGEPRecipe::usesFirstLaneOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  const bool OpIsSingleScalar = vputils::isSingleScalar(Op);
  return OpIsSingleScalar;
}


/// Emit the widened GEP for this recipe and register it in \p State.
///
/// A GEP yields a vector of pointers as soon as one of its operands is
/// vector-typed, so to keep the representation compact only loop-varying
/// operands are used in vector form. The GEP's no-wrap flags (e.g.
/// 'inbounds') are carried over.
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &IRB = State.Builder;

  const bool EveryOperandInvariant = all_of(operands(), [](VPValue *Op) {
    return Op->isDefinedOutsideLoopRegions();
  });
  if (EveryOperandInvariant) {
    // With only loop-invariant operands the GEP built from them is a scalar
    // pointer. To still produce a vector of pointers we could either
    // arbitrarily broadcast one operand or broadcast a clone of the original
    // GEP; the latter is done here.
    SmallVector<Value *> ScalarOps;
    for (VPValue *Op : operands())
      ScalarOps.push_back(State.get(Op, VPLane(0)));

    auto *ScalarGEP =
        IRB.CreateGEP(getSourceElementType(), ScalarOps[0],
                      drop_begin(ScalarOps), "", getGEPNoWrapFlags());
    State.set(this, IRB.CreateVectorSplat(State.VF, ScalarGEP));
    return;
  }

  // At least one operand is loop-varying, so the result is a vector of
  // pointers unless VF is scalar. A loop-invariant pointer operand is not
  // broadcast.
  Value *BasePtr = State.get(getOperand(0), isPointerLoopInvariant());

  // Gather the indices; loop-invariant ones are not broadcast either.
  SmallVector<Value *, 4> Indices;
  for (const auto &[IndexNo, IndexVPV] : enumerate(drop_begin(operands())))
    Indices.push_back(State.get(IndexVPV, isIndexLoopInvariant(IndexNo)));

  // The new GEP may be a scalar if VF == 1, but should be a vector otherwise.
  auto *WideGEP = IRB.CreateGEP(getSourceElementType(), BasePtr, Indices, "",
                                getGEPNoWrapFlags());
  assert((State.VF.isScalar() || WideGEP->getType()->isVectorTy()) &&
         "NewGEP is not a pointer vector");
  State.set(this, WideGEP);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent>WIDEN-GEP <P>[<I0>][<I1>]... <result> =
/// getelementptr<flags><operands>", where <P> and each <Ik> are "Inv" or
/// "Var" depending on whether the pointer / k-th index is loop-invariant.
void VPWidenGEPRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  auto InvOrVar = [](bool IsInvariant) { return IsInvariant ? "Inv" : "Var"; };

  O << Indent << "WIDEN-GEP " << InvOrVar(isPointerLoopInvariant());
  for (size_t Idx = 0, NumIndices = getNumOperands() - 1; Idx < NumIndices;
       ++Idx)
    O << "[" << InvOrVar(isIndexLoopInvariant(Idx)) << "]";

  O << " ";
  printAsOperand(O, SlotTracker);
  O << " = getelementptr";
  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


void VPVectorEndPointerRecipe::materializeOffset(unsigned Part) {

  assert(!getOffset() && "Unexpected offset operand");

  VPBuilder Builder(this);

  VPlan &Plan = *getParent()->getPlan();

  VPValue *VFVal = getVFValue();

  const DataLayout &DL = Plan.getDataLayout();

  Type *IndexTy = DL.getIndexType(this->getScalarType());

  VPValue *Stride =

      Plan.getConstantInt(IndexTy, getStride(), /*IsSigned=*/true);

  Type *VFTy = VFVal->getScalarType();

  VPValue *VF = Builder.createScalarZExtOrTrunc(VFVal, IndexTy, VFTy,

                                                DebugLoc::getUnknown());


  // Offset for Part0 = Offset0 = Stride * (VF - 1).

  VPInstruction *VFMinusOne =

      Builder.createSub(VF, Plan.getConstantInt(IndexTy, 1u),

                        DebugLoc::getUnknown(), "", {true, true});

  VPInstruction *Offset0 =

      Builder.createOverflowingOp(Instruction::Mul, {VFMinusOne, Stride});


  // Offset for PartN = Offset0 + Part * Stride * VF.

  VPValue *PartxStride =

      Plan.getConstantInt(IndexTy, Part * getStride(), /*IsSigned=*/true);

  VPValue *Offset = Builder.createAdd(

      Offset0,

      Builder.createOverflowingOp(Instruction::Mul, {PartxStride, VF}));

  addOffset(Offset);

}


/// Emit a scalar GEP of the pointer operand by the previously materialized
/// offset and register it as this recipe's scalar result.
void VPVectorEndPointerRecipe::execute(VPTransformState &State) {
  assert(getOffset() && "Expected prior materialization of offset");

  Value *BasePtr = State.get(getPointer(), true);
  Value *OffsetV = State.get(getOffset(), true);
  Value *EndPtr = State.Builder.CreateGEP(getSourceElementType(), BasePtr,
                                          OffsetV, "", getGEPNoWrapFlags());
  State.set(this, EndPtr, /*IsScalar*/ true);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent><result> = vector-end-pointer<flags><operands>".
void VPVectorEndPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent;
  this->printAsOperand(O, SlotTracker);
  O << " = vector-end-pointer";
  this->printFlags(O);
  this->printOperands(O, SlotTracker);
}


#endif


/// Emit a scalar GEP of lane 0 of the pointer operand by the VFxPart operand,
/// scaled by the stride unless the stride is 1, and register it as this
/// recipe's scalar result.
void VPVectorPointerRecipe::execute(VPTransformState &State) {
  assert(getVFxPart() &&
         "Expected prior simplification of recipe without VFxPart");

  auto &IRB = State.Builder;
  Value *BasePtr = State.get(getOperand(0), VPLane(0));
  Value *OffsetV = State.get(getVFxPart(), true);

  // TODO: Expand to VPInstruction to support constant folding.
  const bool HasUnitStride = match(getStride(), m_One());
  if (!HasUnitStride) {
    // Bring the stride to the offset's type before multiplying.
    Value *StrideV = State.get(getStride(), true);
    StrideV = IRB.CreateZExtOrTrunc(StrideV, OffsetV->getType());
    OffsetV = IRB.CreateMul(OffsetV, StrideV);
  }

  Value *PartPtr = IRB.CreateGEP(getSourceElementType(), BasePtr, OffsetV, "",
                                 getGEPNoWrapFlags());
  State.set(this, PartPtr, /*IsScalar*/ true);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent><result> = vector-pointer<flags><operands>".
void VPVectorPointerRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent;
  this->printAsOperand(O, SlotTracker);
  O << " = vector-pointer";
  this->printFlags(O);
  this->printOperands(O, SlotTracker);
}


#endif


/// Return the cost of this blend: one select per incoming value beyond the
/// first.
InstructionCost VPBlendRecipe::computeCost(ElementCount VF,
                                           VPCostContext &Ctx) const {
  // A blend will be expanded to a select VPInstruction, which will generate a
  // scalar select if only the first lane is used; cost it at VF = 1 then.
  const ElementCount CostVF =
      vputils::onlyFirstLaneUsed(this) ? ElementCount::getFixed(1) : VF;

  Type *ValueTy = toVectorTy(this->getScalarType(), CostVF);
  Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), CostVF);
  InstructionCost SelectCost =
      Ctx.TTI.getCmpSelInstrCost(Instruction::Select, ValueTy, MaskTy,
                                 CmpInst::BAD_ICMP_PREDICATE, Ctx.CostKind);
  return (getNumIncomingValues() - 1) * SelectCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print as "<Indent>BLEND <result> =<flags>" followed by either the single
/// incoming value, or a space-separated list of "<value>/<mask>" entries.
void VPBlendRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "BLEND ";
  printAsOperand(O, SlotTracker);
  O << " =";
  printFlags(O);

  const unsigned NumIncoming = getNumIncomingValues();
  if (NumIncoming == 1) {
    // Not a User of any mask: not really blending, this is a
    // single-predecessor phi.
    getIncomingValue(0)->printAsOperand(O, SlotTracker);
    return;
  }

  for (unsigned Idx = 0; Idx < NumIncoming; ++Idx) {
    if (Idx > 0)
      O << " ";
    getIncomingValue(Idx)->printAsOperand(O, SlotTracker);

    // No mask is printed for the first incoming value of a normalized blend.
    const bool PrintMask = Idx != 0 || !isNormalized();
    if (PrintMask) {
      O << "/";
      getMask(Idx)->printAsOperand(O, SlotTracker);
    }
  }
}


#endif


/// Generate the IR for this reduction and register the next value of the
/// reduction chain in \p State.
///
/// Three forms are handled: ordered reductions, partial reductions and plain
/// in-loop reductions. The result is registered as a scalar unless this is a
/// partial reduction.
void VPReductionRecipe::execute(VPTransformState &State) {
  RecurKind Kind = getRecurrenceKind();
  assert(!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
         "In-loop AnyOf reductions aren't currently supported");
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  State.Builder.setFastMathFlags(getFastMathFlags());
  Value *NewVecOp = State.get(getVecOp());
  if (VPValue *Cond = getCondOp()) {
    // Conditional reduction: replace the lanes whose condition is false by
    // the recurrence identity before reducing.
    Value *NewCond = State.get(Cond, State.VF.isScalar());
    VectorType *VecTy = dyn_cast<VectorType>(NewVecOp->getType());
    Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType();

    Value *Start = getRecurrenceIdentity(Kind, ElementTy, getFastMathFlags());
    if (State.VF.isVector())
      Start = State.Builder.CreateVectorSplat(VecTy->getElementCount(), Start);

    NewVecOp = State.Builder.CreateSelect(NewCond, NewVecOp, Start);
  }

  Value *NextInChain;
  if (isOrdered()) {
    // Ordered: the reduction consumes the previous chain value directly. For
    // a scalar VF this is a single binary operation.
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
    if (State.VF.isVector())
      NextInChain =
          createOrderedReduction(State.Builder, Kind, NewVecOp, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
          PrevInChain, NewVecOp);
  } else if (isPartialReduction()) {
    assert((Kind == RecurKind::Add || Kind == RecurKind::FAdd) &&
           "Unexpected partial reduction kind");
    // Partial: the chain value is fetched as a non-scalar and the intrinsic's
    // result has the chain value's type.
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ false);
    NextInChain = State.Builder.CreateIntrinsic(
        PrevInChain->getType(),
        Kind == RecurKind::Add ? Intrinsic::vector_partial_reduce_add
                               : Intrinsic::vector_partial_reduce_fadd,
        {PrevInChain, NewVecOp}, State.Builder.getFastMathFlags(),
        "partial.reduce");
  } else {
    assert(isInLoop() &&
           "The reduction must either be ordered, partial or in-loop");
    // In-loop: reduce the vector first, then combine the reduced value with
    // the previous chain value.
    Value *PrevInChain = State.get(getChainOp(), /*IsScalar*/ true);
    Value *NewRed = createSimpleReduction(State.Builder, NewVecOp, Kind);
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
      NextInChain = createMinMaxOp(State.Builder, Kind, NewRed, PrevInChain);
    else
      NextInChain = State.Builder.CreateBinOp(
          (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
          PrevInChain, NewRed);
  }
  State.set(this, NextInChain, /*IsScalar*/ !isPartialReduction());
}


/// Generate the IR for this reduction, passing a mask and the explicit vector
/// length to the reduction helpers, and register the scalar result in
/// \p State.
void VPReductionEVLRecipe::execute(VPTransformState &State) {
  auto &IRB = State.Builder;
  // Propagate the fast-math flags carried by the underlying instruction.
  IRBuilderBase::FastMathFlagGuard FMFGuard(IRB);
  IRB.setFastMathFlags(getFastMathFlags());

  const RecurKind Kind = getRecurrenceKind();
  Value *ChainIn = State.get(getChainOp(), /*IsScalar*/ true);
  Value *Vec = State.get(getVecOp());
  Value *EVL = State.get(getEVL(), VPLane(0));

  // Without a condition operand an all-true mask is used.
  VPValue *CondOp = getCondOp();
  Value *Mask = CondOp ? State.get(CondOp)
                       : IRB.CreateVectorSplat(State.VF, IRB.getTrue());

  Value *Result;
  if (isOrdered()) {
    Result = createOrderedReduction(IRB, Kind, Vec, ChainIn, Mask, EVL);
  } else {
    // Reduce the vector first, then combine with the incoming chain value.
    Value *Reduced = createSimpleReduction(IRB, Vec, Kind, Mask, EVL);
    if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
      Result = createMinMaxOp(IRB, Kind, Reduced, ChainIn);
    else
      Result = IRB.CreateBinOp(
          (Instruction::BinaryOps)RecurrenceDescriptor::getOpcode(Kind),
          Reduced, ChainIn);
  }
  State.set(this, Result, /*IsScalar*/ true);
}


/// Return the cost of this reduction for \p VF, using the partial-reduction,
/// min/max-reduction or arithmetic-reduction TTI hook as appropriate.
InstructionCost VPReductionRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  const RecurKind Kind = getRecurrenceKind();
  Type *ScalarTy = this->getScalarType();
  auto *WideTy = cast<VectorType>(toVectorTy(ScalarTy, VF));
  const unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
  const FastMathFlags FMFs = getFastMathFlags();
  // Fast-math flags are only forwarded for floating-point element types.
  std::optional<FastMathFlags> OptionalFMF;
  if (ScalarTy->isFloatingPointTy())
    OptionalFMF = FMFs;

  if (isPartialReduction()) {
    // A conditional partial reduction additionally pays for a select.
    InstructionCost SelectCost = 0;
    if (isConditional()) {
      auto *MaskTy =
          cast<VectorType>(toVectorTy(getCondOp()->getScalarType(), VF));
      SelectCost = Ctx.TTI.getCmpSelInstrCost(Instruction::Select, WideTy,
                                              MaskTy,
                                              CmpInst::BAD_ICMP_PREDICATE,
                                              Ctx.CostKind);
    }
    InstructionCost PartialCost = Ctx.TTI.getPartialReductionCost(
        RdxOpcode, ScalarTy, ScalarTy, ScalarTy, VF, TTI::PR_None,
        TTI::PR_None, {}, Ctx.CostKind, OptionalFMF);
    return SelectCost + PartialCost;
  }

  // TODO: Support any-of reductions.
  assert(
      (!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) ||
       ForceTargetInstructionCost.getNumOccurrences() > 0) &&
      "Any-of reduction not implemented in VPlan-based cost model currently.");

  // TTI is expected to include the cost of moving the result to a scalar
  // register and the BinOp cost in both reduction hooks used below.
  if (!RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind))
    return Ctx.TTI.getArithmeticReductionCost(RdxOpcode, WideTy, OptionalFMF,
                                              Ctx.CostKind);

  const Intrinsic::ID MinMaxID = getMinMaxReductionIntrinsicOp(Kind);
  return Ctx.TTI.getMinMaxReductionCost(MinMaxID, WideTy, FMFs, Ctx.CostKind);
}


/// Bundle the recipes in \p ExpressionRecipes into one expression recipe of
/// kind \p ExpressionType.
///
/// The scalar type of the new recipe is taken from the chain operand of the
/// last recipe in the list, which must be a VPReductionRecipe (enforced by the
/// cast in the initializer list). The bundled recipes are detached from their
/// parent block, and every operand they take from outside the bundle becomes
/// an operand of this recipe, with a placeholder standing in for it inside the
/// bundle.
VPExpressionRecipe::VPExpressionRecipe(
    ExpressionTypes ExpressionType,
    ArrayRef<VPSingleDefRecipe *> ExpressionRecipes)
    : VPSingleDefRecipe(VPRecipeBase::VPExpressionSC, {},
                        cast<VPReductionRecipe>(ExpressionRecipes.back())
                            ->getChainOp()
                            ->getScalarType()),
      ExpressionRecipes(ExpressionRecipes), ExpressionType(ExpressionType) {
  // NOTE: the base-class initializer above has already called
  // ExpressionRecipes.back(), so this check only runs after that access.
  assert(!ExpressionRecipes.empty() && "Nothing to combine?");
  assert(
      none_of(ExpressionRecipes,
              [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&
      "expression cannot contain recipes with side-effects");

  // Maintain a copy of the expression recipes as a set of users.
  SmallPtrSet<VPUser *, 4> ExpressionRecipesAsSetOfUsers;
  for (auto *R : ExpressionRecipes)
    ExpressionRecipesAsSetOfUsers.insert(R);

  // Recipes in the expression, except the last one, must only be used by
  // (other) recipes inside the expression. If there are other users, external
  // to the expression, use a clone of the recipe for external users.
  for (VPSingleDefRecipe *R : reverse(ExpressionRecipes)) {
    if (R != ExpressionRecipes.back() &&
        any_of(R->users(), [&ExpressionRecipesAsSetOfUsers](VPUser *U) {
          return !ExpressionRecipesAsSetOfUsers.contains(U);
        })) {
      // There are users outside of the expression. Clone the recipe and use
      // the clone for those external users.
      VPSingleDefRecipe *CopyForExtUsers = R->clone();
      R->replaceUsesWithIf(CopyForExtUsers, [&ExpressionRecipesAsSetOfUsers](
                                                VPUser &U, unsigned) {
        return !ExpressionRecipesAsSetOfUsers.contains(&U);
      });
      // The clone takes the original's position: it is inserted before R,
      // and R is detached from its block just below.
      CopyForExtUsers->insertBefore(R);
    }
    // Detach the recipe from its block if it still has a parent.
    if (R->getParent())
      R->removeFromParent();
  }

  // Internalize all external operands to the expression recipes. To do so,
  // create new temporary VPValues for all operands defined by a recipe outside
  // the expression. The original operands are added as operands of the
  // VPExpressionRecipe itself.
  for (auto *R : ExpressionRecipes) {
    for (const auto &[Idx, Op] : enumerate(R->operands())) {
      auto *Def = Op->getDefiningRecipe();
      // Operands defined by another recipe of the expression stay as they are.
      if (Def && ExpressionRecipesAsSetOfUsers.contains(Def))
        continue;
      // Operand I of this recipe pairs with LiveInPlaceholders[I].
      addOperand(Op);
      LiveInPlaceholders.push_back(new VPSymbolicValue(Op->getScalarType()));
    }
  }

  // Replace each external operand with the first one created for it in
  // LiveInPlaceholders.
  for (auto *R : ExpressionRecipes)
    for (auto const &[LiveIn, Tmp] : zip(operands(), LiveInPlaceholders))
      R->replaceUsesOfWith(LiveIn, Tmp);
}


void VPExpressionRecipe::decompose() {

  for (auto *R : ExpressionRecipes)

    // Since the list could contain duplicates, make sure the recipe hasn't

    // already been inserted.

    if (!R->getParent())

      R->insertBefore(this);


  for (const auto &[Idx, Op] : enumerate(operands()))

    LiveInPlaceholders[Idx]->replaceAllUsesWith(Op);


  replaceAllUsesWith(ExpressionRecipes.back());

  ExpressionRecipes.clear();

}


/// Compute the cost of the bundled expression for vectorization factor \p VF
/// by querying the TTI hook that corresponds to ExpressionType:
///  - (negated) extended reductions use getPartialReductionCost() for partial
///    reductions and getExtendedReductionCost() for integer reductions;
///    non-partial floating-point reductions yield an invalid cost.
///  - multiply-accumulate reductions use getMulAccReductionCost(), or
///    getPartialReductionCost() for partial extended multiply-accumulates.
/// The negated variants rewrite the base opcode (Add -> Sub, FAdd -> FSub)
/// and then share the code of their non-negated counterpart.
InstructionCost VPExpressionRecipe::computeCost(ElementCount VF,
                                                VPCostContext &Ctx) const {
  // Scalar type produced by the expression.
  Type *RedTy = this->getScalarType();
  // Vector type formed from the scalar type of operand 0 and VF.
  auto *SrcVecTy =
      cast<VectorType>(toVectorTy(getOperand(0)->getScalarType(), VF));
  // Base opcode: derived from the recurrence kind of the last bundled recipe,
  // which is required (via cast<>) to be a VPReductionRecipe.
  unsigned Opcode = RecurrenceDescriptor::getOpcode(
      cast<VPReductionRecipe>(ExpressionRecipes.back())->getRecurrenceKind());
  switch (ExpressionType) {
  case ExpressionTypes::NegatedExtendedReduction:
    assert((Opcode == Instruction::Add || Opcode == Instruction::FAdd) &&
           "Unexpected opcode");
    // Cost the negated form with the matching subtract opcode.
    Opcode = Opcode == Instruction::Add ? Instruction::Sub : Instruction::FSub;
    [[fallthrough]];
  case ExpressionTypes::ExtendedReduction: {
    // The reduction is the last bundled recipe, the extend is the first one.
    auto *RedR = cast<VPReductionRecipe>(ExpressionRecipes.back());
    auto *ExtR = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);

    if (RedR->isPartialReduction())
      // Only a single extended input: the second input type is nullptr and
      // its extend kind is PR_None. Fast-math flags are forwarded only for
      // floating-point results.
      return Ctx.TTI.getPartialReductionCost(
          Opcode, getOperand(0)->getScalarType(), nullptr, RedTy, VF,
          TargetTransformInfo::getPartialReductionExtendKind(ExtR->getOpcode()),
          TargetTransformInfo::PR_None, std::nullopt, Ctx.CostKind,
          RedTy->isFloatingPointTy() ? std::optional{RedR->getFastMathFlags()}
                                     : std::nullopt);
    else if (!RedTy->isFloatingPointTy())
      // TTI::getExtendedReductionCost only supports integer types.
      return Ctx.TTI.getExtendedReductionCost(
          Opcode, ExtR->getOpcode() == Instruction::ZExt, RedTy, SrcVecTy,
          std::nullopt, Ctx.CostKind);
    else
      // Non-partial floating-point extended reduction: no cost available.
      return InstructionCost::getInvalid();
  }
  case ExpressionTypes::MulAccReduction:
    // No extends are bundled here; the first argument is passed as false
    // (the extended variant below passes whether the extend is a zext).
    return Ctx.TTI.getMulAccReductionCost(false, Opcode, RedTy, SrcVecTy,
                                          Ctx.CostKind);

  case ExpressionTypes::ExtNegatedMulAccReduction:
    // Cost the negated form with the matching subtract opcode.
    switch (Opcode) {
    case Instruction::Add:
      Opcode = Instruction::Sub;
      break;
    case Instruction::FAdd:
      Opcode = Instruction::FSub;
      break;
    default:
      llvm_unreachable("Unsupported opcode for ExtNegatedMulAccReduction");
    }
    [[fallthrough]];
  case ExpressionTypes::ExtMulAccReduction: {
    auto *RedR = cast<VPReductionRecipe>(ExpressionRecipes.back());
    if (RedR->isPartialReduction()) {
      // Bundled recipes 0 and 1 are the two extends, recipe 2 the multiply.
      auto *Ext0R = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
      auto *Ext1R = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
      auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
      return Ctx.TTI.getPartialReductionCost(
          Opcode, getOperand(0)->getScalarType(),
          getOperand(1)->getScalarType(), RedTy, VF,
          TargetTransformInfo::getPartialReductionExtendKind(
              Ext0R->getOpcode()),
          TargetTransformInfo::getPartialReductionExtendKind(
              Ext1R->getOpcode()),
          Mul->getOpcode(), Ctx.CostKind,
          RedTy->isFloatingPointTy() ? std::optional{RedR->getFastMathFlags()}
                                     : std::nullopt);
    }
    // FSub can only have been produced by the negated FAdd case above.
    assert(Opcode != Instruction::FSub && "Only integer types are supported");
    return Ctx.TTI.getMulAccReductionCost(
        cast<VPWidenCastRecipe>(ExpressionRecipes.front())->getOpcode() ==
            Instruction::ZExt,
        Opcode, RedTy, SrcVecTy, Ctx.CostKind);
  }
  }
  llvm_unreachable("Unknown VPExpressionRecipe::ExpressionTypes enum");
}


bool VPExpressionRecipe::mayReadOrWriteMemory() const {

  return any_of(ExpressionRecipes, [](VPSingleDefRecipe *R) {

    return R->mayReadFromMemory() || R->mayWriteToMemory();

  });

}


bool VPExpressionRecipe::mayHaveSideEffects() const {

  assert(

      none_of(ExpressionRecipes,

              [](VPSingleDefRecipe *R) { return R->mayHaveSideEffects(); }) &&

      "expression cannot contain recipes with side-effects");

  return false;

}


/// Returns true if the last bundled recipe is a VPReductionRecipe that is not
/// a partial reduction.
bool VPExpressionRecipe::isVectorToScalar() const {
  if (auto *Reduction = dyn_cast<VPReductionRecipe>(ExpressionRecipes.back()))
    return !Reduction->isPartialReduction();
  return false;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the bundled expression to \p O in the form
///   EXPRESSION <result> = <last operand> + [partial.]reduce.<opcode> (...)
/// where the text inside the parentheses depends on ExpressionType: the
/// (optionally negated) extended input for extended reductions, or the
/// (optionally extended / negated) multiply for multiply-accumulate
/// reductions. A condition operand is appended if the reduction is
/// conditional.
void VPExpressionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EXPRESSION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  // The last bundled recipe is required (via cast<>) to be the reduction.
  auto *Red = cast<VPReductionRecipe>(ExpressionRecipes.back());
  unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());

  switch (ExpressionType) {
  case ExpressionTypes::NegatedExtendedReduction:
  case ExpressionTypes::ExtendedReduction: {
    bool Negated = ExpressionType == ExpressionTypes::NegatedExtendedReduction;
    // The last operand of the expression is printed as the left-hand side of
    // the "+".
    getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
    O << Instruction::getOpcodeName(Opcode) << " (";
    // The negation is printed as "sub (0, x)" for Add and "fneg(x)" otherwise.
    if (Negated)
      O << (Opcode == Instruction::Add ? "sub (0, " : "fneg(");
    getOperand(0)->printAsOperand(O, SlotTracker);
    if (Negated)
      O << ")";
    Red->printFlags(O);

    // The first bundled recipe is the extend; print its opcode and type.
    auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
    O << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
      << *Ext0->getScalarType();
    if (Red->isConditional()) {
      O << ", ";
      Red->getCondOp()->printAsOperand(O, SlotTracker);
    }
    O << ")";
    break;
  }
  case ExpressionTypes::ExtNegatedMulAccReduction: {
    getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
    O << Instruction::getOpcodeName(
             RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
      << " (sub (0, mul";
    // Bundled recipes 0 and 1 are the extends, recipe 2 is the multiply.
    auto *Mul = cast<VPWidenRecipe>(ExpressionRecipes[2]);
    Mul->printFlags(O);
    O << "(";
    getOperand(0)->printAsOperand(O, SlotTracker);
    auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
    O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
      << *Ext0->getScalarType() << "), (";
    getOperand(1)->printAsOperand(O, SlotTracker);
    auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
    O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
      << *Ext1->getScalarType() << ")";
    if (Red->isConditional()) {
      O << ", ";
      Red->getCondOp()->printAsOperand(O, SlotTracker);
    }
    // Closes both the "sub (0, ..." and the "reduce.<opcode> (" parentheses.
    O << "))";
    break;
  }
  case ExpressionTypes::MulAccReduction:
  case ExpressionTypes::ExtMulAccReduction: {
    getOperand(getNumOperands() - 1)->printAsOperand(O, SlotTracker);
    O << " + " << (Red->isPartialReduction() ? "partial." : "") << "reduce.";
    O << Instruction::getOpcodeName(
             RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind()))
      << " (";
    O << "mul";
    bool IsExtended = ExpressionType == ExpressionTypes::ExtMulAccReduction;
    // With extends, the multiply is bundled recipe 2 (after the two extends);
    // without extends it is bundled recipe 0.
    auto *Mul = cast<VPWidenRecipe>(IsExtended ? ExpressionRecipes[2]
                                               : ExpressionRecipes[0]);
    Mul->printFlags(O);
    if (IsExtended)
      O << "(";
    getOperand(0)->printAsOperand(O, SlotTracker);
    if (IsExtended) {
      auto *Ext0 = cast<VPWidenCastRecipe>(ExpressionRecipes[0]);
      O << " " << Instruction::getOpcodeName(Ext0->getOpcode()) << " to "
        << *Ext0->getScalarType() << "), (";
    } else {
      O << ", ";
    }
    getOperand(1)->printAsOperand(O, SlotTracker);
    if (IsExtended) {
      auto *Ext1 = cast<VPWidenCastRecipe>(ExpressionRecipes[1]);
      O << " " << Instruction::getOpcodeName(Ext1->getOpcode()) << " to "
        << *Ext1->getScalarType() << ")";
    }
    if (Red->isConditional()) {
      O << ", ";
      Red->getCondOp()->printAsOperand(O, SlotTracker);
    }
    O << ")";
    break;
  }
  }
}


/// Print the reduction as
///   [PARTIAL-]REDUCE <result> = <chain> +<flags> reduce.<kind> (<vec>[, <cond>])
void VPReductionRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent << (isPartialReduction() ? "PARTIAL-REDUCE " : "REDUCE ");

  // "<result> = <chain> +<flags>"
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  printFlags(O);

  // "reduce.<kind> (<vec>"
  O << " reduce.";
  printRecurrenceKind(O, getRecurrenceKind());
  O << " (";
  getVecOp()->printAsOperand(O, SlotTracker);

  // Unconditional reductions have nothing more to print.
  if (!isConditional()) {
    O << ")";
    return;
  }

  O << ", ";
  getCondOp()->printAsOperand(O, SlotTracker);
  O << ")";
}


/// Print the EVL-based reduction as
///   REDUCE <result> = <chain> +<flags> vp.reduce.<opcode> (<vec>, <evl>[, <cond>])
void VPReductionEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  const unsigned ReductionOpcode =
      RecurrenceDescriptor::getOpcode(getRecurrenceKind());

  // "<result> = <chain> +<flags>"
  O << Indent << "REDUCE ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  getChainOp()->printAsOperand(O, SlotTracker);
  O << " +";
  printFlags(O);

  // "vp.reduce.<opcode> (<vec>, <evl>"
  O << " vp.reduce." << Instruction::getOpcodeName(ReductionOpcode) << " (";
  getVecOp()->printAsOperand(O, SlotTracker);
  O << ", ";
  getEVL()->printAsOperand(O, SlotTracker);

  // Unconditional reductions have nothing more to print.
  if (!isConditional()) {
    O << ")";
    return;
  }

  O << ", ";
  getCondOp()->printAsOperand(O, SlotTracker);
  O << ")";
}


#endif


/// Emit a single scalar clone of the underlying instruction: clone it, apply
/// the recipe's type, flags, metadata and predicate, rewire its operands to
/// the scalar values recorded in \p State, insert it via the builder and
/// register it as the scalar value of this recipe.
void VPReplicateRecipe::execute(VPTransformState &State) {
  assert(IsSingleScalar &&
         "VPReplicateRecipes must be unrolled before ::execute");

  Instruction *Original = getUnderlyingInstr();
  Instruction *Copy = Original->clone();

  if (Type *ScalarTy = getScalarType(); !ScalarTy->isVoidTy()) {
    Copy->setName(Original->getName() + ".cloned");
    // Narrowing the recipe's operands can narrow its result type as well, so
    // the clone must take on the type recorded for the recipe.
    if (Copy->getType() != ScalarTy)
      Copy->mutateType(ScalarTy);
  }

  applyFlags(*Copy);
  applyMetadata(*Copy);

  if (hasPredicate())
    cast<CmpInst>(Copy)->setPredicate(getPredicate());

  // Point every operand of the clone at its scalar counterpart in the new
  // loop.
  unsigned OperandNo = 0;
  for (VPValue *Operand : operands())
    Copy->setOperand(OperandNo++, State.get(Operand, true));

  // Insert the clone into the new loop and record it for this recipe.
  State.Builder.Insert(Copy);
  State.set(this, Copy, true);

  // A freshly cloned assumption must be added to the assumption cache.
  if (auto *Assume = dyn_cast<AssumeInst>(Copy))
    State.AC->registerAssumption(Assume);
}


/// Compute the SCEV expression of the address \p Ptr for address-cost
/// purposes. The result is one of:
///  - SCEVCouldNotCompute, if no SCEV can be computed for the VPValue. SCEV
///    computation for VPValues is incomplete, so the legacy cost model may
///    still be able to compute one; callers fall back to it in that case.
///  - the computed expression, if it is a pointer computation for which the
///    legacy cost model also computes a SCEV when costing the address.
///  - nullptr otherwise.
static const SCEV *getAddressAccessSCEV(const VPValue *Ptr,
                                        PredicatedScalarEvolution &PSE,
                                        const Loop *L) {
  const SCEV *PtrExpr = vputils::getSCEVExprForVPValue(Ptr, PSE, L);

  // Only a computable expression is checked for being usable as an address
  // SCEV; SCEVCouldNotCompute is handed back unchanged.
  if (!isa<SCEVCouldNotCompute>(PtrExpr) &&
      !vputils::isAddressSCEVForCost(PtrExpr, *PSE.getSE(), L))
    return nullptr;

  return PtrExpr;
}


/// Compute the cost of this replicate recipe for vectorization factor \p VF,
/// dispatching on the opcode of the underlying instruction. Opcodes without a
/// case below, and cases that `break` out of the switch, are costed by
/// Ctx.getLegacyCost(). The underlying instruction is always added to
/// Ctx.SkipCostComputation. Scalable VFs yield an invalid cost unless the
/// recipe is a single scalar.
InstructionCost VPReplicateRecipe::computeCost(ElementCount VF,
                                               VPCostContext &Ctx) const {
  Instruction *UI = cast<Instruction>(getUnderlyingValue());
  // VPReplicateRecipe may be cloned as part of an existing VPlan-to-VPlan
  // transform, avoid computing their cost multiple times for now.
  Ctx.SkipCostComputation.insert(UI);

  // Replicating per lane requires a fixed number of lanes.
  if (VF.isScalable() && !isSingleScalar())
    return InstructionCost::getInvalid();

  switch (UI->getOpcode()) {
  case Instruction::Alloca:
    if (VF.isScalable())
      return InstructionCost::getInvalid();
    // Costed as one scalar multiply of the recipe's scalar type.
    return Ctx.TTI.getArithmeticInstrCost(Instruction::Mul,
                                          this->getScalarType(), Ctx.CostKind);
  case Instruction::GetElementPtr:
    // We mark this instruction as zero-cost because the cost of GEPs in
    // vectorized code depends on whether the corresponding memory instruction
    // is scalarized or not. Therefore, we handle GEPs with the memory
    // instruction cost.
    return 0;
  case Instruction::Call: {
    // The last operand holds the called function; all preceding operands are
    // the call arguments.
    auto *CalledFn =
        cast<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue());
    Type *ResultTy = this->getScalarType();
    SmallVector<const VPValue *> ArgOps(drop_end(operands()));
    return computeCallCost(CalledFn, ResultTy, ArgOps, isSingleScalar(), VF,
                           Ctx);
  }
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::FAdd:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
  case Instruction::FCmp:
    // Scalar (VF = 1) cost, multiplied by the number of lanes unless only a
    // single scalar is produced.
    return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
                                      Ctx) *
           (isSingleScalar() ? 1 : VF.getFixedValue());
  case Instruction::SDiv:
  case Instruction::UDiv:
  case Instruction::SRem:
  case Instruction::URem: {
    InstructionCost ScalarCost =
        getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1), Ctx);
    if (isSingleScalar())
      return ScalarCost;

    // If any of the operands is from a different replicate region and has its
    // cost skipped, it may have been forced to scalar. Fall back to legacy cost
    // model to avoid cost mis-match.
    if (any_of(operands(), [&Ctx, VF](VPValue *Op) {
          auto *PredR = dyn_cast<VPPredInstPHIRecipe>(Op);
          if (!PredR)
            return false;
          return Ctx.skipCostComputation(
              dyn_cast_or_null<Instruction>(
                  PredR->getOperand(0)->getUnderlyingValue()),
              VF.isVector());
        }))
      break;

    // One scalar operation per lane plus the scalarization overhead.
    ScalarCost = ScalarCost * VF.getFixedValue() +
                 Ctx.getScalarizationOverhead(this->getScalarType(),
                                              to_vector(operands()), VF);
    // If the recipe is not predicated (i.e. not in a replicate region), return
    // the scalar cost. Otherwise handle predicated cost.
    if (!getRegion()->isReplicator())
      return ScalarCost;

    // Account for the phi nodes that we will create.
    ScalarCost += VF.getFixedValue() *
                  Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
    // Scale the cost by the probability of executing the predicated blocks.
    // This assumes the predicated block for each vector lane is equally
    // likely.
    ScalarCost /= Ctx.getPredBlockCostDivisor(UI->getParent());
    return ScalarCost;
  }
  case Instruction::Load:
  case Instruction::Store: {
    bool IsLoad = UI->getOpcode() == Instruction::Load;
    // The pointer is operand 0 of a load and operand 1 of a store.
    const VPValue *PtrOp = getOperand(!IsLoad);
    const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.PSE, Ctx.L);
    // No SCEV could be computed for the address: use the legacy cost model.
    if (isa_and_nonnull<SCEVCouldNotCompute>(PtrSCEV))
      break;

    // Accessed type: the recipe's own type for loads, the type of operand 0
    // for stores.
    Type *ValTy = (IsLoad ? this : getOperand(0))->getScalarType();
    Type *ScalarPtrTy = PtrOp->getScalarType();
    const Align Alignment = getLoadStoreAlignment(UI);
    unsigned AS = cast<PointerType>(ScalarPtrTy)->getAddressSpace();
    TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(UI->getOperand(0));
    bool PreferVectorizedAddressing = Ctx.TTI.prefersVectorizedAddressing();
    bool UsedByLoadStoreAddress =
        !PreferVectorizedAddressing && vputils::isUsedByLoadStoreAddress(this);
    InstructionCost ScalarMemOpCost = Ctx.TTI.getMemoryOpCost(
        UI->getOpcode(), ValTy, Alignment, AS, Ctx.CostKind, OpInfo,
        UsedByLoadStoreAddress ? UI : nullptr);

    // Cost of one scalar access: the memory operation plus the address
    // computation.
    Type *PtrTy = isSingleScalar() ? ScalarPtrTy : toVectorTy(ScalarPtrTy, VF);
    InstructionCost ScalarCost =
        ScalarMemOpCost +
        Ctx.TTI.getAddressComputationCost(
            PtrTy, UsedByLoadStoreAddress ? nullptr : Ctx.PSE.getSE(), PtrSCEV,
            Ctx.CostKind);
    if (isSingleScalar())
      return ScalarCost;

    SmallVector<const VPValue *> OpsToScalarize;
    Type *ResultTy = Type::getVoidTy(PtrTy->getContext());
    // Set ResultTy and OpsToScalarize, if scalarization is needed. Currently we
    // don't assign scalarization overhead in general, if the target prefers
    // vectorized addressing or the loaded value is used as part of an address
    // of another load or store.
    if (!UsedByLoadStoreAddress) {
      bool EfficientVectorLoadStore =
          Ctx.TTI.supportsEfficientVectorElementLoadStore();
      if (!(IsLoad && !PreferVectorizedAddressing) &&
          !(!IsLoad && EfficientVectorLoadStore))
        append_range(OpsToScalarize, operands());

      if (!EfficientVectorLoadStore)
        ResultTy = this->getScalarType();
    }

    TTI::VectorInstrContext VIC =
        IsLoad ? TTI::VectorInstrContext::Load : TTI::VectorInstrContext::Store;
    // One scalar access per lane plus the scalarization overhead.
    InstructionCost Cost =
        (ScalarCost * VF.getFixedValue()) +
        Ctx.getScalarizationOverhead(ResultTy, OpsToScalarize, VF, VIC, true);

    const VPRegionBlock *ParentRegion = getRegion();
    if (ParentRegion && ParentRegion->isReplicator()) {
      // Predicated access without an address SCEV: use the legacy cost model.
      if (!PtrSCEV)
        break;
      // Scale by the divisor for the predicated block and add the cost of the
      // conditional branch.
      Cost /= Ctx.getPredBlockCostDivisor(UI->getParent());
      Cost += Ctx.TTI.getCFInstrCost(Instruction::CondBr, Ctx.CostKind);

      // Add the cost of extracting every lane of an i1 vector with VF
      // elements.
      auto *VecI1Ty = VectorType::get(
          IntegerType::getInt1Ty(Ctx.L->getHeader()->getContext()), VF);
      Cost += Ctx.TTI.getScalarizationOverhead(
          VecI1Ty, APInt::getAllOnes(VF.getFixedValue()),
          /*Insert=*/false, /*Extract=*/true, Ctx.CostKind);

      if (Ctx.useEmulatedMaskMemRefHack(this, VF)) {
        // Artificially setting to a high enough value to practically disable
        // vectorization with such operations.
        return 3000000;
      }
    }
    return Cost;
  }
  case Instruction::SExt:
  case Instruction::ZExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::PtrToAddr:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::Select:
  case Instruction::AddrSpaceCast: {
    // Scalar (VF = 1) cost, multiplied by the number of lanes unless only a
    // single scalar is produced.
    return getCostForRecipeWithOpcode(getOpcode(), ElementCount::getFixed(1),
                                      Ctx) *
           (isSingleScalar() ? 1 : VF.getFixedValue());
  }
  case Instruction::ExtractValue:
  case Instruction::InsertValue:
    return Ctx.TTI.getInsertExtractValueCost(getOpcode(), Ctx.CostKind);
  }

  // Reached for opcodes without a case above and for cases that broke out of
  // the switch.
  return Ctx.getLegacyCost(UI, VF);
}


/// Compute the cost of a replicated call to \p CalledFn with result type
/// \p ResultTy and arguments \p ArgOps.
///  - Intrinsics reported as free scalar intrinsics cost 0.
///  - For a single scalar, the cheaper of the call cost and the intrinsic cost
///    is returned (the intrinsic cost is invalid for non-intrinsics).
///  - Otherwise the scalar call cost is multiplied by the number of lanes and
///    the scalarization overhead is added; scalable VFs yield an invalid cost.
InstructionCost VPReplicateRecipe::computeCallCost(
    Function *CalledFn, Type *ResultTy, ArrayRef<const VPValue *> ArgOps,
    bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx) {
  // Scalar types of the call arguments.
  SmallVector<Type *, 4> ArgTys;
  for (const VPValue *Arg : ArgOps)
    ArgTys.push_back(Arg->getScalarType());

  const Intrinsic::ID IntrinID = CalledFn->getIntrinsicID();
  auto GetIntrinsicCost = [&]() -> InstructionCost {
    if (IntrinID)
      return Ctx.TTI.getIntrinsicInstrCost(
          IntrinsicCostAttributes(IntrinID, ResultTy, ArgTys), Ctx.CostKind);
    return InstructionCost::getInvalid();
  };

  if (IntrinID && VPCostContext::isFreeScalarIntrinsic(IntrinID)) {
    assert(GetIntrinsicCost() == 0 && "scalarizing intrinsic should be free");
    return 0;
  }

  const InstructionCost PerCallCost =
      Ctx.TTI.getCallInstrCost(CalledFn, ResultTy, ArgTys, Ctx.CostKind);
  if (IsSingleScalar)
    return std::min(PerCallCost, GetIntrinsicCost());

  // Scalarization overhead is undefined for scalable VFs.
  if (VF.isScalable())
    return InstructionCost::getInvalid();

  const InstructionCost AllLanesCost = PerCallCost * VF.getFixedValue();
  return AllLanesCost + Ctx.getScalarizationOverhead(ResultTy, ArgOps, VF);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "CLONE" (single scalar) or "REPLICATE", followed by
/// the defined value (if the result is not void) and either the call or the
/// opcode with its operands. " (S->V)" is appended if a VPPredInstPHIRecipe
/// user does not only use scalar values.
void VPReplicateRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  O << Indent;
  if (IsSingleScalar)
    O << "CLONE ";
  else
    O << "REPLICATE ";

  const bool DefinesValue = !getScalarType()->isVoidTy();
  if (DefinesValue) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  auto *UI = getUnderlyingInstr();
  if (auto *Call = dyn_cast<CallBase>(UI)) {
    auto PrintArg = [&O, &SlotTracker](VPValue *Arg) {
      Arg->printAsOperand(O, SlotTracker);
    };
    O << "call";
    printFlags(O);
    O << "@" << Call->getCalledFunction()->getName() << "(";
    // Every operand except the last one is printed as a call argument.
    interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
                    O, PrintArg);
    O << ")";
  } else {
    O << Instruction::getOpcodeName(UI->getOpcode());
    printFlags(O);
    printOperands(O, SlotTracker);
  }

  // Check whether the recipe feeds a widened recipe through an intervening
  // VPPredInstPHIRecipe; in that case the scalar values also get packed into a
  // vector.
  auto FeedsVectorUser = [](const VPUser *U) {
    auto *PredPhi = dyn_cast<VPPredInstPHIRecipe>(U);
    return PredPhi && !vputils::onlyScalarValuesUsed(PredPhi);
  };
  if (any_of(users(), FeedsVectorUser))
    O << " (S->V)";
}


#endif


/// Not expected to be called: reaching this function is reported as
/// unreachable, because the recipe must already have been removed when its
/// replicate region was dissolved. \p State is unused.
void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
  llvm_unreachable("recipe must be removed when dissolving replicate region");
}


/// Returns a cost of 0 for any \p VF; neither \p VF nor \p Ctx is consulted.
InstructionCost VPBranchOnMaskRecipe::computeCost(ElementCount VF,
                                                  VPCostContext &Ctx) const {
  // The legacy cost model doesn't assign costs to branches for individual
  // replicate regions. Match the current behavior in the VPlan cost model for
  // now.
  return 0;
}


/// Not expected to be called: reaching this function is reported as
/// unreachable, because the recipe must already have been removed when its
/// replicate region was dissolved. \p State is unused.
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  llvm_unreachable("recipe must be removed when dissolving replicate region");
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "PHI-PREDICATED-INSTRUCTION <result> = <operands>".
void VPPredInstPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  // Header: indentation followed by the recipe kind.
  O << Indent;
  O << "PHI-PREDICATED-INSTRUCTION ";

  // "<result> = <operands>"
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}


#endif


/// Compute the cost of a widened memory access for vectorization factor
/// \p VF.
///  - Non-consecutive accesses are costed as a gather/scatter intrinsic (the
///    masked_* or vp_* flavour, depending on the recipe kind) plus the address
///    computation cost.
///  - Consecutive masked accesses are costed as a masked load/store intrinsic.
///  - Consecutive unmasked accesses are costed as a plain load/store.
/// Loads are both VPWidenLoadRecipe and VPWidenLoadEVLRecipe; everything else
/// is treated as a store.
InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
                                                 VPCostContext &Ctx) const {
  const VPRecipeBase *R = getAsRecipe();
  bool IsLoad = isa<VPWidenLoadRecipe, VPWidenLoadEVLRecipe>(R);
  // Accessed scalar type: the loaded value for loads, operand 1 for stores.
  Type *ScalarTy = IsLoad ? cast<VPSingleDefRecipe>(R)->getScalarType()
                          : R->getOperand(1)->getScalarType();
  Type *Ty = toVectorTy(ScalarTy, VF);
  unsigned AS =
      cast<PointerType>(getAddr()->getScalarType())->getAddressSpace();
  unsigned Opcode = IsLoad ? Instruction::Load : Instruction::Store;

  if (!Consecutive) {
    // TODO: Using the original IR may not be accurate.
    // Currently, ARM will use the underlying IR to calculate gather/scatter
    // instruction cost.
    [[maybe_unused]] auto IsReverseMask = [this, R]() {
      VPValue *Mask = getMask();
      if (!Mask)
        return false;

      if (isa<VPWidenLoadEVLRecipe, VPWidenStoreEVLRecipe>(R))
        return match(Mask, m_Intrinsic<Intrinsic::experimental_vp_reverse>());

      return match(Mask, m_Reverse(m_VPValue()));
    };
    assert(!IsReverseMask() &&
           "Inconsecutive memory access should not have reverse order");
    Type *PtrTy = getAddr()->getScalarType();
    const Value *Ptr = getAddr()->getUnderlyingValue();

    // If the address value is uniform across all lanes, then the address can be
    // calculated with scalar type and broadcast.
    if (!vputils::isSingleScalar(getAddr()))
      PtrTy = toVectorTy(PtrTy, VF);

    unsigned IID = isa<VPWidenLoadRecipe>(R)      ? Intrinsic::masked_gather
                   : isa<VPWidenStoreRecipe>(R)   ? Intrinsic::masked_scatter
                   : isa<VPWidenLoadEVLRecipe>(R) ? Intrinsic::vp_gather
                                                  : Intrinsic::vp_scatter;
    return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
                                             Ctx.CostKind) +
           Ctx.TTI.getMemIntrinsicInstrCost(
               MemIntrinsicCostAttributes(IID, Ty, Ptr, IsMasked, Alignment,
                                          &Ingredient),
               Ctx.CostKind);
  }

  InstructionCost Cost = 0;
  if (IsMasked) {
    // Select the intrinsic via IsLoad so that masked EVL loads, which are
    // VPWidenLoadEVLRecipes rather than VPWidenLoadRecipes, are costed as
    // masked loads instead of masked stores.
    unsigned IID = IsLoad ? Intrinsic::masked_load : Intrinsic::masked_store;
    Cost += Ctx.TTI.getMemIntrinsicInstrCost(
        MemIntrinsicCostAttributes(IID, Ty, Alignment, AS), Ctx.CostKind);
  } else {
    // Operand info is taken from operand 0 for loads and operand 1 for stores.
    TTI::OperandValueInfo OpInfo =
        Ctx.getOperandInfo(IsLoad ? R->getOperand(0) : R->getOperand(1));
    Cost += Ctx.TTI.getMemoryOpCost(Opcode, Ty, Alignment, AS, Ctx.CostKind,
                                    OpInfo, &Ingredient);
  }
  return Cost;
}


/// Emit the widened load: a masked gather for non-consecutive accesses, a
/// masked load for consecutive accesses with a mask, and a plain aligned load
/// otherwise. The recipe's metadata is applied to the new instruction, which
/// is then recorded in \p State as the value of this recipe.
void VPWidenLoadRecipe::execute(VPTransformState &State) {
  auto *VecTy = VectorType::get(getScalarType(), State.VF);
  const bool IsGather = !isConsecutive();
  auto &Builder = State.Builder;

  // The mask (if any) is materialized before the address.
  Value *MaskV = nullptr;
  if (VPValue *MaskOp = getMask())
    MaskV = State.get(MaskOp);

  // Gathers take a vector of addresses, consecutive accesses a scalar one.
  Value *Ptr = State.get(getAddr(), /*IsScalar*/ !IsGather);

  auto EmitLoad = [&]() -> Value * {
    if (IsGather)
      return Builder.CreateMaskedGather(VecTy, Ptr, Alignment, MaskV, nullptr,
                                        "wide.masked.gather");
    if (MaskV)
      return Builder.CreateMaskedLoad(VecTy, Ptr, Alignment, MaskV,
                                      PoisonValue::get(VecTy),
                                      "wide.masked.load");
    return Builder.CreateAlignedLoad(VecTy, Ptr, Alignment, "wide.load");
  };

  Value *Loaded = EmitLoad();
  applyMetadata(*cast<Instruction>(Loaded));
  State.set(this, Loaded);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN <result> = load <operands>".
void VPWidenLoadRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                    VPSlotTracker &SlotTracker) const {
  // Header: indentation followed by the recipe kind.
  O << Indent;
  O << "WIDEN ";

  // "<result> = load <operands>"
  printAsOperand(O, SlotTracker);
  O << " = load ";
  printOperands(O, SlotTracker);
}


#endif


/// Emit the EVL-based widened load as a vp.gather (non-consecutive) or
/// vp.load (consecutive) intrinsic call. An all-true mask is used if the
/// recipe has no mask. The alignment is attached to parameter 0 of the call,
/// the recipe's metadata is applied, and the call is recorded in \p State as
/// the value of this recipe.
void VPWidenLoadEVLRecipe::execute(VPTransformState &State) {
  auto *VecTy = VectorType::get(getScalarType(), State.VF);
  const bool IsGather = !isConsecutive();
  auto &Builder = State.Builder;

  // Operands are materialized in the order EVL, address, mask.
  Value *EVL = State.get(getEVL(), VPLane(0));
  Value *Ptr = State.get(getAddr(), !IsGather);
  VPValue *MaskOp = getMask();
  Value *MaskV = MaskOp
                     ? State.get(MaskOp)
                     : Builder.CreateVectorSplat(State.VF, Builder.getTrue());

  const Intrinsic::ID IID = IsGather ? Intrinsic::vp_gather : Intrinsic::vp_load;
  const char *ResultName = IsGather ? "wide.masked.gather" : "vp.op.load";
  CallInst *Load =
      Builder.CreateIntrinsic(VecTy, IID, {Ptr, MaskV, EVL}, nullptr,
                              ResultName);

  Load->addParamAttr(
      0, Attribute::getWithAlignment(Load->getContext(), Alignment));
  applyMetadata(*Load);
  State.set(this, Load);
}


/// Compute the cost of the EVL-based load for \p VF. Consecutive, unmasked
/// loads are costed as a vp.load intrinsic; all other cases defer to
/// VPWidenMemoryRecipe::computeCost().
InstructionCost VPWidenLoadEVLRecipe::computeCost(ElementCount VF,
                                                  VPCostContext &Ctx) const {
  if (Consecutive && !IsMasked) {
    // getMemIntrinsicInstrCost() is used rather than getMemoryOpCost() because
    // the EVL recipes use EVL in place of the tail mask, whereas the legacy
    // model always computes the cost of the mask.
    // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() once
    // there is no need to compare against the legacy cost model.
    Type *VecTy = toVectorTy(getScalarType(), VF);
    Type *AddrTy = getAddr()->getScalarType();
    unsigned AddrSpace = cast<PointerType>(AddrTy)->getAddressSpace();
    MemIntrinsicCostAttributes Attrs(Intrinsic::vp_load, VecTy, Alignment,
                                     AddrSpace);
    return Ctx.TTI.getMemIntrinsicInstrCost(Attrs, Ctx.CostKind);
  }

  return VPWidenMemoryRecipe::computeCost(VF, Ctx);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN <result> = vp.load <operands>".
void VPWidenLoadEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  // Header: indentation followed by the recipe kind.
  O << Indent;
  O << "WIDEN ";

  // "<result> = vp.load <operands>"
  printAsOperand(O, SlotTracker);
  O << " = vp.load ";
  printOperands(O, SlotTracker);
}


#endif


/// Emit the widened store: a masked scatter for non-consecutive accesses, a
/// masked store for consecutive accesses with a mask, and a plain aligned
/// store otherwise. The recipe's metadata is applied to the new instruction.
void VPWidenStoreRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsScatter = !isConsecutive();
  VPValue *ValueOp = getStoredValue();

  // Operands are materialized in the order mask, stored value, address.
  Value *MaskV = nullptr;
  if (VPValue *MaskOp = getMask())
    MaskV = State.get(MaskOp);
  Value *Data = State.get(ValueOp);
  // Scatters take a vector of addresses, consecutive accesses a scalar one.
  Value *Ptr = State.get(getAddr(), /*IsScalar*/ !IsScatter);

  auto EmitStore = [&]() -> Instruction * {
    if (IsScatter)
      return Builder.CreateMaskedScatter(Data, Ptr, Alignment, MaskV);
    if (MaskV)
      return Builder.CreateMaskedStore(Data, Ptr, Alignment, MaskV);
    return Builder.CreateAlignedStore(Data, Ptr, Alignment);
  };

  Instruction *Store = EmitStore();
  applyMetadata(*Store);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN store <operands>".
void VPWidenStoreRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  // Header: indentation followed by the recipe kind.
  O << Indent;
  O << "WIDEN store ";

  printOperands(O, SlotTracker);
}


#endif


/// Emit the EVL-based widened store as a vp.scatter (non-consecutive) or
/// vp.store (consecutive) intrinsic call. An all-true mask is used if the
/// recipe has no mask. The alignment is attached to parameter 1 of the call
/// (the address) and the recipe's metadata is applied.
void VPWidenStoreEVLRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;
  const bool IsScatter = !isConsecutive();
  VPValue *ValueOp = getStoredValue();

  // Operands are materialized in the order stored value, EVL, mask, address.
  Value *Data = State.get(ValueOp);
  Value *EVL = State.get(getEVL(), VPLane(0));
  VPValue *MaskOp = getMask();
  Value *MaskV = MaskOp
                     ? State.get(MaskOp)
                     : Builder.CreateVectorSplat(State.VF, Builder.getTrue());
  Value *Ptr = State.get(getAddr(), !IsScatter);

  const Intrinsic::ID IID =
      IsScatter ? Intrinsic::vp_scatter : Intrinsic::vp_store;
  CallInst *Store = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()),
                                            IID, {Data, Ptr, MaskV, EVL});

  Store->addParamAttr(
      1, Attribute::getWithAlignment(Store->getContext(), Alignment));
  applyMetadata(*Store);
}


/// Compute the cost of the EVL-based store for \p VF. Consecutive, unmasked
/// stores are costed as a vp.store intrinsic; all other cases defer to
/// VPWidenMemoryRecipe::computeCost().
InstructionCost VPWidenStoreEVLRecipe::computeCost(ElementCount VF,
                                                   VPCostContext &Ctx) const {
  if (Consecutive && !IsMasked) {
    // getMemIntrinsicInstrCost() is used rather than getMemoryOpCost() because
    // the EVL recipes use EVL in place of the tail mask, whereas the legacy
    // model always computes the cost of the mask.
    // TODO: Use getMemoryOpCost() instead of getMemIntrinsicInstrCost() once
    // there is no need to compare against the legacy cost model.
    Type *VecTy = toVectorTy(getStoredValue()->getScalarType(), VF);
    Type *AddrTy = getAddr()->getScalarType();
    unsigned AddrSpace = cast<PointerType>(AddrTy)->getAddressSpace();
    MemIntrinsicCostAttributes Attrs(Intrinsic::vp_store, VecTy, Alignment,
                                     AddrSpace);
    return Ctx.TTI.getMemIntrinsicInstrCost(Attrs, Ctx.CostKind);
  }

  return VPWidenMemoryRecipe::computeCost(VF, Ctx);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the recipe as "WIDEN vp.store " followed by its operands.
void VPWidenStoreEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN vp.store ";
  printOperands(O, SlotTracker);
}


#endif


/// Cast the vector \p V to \p DstVTy, which must have the same element count
/// and element size. When the element types cannot be cast directly
/// (pointer <-> floating point), the cast goes through an integer vector of
/// the same element width.
static Value *createBitOrPointerCast(IRBuilderBase &Builder, Value *V,
                                     VectorType *DstVTy, const DataLayout &DL) {
  // V must be a vector with as many elements as DstVTy.
  auto *SrcVTy = cast<VectorType>(V->getType());
  const auto NumElts = DstVTy->getElementCount();
  assert(NumElts == SrcVTy->getElementCount() &&
         "Vector dimensions do not match");

  Type *FromEltTy = SrcVTy->getElementType();
  Type *ToEltTy = DstVTy->getElementType();
  assert((DL.getTypeSizeInBits(FromEltTy) == DL.getTypeSizeInBits(ToEltTy)) &&
         "Vector elements must have same size");

  if (!CastInst::isBitOrNoopPointerCastable(FromEltTy, ToEltTy, DL)) {
    // No direct cast exists. This can happen when one side is a floating
    // point vector and the other a vector of pointers, so bridge the two with
    // an integer vector: Ptr <-> Int <-> Float.
    assert((ToEltTy->isPointerTy() != FromEltTy->isPointerTy()) &&
           "Only one type should be a pointer type");
    assert((ToEltTy->isFloatingPointTy() != FromEltTy->isFloatingPointTy()) &&
           "Only one type should be a floating point type");
    Type *BridgeEltTy = IntegerType::getIntNTy(
        V->getContext(), DL.getTypeSizeInBits(FromEltTy));
    auto *BridgeVecTy = VectorType::get(BridgeEltTy, NumElts);
    Value *AsInt = Builder.CreateBitOrPointerCast(V, BridgeVecTy);
    return Builder.CreateBitOrPointerCast(AsInt, DstVTy);
  }

  // The element types are directly castable.
  return Builder.CreateBitOrPointerCast(V, DstVTy);
}


/// Build one wide vector whose elements are taken round-robin from the
/// equally-typed vectors in \p Vals. \p Name names the resulting value.
static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
                                const Twine &Name) {
  const unsigned NumVecs = Vals.size();
  assert(NumVecs > 1 && "Tried to interleave invalid number of vectors");

  auto *PartTy = cast<VectorType>(Vals[0]->getType());
  assert(all_of(Vals, [PartTy](Value *V) { return V->getType() == PartTy; }) &&
         "Tried to interleave mismatched types");

  if (!PartTy->isScalableTy()) {
    // Fixed length: concatenate all parts into one wide vector, then shuffle
    // the elements into interleaved order.
    Value *Concat = concatenateVectors(Builder, Vals);
    const unsigned EltsPerPart = PartTy->getElementCount().getFixedValue();
    return Builder.CreateShuffleVector(
        Concat, createInterleaveMask(EltsPerPart, NumVecs), Name);
  }

  // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
  // the interleave intrinsic is used instead.
  assert(NumVecs <= 8 && "Unsupported interleave factor for scalable vectors");
  return Builder.CreateVectorInterleave(Vals, Name);
}


// Try to vectorize the interleave group that \p Instr belongs to.

//

// E.g. Translate following interleaved load group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     R = Pic[i];             // Member of index 0

//     G = Pic[i+1];           // Member of index 1

//     B = Pic[i+2];           // Member of index 2

//     ... // do something to R, G, B

//   }

// To:

//   %wide.vec = load <12 x i32>                       ; Read 4 tuples of R,G,B

//   %R.vec = shuffle %wide.vec, poison, <0, 3, 6, 9>   ; R elements

//   %G.vec = shuffle %wide.vec, poison, <1, 4, 7, 10>  ; G elements

//   %B.vec = shuffle %wide.vec, poison, <2, 5, 8, 11>  ; B elements

//

// Or translate following interleaved store group (factor = 3):

//   for (i = 0; i < N; i+=3) {

//     ... do something to R, G, B

//     Pic[i]   = R;           // Member of index 0

//     Pic[i+1] = G;           // Member of index 1

//     Pic[i+2] = B;           // Member of index 2

//   }

// To:

//   %R_G.vec = shuffle %R.vec, %G.vec, <0, 1, 2, ..., 7>

//   %B_U.vec = shuffle %B.vec, poison, <0, 1, 2, 3, u, u, u, u>

//   %interleaved.vec = shuffle %R_G.vec, %B_U.vec,

//        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements

//   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B


// Emit a single wide (optionally masked) load or store covering the whole
// interleave group. For loads, the wide value is split into one strided vector
// per existing member; for stores, the members' vectors are interleaved into
// one wide vector before being written.
void VPInterleaveRecipe::execute(VPTransformState &State) {
  assert((!needsMaskForGaps() || !State.VF.isScalable()) &&
         "Masking gaps for scalable vectors is not yet supported.");
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  Instruction *Instr = Group->getInsertPos();

  // Prepare for the vector type of the interleaved load/store: the element
  // type of the insert-position access, VF * factor elements wide.
  Type *ScalarTy = getLoadStoreType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  auto *VecTy = VectorType::get(ScalarTy, State.VF * InterleaveFactor);

  VPValue *BlockInMask = getMask();
  VPValue *Addr = getAddr();
  // Only lane 0 of the address is used for the wide access.
  Value *ResAddr = State.get(Addr, VPLane(0));

  // Build the mask for the wide access from the block mask (if any) and
  // MaskForGaps (if any). Returns MaskForGaps unchanged when there is no block
  // mask and VF is fixed. Callers below only invoke this when at least one of
  // the two masks exists.
  auto CreateGroupMask = [&BlockInMask, &State,
                          &InterleaveFactor](Value *MaskForGaps) -> Value * {
    if (State.VF.isScalable()) {
      assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      // Interleave InterleaveFactor copies of the block mask so that every
      // member of a tuple shares its tuple's mask bit.
      auto *ResBlockInMask = State.get(BlockInMask);
      SmallVector<Value *> Ops(InterleaveFactor, ResBlockInMask);
      return interleaveVectors(State.Builder, Ops, "interleaved.mask");
    }

    if (!BlockInMask)
      return MaskForGaps;

    // Fixed VF: widen the block mask with a shuffle using a replicated mask,
    // then combine it with the gap mask when one is present.
    Value *ResBlockInMask = State.get(BlockInMask);
    Value *ShuffledMask = State.Builder.CreateShuffleVector(
        ResBlockInMask,
        createReplicatedMask(InterleaveFactor, State.VF.getFixedValue()),
        "interleaved.mask");
    return MaskForGaps ? State.Builder.CreateBinOp(Instruction::And,
                                                   ShuffledMask, MaskForGaps)
                       : ShuffledMask;
  };

  const DataLayout &DL = Instr->getDataLayout();
  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    Value *MaskForGaps = nullptr;
    if (needsMaskForGaps()) {
      MaskForGaps =
          createBitMaskForGaps(State.Builder, State.VF.getFixedValue(), *Group);
      assert(MaskForGaps && "Mask for Gaps is required but it is null");
    }

    // Use a masked load (poison pass-through) if any mask applies, otherwise a
    // plain aligned load.
    Instruction *NewLoad;
    if (BlockInMask || MaskForGaps) {
      Value *GroupMask = CreateGroupMask(MaskForGaps);
      Value *PoisonVec = PoisonValue::get(VecTy);
      NewLoad = State.Builder.CreateMaskedLoad(VecTy, ResAddr,
                                               Group->getAlign(), GroupMask,
                                               PoisonVec, "wide.masked.vec");
    } else
      NewLoad = State.Builder.CreateAlignedLoad(VecTy, ResAddr,
                                                Group->getAlign(), "wide.vec");
    applyMetadata(*NewLoad);
    // TODO: Also manage existing metadata using VPIRMetadata.
    Group->addMetadata(NewLoad);

    ArrayRef<VPRecipeValue *> VPDefs = definedValues();
    if (VecTy->isScalableTy()) {
      // Scalable vectors cannot use arbitrary shufflevectors (only splats),
      // so must use intrinsics to deinterleave.
      // From here on NewLoad refers to the deinterleave call, whose result is
      // indexed per member with extractvalue below.
      assert(InterleaveFactor <= 8 &&
             "Unsupported deinterleave factor for scalable vectors");
      NewLoad = State.Builder.CreateIntrinsic(
          Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
          NewLoad->getType(), NewLoad,
          /*FMFSource=*/nullptr, "strided.vec");
    }

    // Produce the sub-vector holding the elements of member `Index`.
    auto CreateStridedVector = [&InterleaveFactor, &State,
                                &NewLoad](unsigned Index) -> Value * {
      assert(Index < InterleaveFactor && "Illegal group index");
      if (State.VF.isScalable())
        return State.Builder.CreateExtractValue(NewLoad, Index);

      // For fixed length VF, use shuffle to extract the sub-vectors from the
      // wide load.
      auto StrideMask =
          createStrideMask(Index, InterleaveFactor, State.VF.getFixedValue());
      return State.Builder.CreateShuffleVector(NewLoad, StrideMask,
                                               "strided.vec");
    };

    // I walks all group positions (including gaps); J indexes the recipe's
    // defined values, which exist only for present members.
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);

      // Skip the gaps in the group.
      if (!Member)
        continue;

      Value *StridedVec = CreateStridedVector(I);

      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
      }

      if (Group->isReverse())
        StridedVec = State.Builder.CreateVectorReverse(StridedVec, "reverse");

      State.set(VPDefs[J], StridedVec);
      ++J;
    }
    return;
  }

  // The sub vector type for current instruction.
  auto *SubVT = VectorType::get(ScalarTy, State.VF);

  // Vectorize the interleaved store group.
  // The assert below checks that a gap mask is produced exactly when the
  // recipe says one is needed.
  Value *MaskForGaps =
      createBitMaskForGaps(State.Builder, State.VF.getKnownMinValue(), *Group);
  assert(((MaskForGaps != nullptr) == needsMaskForGaps()) &&
         "Mismatch between NeedsMaskForGaps and MaskForGaps");
  ArrayRef<VPValue *> StoredValues = getStoredValues();
  // Collect the stored vector from each member.
  SmallVector<Value *, 4> StoredVecs;
  // StoredIdx indexes StoredValues, which only has entries for present
  // members; i walks all group positions.
  unsigned StoredIdx = 0;
  for (unsigned i = 0; i < InterleaveFactor; i++) {
    assert((Group->getMember(i) || MaskForGaps) &&
           "Fail to get a member from an interleaved store group");
    Instruction *Member = Group->getMember(i);

    // A gap has no stored value: fill its slot with poison so the interleaved
    // vector keeps the group's layout.
    if (!Member) {
      Value *Undef = PoisonValue::get(SubVT);
      StoredVecs.push_back(Undef);
      continue;
    }

    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    ++StoredIdx;

    if (Group->isReverse())
      StoredVec = State.Builder.CreateVectorReverse(StoredVec, "reverse");

    // If this member has different type, cast it to a unified type.

    if (StoredVec->getType() != SubVT)
      StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);

    StoredVecs.push_back(StoredVec);
  }

  // Interleave all the smaller vectors into one wider vector.
  Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
  // Use a masked store if any mask applies, otherwise a plain aligned store.
  Instruction *NewStoreInstr;
  if (BlockInMask || MaskForGaps) {
    Value *GroupMask = CreateGroupMask(MaskForGaps);
    NewStoreInstr = State.Builder.CreateMaskedStore(
        IVec, ResAddr, Group->getAlign(), GroupMask);
  } else
    NewStoreInstr =
        State.Builder.CreateAlignedStore(IVec, ResAddr, Group->getAlign());

  applyMetadata(*NewStoreInstr);
  // TODO: Also manage existing metadata using VPIRMetadata.
  Group->addMetadata(NewStoreInstr);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the group header (factor, address, optional mask) followed by one
/// line per existing member describing the value stored to, or loaded from,
/// that member's index.
void VPInterleaveRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  const InterleaveGroup<Instruction> *IG = getInterleaveGroup();
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << ", ";
  getAddr()->printAsOperand(O, SlotTracker);
  if (VPValue *Mask = getMask()) {
    O << ", ";
    Mask->printAsOperand(O, SlotTracker);
  }

  // Slot walks every position of the group; MemberNo counts only positions
  // that hold a member, since gaps have no operand or defined value.
  unsigned MemberNo = 0;
  for (unsigned Slot = 0; Slot < IG->getFactor(); ++Slot) {
    if (!IG->getMember(Slot))
      continue;

    if (getNumStoreOperands() > 0) {
      // Stored values start at operand 1 (operand 0 is presumably the address).
      O << "\n" << Indent << "  store ";
      getOperand(1 + MemberNo)->printAsOperand(O, SlotTracker);
      O << " to index " << Slot;
    } else {
      O << "\n" << Indent << "  ";
      getVPValue(MemberNo)->printAsOperand(O, SlotTracker);
      O << " = load from index " << Slot;
    }
    ++MemberNo;
  }
}


#endif


// Emit a single vp.load or vp.store covering the whole interleave group,
// bounded by EVL * factor elements. Loaded data is deinterleaved into one
// vector per existing member; stored data is interleaved into one wide vector.
void VPInterleaveEVLRecipe::execute(VPTransformState &State) {
  assert(State.VF.isScalable() &&
         "Only support scalable VF for EVL tail-folding.");
  assert(!needsMaskForGaps() &&
         "Masking gaps for scalable vectors is not yet supported.");
  const InterleaveGroup<Instruction> *Group = getInterleaveGroup();
  Instruction *Instr = Group->getInsertPos();

  // Prepare for the vector type of the interleaved load/store.
  Type *ScalarTy = getLoadStoreType(Instr);
  unsigned InterleaveFactor = Group->getFactor();
  assert(InterleaveFactor <= 8 &&
         "Unsupported deinterleave/interleave factor for scalable vectors");
  ElementCount WideVF = State.VF * InterleaveFactor;
  auto *VecTy = VectorType::get(ScalarTy, WideVF);

  VPValue *Addr = getAddr();
  Value *ResAddr = State.get(Addr, VPLane(0));
  Value *EVL = State.get(getEVL(), VPLane(0));
  // The wide access covers InterleaveFactor elements per EVL element, so scale
  // the EVL accordingly. The multiply is flagged nuw/nsw.
  Value *InterleaveEVL = State.Builder.CreateMul(
      EVL, ConstantInt::get(EVL->getType(), InterleaveFactor), "interleave.evl",
      /* NUW= */ true, /* NSW= */ true);
  LLVMContext &Ctx = State.Builder.getContext();

  // Mask for the wide access: the block mask interleaved with itself
  // InterleaveFactor times, or an all-true splat when there is no block mask.
  Value *GroupMask = nullptr;
  if (VPValue *BlockInMask = getMask()) {
    SmallVector<Value *> Ops(InterleaveFactor, State.get(BlockInMask));
    GroupMask = interleaveVectors(State.Builder, Ops, "interleaved.mask");
  } else {
    GroupMask =
        State.Builder.CreateVectorSplat(WideVF, State.Builder.getTrue());
  }

  // Vectorize the interleaved load group.
  if (isa<LoadInst>(Instr)) {
    CallInst *NewLoad = State.Builder.CreateIntrinsic(
        VecTy, Intrinsic::vp_load, {ResAddr, GroupMask, InterleaveEVL}, nullptr,
        "wide.vp.load");
    // Argument 0 of vp.load is the address; attach the group alignment to it.
    NewLoad->addParamAttr(0,
                          Attribute::getWithAlignment(Ctx, Group->getAlign()));

    applyMetadata(*NewLoad);
    // TODO: Also manage existing metadata using VPIRMetadata.
    Group->addMetadata(NewLoad);

    // Scalable vectors cannot use arbitrary shufflevectors (only splats),
    // so must use intrinsics to deinterleave.
    // From here on NewLoad refers to the deinterleave call, whose result is
    // indexed per member with extractvalue below.
    NewLoad = State.Builder.CreateIntrinsic(
        Intrinsic::getDeinterleaveIntrinsicID(InterleaveFactor),
        NewLoad->getType(), NewLoad,
        /*FMFSource=*/nullptr, "strided.vec");

    const DataLayout &DL = Instr->getDataLayout();
    // I walks all group positions (including gaps); J indexes the recipe's
    // defined values, which exist only for present members.
    for (unsigned I = 0, J = 0; I < InterleaveFactor; ++I) {
      Instruction *Member = Group->getMember(I);
      // Skip the gaps in the group.
      if (!Member)
        continue;

      Value *StridedVec = State.Builder.CreateExtractValue(NewLoad, I);
      // If this member has different type, cast the result type.
      if (Member->getType() != ScalarTy) {
        VectorType *OtherVTy = VectorType::get(Member->getType(), State.VF);
        StridedVec =
            createBitOrPointerCast(State.Builder, StridedVec, OtherVTy, DL);
      }

      State.set(getVPValue(J), StridedVec);
      ++J;
    }
    return;
  } // End for interleaved load.

  // The sub vector type for current instruction.
  auto *SubVT = VectorType::get(ScalarTy, State.VF);
  // Vectorize the interleaved store group.
  ArrayRef<VPValue *> StoredValues = getStoredValues();
  // Collect the stored vector from each member.
  SmallVector<Value *, 4> StoredVecs;
  const DataLayout &DL = Instr->getDataLayout();
  // I walks all group positions; StoredIdx indexes StoredValues, which only
  // has entries for present members.
  for (unsigned I = 0, StoredIdx = 0; I < InterleaveFactor; I++) {
    Instruction *Member = Group->getMember(I);
    // A gap has no stored value: fill its slot with poison so the interleaved
    // vector keeps the group's layout.
    if (!Member) {
      StoredVecs.push_back(PoisonValue::get(SubVT));
      continue;
    }

    Value *StoredVec = State.get(StoredValues[StoredIdx]);
    // If this member has different type, cast it to a unified type.
    if (StoredVec->getType() != SubVT)
      StoredVec = createBitOrPointerCast(State.Builder, StoredVec, SubVT, DL);

    StoredVecs.push_back(StoredVec);
    ++StoredIdx;
  }

  // Interleave all the smaller vectors into one wider vector.
  Value *IVec = interleaveVectors(State.Builder, StoredVecs, "interleaved.vec");
  CallInst *NewStore =
      State.Builder.CreateIntrinsic(Type::getVoidTy(Ctx), Intrinsic::vp_store,
                                    {IVec, ResAddr, GroupMask, InterleaveEVL});
  // Argument 1 of vp.store is the address; attach the group alignment to it.
  NewStore->addParamAttr(1,
                         Attribute::getWithAlignment(Ctx, Group->getAlign()));

  applyMetadata(*NewStore);
  // TODO: Also manage existing metadata using VPIRMetadata.
  Group->addMetadata(NewStore);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print the group header (factor, address, EVL, optional mask) followed by
/// one line per existing member describing the value stored to, or loaded
/// from, that member's index.
void VPInterleaveEVLRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                        VPSlotTracker &SlotTracker) const {
  const InterleaveGroup<Instruction> *IG = getInterleaveGroup();
  O << Indent << "INTERLEAVE-GROUP with factor " << IG->getFactor() << ", ";
  getAddr()->printAsOperand(O, SlotTracker);
  O << ", ";
  getEVL()->printAsOperand(O, SlotTracker);
  VPValue *Mask = getMask();
  if (Mask) {
    O << ", ";
    Mask->printAsOperand(O, SlotTracker);
  }

  // Slot walks every position of the group; MemberNo counts only positions
  // that hold a member, since gaps have no operand or defined value.
  unsigned MemberNo = 0;
  for (unsigned Slot = 0; Slot < IG->getFactor(); ++Slot) {
    if (!IG->getMember(Slot))
      continue;

    if (getNumStoreOperands() > 0) {
      // Stored values start at operand 2 (operands 0 and 1 are presumably the
      // address and the EVL).
      O << "\n" << Indent << "  vp.store ";
      getOperand(2 + MemberNo)->printAsOperand(O, SlotTracker);
      O << " to index " << Slot;
    } else {
      O << "\n" << Indent << "  ";
      getVPValue(MemberNo)->printAsOperand(O, SlotTracker);
      O << " = vp.load from index " << Slot;
    }
    ++MemberNo;
  }
}


#endif


/// Return the cost of the whole interleave group for \p VF: the target's
/// interleaved memory-op cost for the wide access, plus one reverse shuffle per
/// member when the group is reversed. The element type is taken from the
/// VPValue that corresponds to the group's insert position.
InstructionCost VPInterleaveBase::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
  Instruction *InsertPos = getInsertPos();
  const unsigned InterleaveFactor = IG->getFactor();

  // Find the VPValue index of the insert position. Gaps have no VPValue, so
  // only existing members are counted. The walk is bounded by the factor so
  // it always terminates, even if the insert position were not found.
  unsigned InsertPosIdx = 0;
  bool FoundInsertPos = false;
  for (unsigned Idx = 0; Idx < InterleaveFactor; ++Idx) {
    auto *Member = IG->getMember(Idx);
    if (!Member)
      continue;
    if (Member == InsertPos) {
      FoundInsertPos = true;
      break;
    }
    ++InsertPosIdx;
  }
  assert(FoundInsertPos &&
         "Insert position must be a member of the interleave group");
  (void)FoundInsertPos;

  // Load groups define values; store groups take stored values as operands.
  const VPValue *ValV = getNumDefinedValues() > 0
                            ? getVPValue(InsertPosIdx)
                            : getStoredValues()[InsertPosIdx];
  Type *ValTy = ValV->getScalarType();
  auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF));
  unsigned AS =
      cast<PointerType>(getAddr()->getScalarType())->getAddressSpace();

  auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);

  // Holds the indices of existing members in the interleaved group.
  SmallVector<unsigned, 4> Indices;
  for (unsigned IF = 0; IF < InterleaveFactor; IF++)
    if (IG->getMember(IF))
      Indices.push_back(IF);

  // Calculate the cost of the whole interleaved group.
  InstructionCost Cost = Ctx.TTI.getInterleavedMemoryOpCost(
      InsertPos->getOpcode(), WideVecTy, InterleaveFactor, Indices,
      IG->getAlign(), AS, Ctx.CostKind, getMask(), NeedsMaskForGaps);

  if (!IG->isReverse())
    return Cost;

  // Reversed groups additionally pay for one reverse shuffle per member.
  return Cost + IG->getNumMembers() *
                    Ctx.TTI.getShuffleCost(TargetTransformInfo::SK_Reverse,
                                           VectorTy, VectorTy, {}, Ctx.CostKind,
                                           0);
}


/// Return true if only scalar values of this recipe are used and, when
/// \p IsScalable is set, additionally only its first lane is used.
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) {
  if (!vputils::onlyScalarValuesUsed(this))
    return false;
  if (!IsScalable)
    return true;
  return vputils::onlyFirstLaneUsed(this);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "EMIT <def> = WIDEN-POINTER-INDUCTION " followed by the start value,
/// the step value and the remaining operands (one for the 3-operand form,
/// three for the 5-operand form), separated by ", ".
void VPWidenPointerInductionRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  assert((getNumOperands() == 3 || getNumOperands() == 5) &&
         "unexpected number of operands");
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);

  // Operand 2 is always printed; operands 3 and 4 only in the 5-operand form.
  const unsigned LastPrinted = getNumOperands() == 5 ? 4 : 2;
  for (unsigned OpNo = 2; OpNo <= LastPrinted; ++OpNo) {
    O << ", ";
    getOperand(OpNo)->printAsOperand(O, SlotTracker);
  }
}


/// Print "EMIT <def> = EXPAND SCEV <expr>".
void VPExpandSCEVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV ";
  O << *Expr;
}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "EMIT <def> = WIDEN-CANONICAL-INDUCTION" followed by the recipe's
/// flags and operands.
void VPWidenCanonicalIVRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "EMIT ";
  printAsOperand(O, SlotTracker);

  O << " = WIDEN-CANONICAL-INDUCTION";
  printFlags(O);
  printOperands(O, SlotTracker);
}


#endif


/// Create the "vector.recur" phi for a first-order recurrence and add its
/// incoming value from the predecessor block. For vector VFs the incoming
/// value is a vector built in that block with the start value inserted at
/// lane (runtime VF - 1).
void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) {
  auto &Builder = State.Builder;

  // The recurrence starts from a live-in IR value.
  Value *InitV = getStartValue()->getLiveInIRValue();

  // The phi has the start value's type for scalar VF, a vector of it otherwise.
  Type *PhiTy = InitV->getType();
  if (!State.VF.isScalar())
    PhiTy = VectorType::get(PhiTy, State.VF);

  BasicBlock *PredBB =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  if (State.VF.isVector()) {
    // Build the initial vector at the end of the predecessor block; the guard
    // restores the builder's insertion point when this scope ends.
    IRBuilder<>::InsertPointGuard Guard(Builder);
    Builder.SetInsertPoint(PredBB->getTerminator());
    auto *IdxTy = Builder.getInt32Ty();
    auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF);
    auto *LastLane = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 1));
    InitV = Builder.CreateInsertElement(PoisonValue::get(PhiTy), InitV,
                                        LastLane, "vector.recur.init");
  }

  // Create a phi node for the new recurrence; only the incoming value from
  // the predecessor block is added here.
  PHINode *RecurPhi = PHINode::Create(PhiTy, 2, "vector.recur");
  RecurPhi->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  RecurPhi->addIncoming(InitV, PredBB);
  State.set(this, RecurPhi);
}


/// Return the cost of the recurrence phi: zero unless \p VF is scalar, in
/// which case the target's PHI control-flow instruction cost is returned.
InstructionCost
VPFirstOrderRecurrencePHIRecipe::computeCost(ElementCount VF,
                                             VPCostContext &Ctx) const {
  if (!VF.isScalar())
    return 0;

  return Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "FIRST-ORDER-RECURRENCE-PHI <def> = phi " followed by the operands.
void VPFirstOrderRecurrencePHIRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);

  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif


/// Create the "vec.phi" node for a reduction in the vector loop header and add
/// its incoming start value from the predecessor block.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  // Phi nodes have cycles, so they are vectorized in two stages. This is
  // stage #1: create the new PHI with only its start value, so that the
  // instructions using the PHI can be vectorized against it. Reductions do not
  // have to start at zero; any loop invariant value may be the start.
  BasicBlock *PredBB =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));

  // A scalar start value is requested for scalar VF or in-loop reductions.
  const bool WantScalarStart = State.VF.isScalar() || isInLoop();
  Value *Start = State.get(getStartValue(), WantScalarStart);

  BasicBlock *Header = State.CFG.PrevBB;
  assert(State.CurrentParentLoop->getHeader() == Header &&
         "recipe must be in the vector loop header");

  // The phi takes the type of the start value.
  PHINode *RdxPhi = PHINode::Create(Start->getType(), 2, "vec.phi");
  RdxPhi->insertBefore(Header->getFirstInsertionPt());
  State.set(this, RdxPhi, isInLoop());

  RdxPhi->addIncoming(Start, PredBB);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "WIDEN-REDUCTION-PHI <def> = phi (<kind>)" followed by flags and
/// operands, and a VF scaling note when the VF scale factor exceeds 1.
void VPReductionPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                       VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN-REDUCTION-PHI ";
  printAsOperand(O, SlotTracker);

  O << " = phi (";
  printRecurrenceKind(O, Kind);
  O << ")";

  printFlags(O);
  printOperands(O, SlotTracker);

  const auto ScaleFactor = getVFScaleFactor();
  if (ScaleFactor > 1)
    O << " (VF scaled by 1/" << ScaleFactor << ")";
}


#endif


/// Return whether only the first lane of this recipe is used. \p Op must be
/// one of the recipe's operands; the answer does not otherwise depend on it.
bool VPBlendRecipe::usesFirstLaneOnly(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  const bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  return FirstLaneOnly;
}


/// Create an empty phi (two reserved incoming slots) whose type matches the
/// generated value of operand 0, and register it as this recipe's value. No
/// incoming values are added here.
void VPWidenPHIRecipe::execute(VPTransformState &State) {
  Type *PhiTy = State.get(getOperand(0))->getType();
  PHINode *WidePhi = State.Builder.CreatePHI(PhiTy, 2, Name);
  State.set(this, WidePhi);
}


/// Return the target's control-flow cost of a PHI instruction; \p VF is not
/// consulted.
InstructionCost VPWidenPHIRecipe::computeCost(ElementCount VF,
                                              VPCostContext &Ctx) const {
  InstructionCost PhiCost =
      Ctx.TTI.getCFInstrCost(Instruction::PHI, Ctx.CostKind);
  return PhiCost;
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "WIDEN-PHI <def> = phi " followed by the phi operands.
void VPWidenPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN-PHI ";
  printAsOperand(O, SlotTracker);

  O << " = phi ";
  printPhiOperands(O, SlotTracker);
}


#endif


/// Create the "active.lane.mask" phi, typed like the generated value of
/// operand 0, and add that value as the incoming value from the predecessor
/// block.
void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) {
  BasicBlock *PredBB =
      State.CFG.VPBB2IRBB.at(getParent()->getCFGPredecessor(0));
  Value *EntryMask = State.get(getOperand(0));

  PHINode *MaskPhi =
      State.Builder.CreatePHI(EntryMask->getType(), 2, "active.lane.mask");
  MaskPhi->addIncoming(EntryMask, PredBB);

  State.set(this, MaskPhi);
}


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "ACTIVE-LANE-MASK-PHI <def> = phi " followed by the operands.
void VPActiveLaneMaskPHIRecipe::printRecipe(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "ACTIVE-LANE-MASK-PHI ";
  printAsOperand(O, SlotTracker);

  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif


#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)


/// Print "CURRENT-ITERATION-PHI <def> = phi " followed by the operands.
void VPCurrentIterationPHIRecipe::printRecipe(
    raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "CURRENT-ITERATION-PHI ";
  printAsOperand(O, SlotTracker);

  O << " = phi ";
  printOperands(O, SlotTracker);
}


#endif

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

addOperand
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
Definition AMDGPUDisassembler.cpp:81

Arguments
AMDGPU Lower Kernel Arguments
Definition AMDGPULowerKernelArguments.cpp:393

Select
AMDGPU Register Bank Select
Definition AMDGPURegBankSelect.cpp:68

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AssumptionCache.h

getParent
static const Function * getParent(const Value *V)
Definition BasicAliasAnalysis.cpp:894

BasicBlockUtils.h

X
#define X(NUM, ENUM, NAME)
Definition ELF.h:853

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")

Casting.h

replaceAllUsesWith
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
Definition CodeGenPrepare.cpp:1124

CommandLine.h

IntrinsicCostStrategy::InstructionCost
@ InstructionCost
Definition CostModel.cpp:51

getPointer
Value * getPointer(Value *Ptr)
Definition HexagonVectorCombine.cpp:1994

IRBuilder.h

BasicBlock.h

Instruction.h

Type.h

Value.h

IVDescriptors.h

users
iv users
Definition IVUsers.cpp:48

Instructions.h

getValue
static constexpr Value * getValue(Ty &ValueOrUse)
Definition Instrumentor.cpp:824

getOpcode
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
Definition Instrumentor.cpp:1003

getMask
static std::pair< Value *, APInt > getMask(Value *WideMask, unsigned Factor, ElementCount LeafValueEC)
Definition InterleavedAccessPass.cpp:586

Intrinsics.h

NumOps
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
Definition ItaniumDemangle.h:3473

TemplateParamKind::Type
@ Type
Definition ItaniumDemangle.h:1243

Ops
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
Definition ItaniumDemangle.h:3391

LoopInfo.h

LoopUtils.h

LoopVectorizationPlanner.h
This file provides a LoopVectorizationPlanner class.

getAddressAccessSCEV
static const SCEV * getAddressAccessSCEV(Value *Ptr, PredicatedScalarEvolution &PSE, const Loop *TheLoop)
Gets the address access SCEV for Ptr, if it should be used for cost modeling according to isAddressSC...
Definition LoopVectorize.cpp:4210

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

getCalledFunction
static const Function * getCalledFunction(const Value *V)
Definition MemoryBuiltins.cpp:155

isOrdered
static bool isOrdered(const Instruction *I)
Definition MemorySSA.cpp:1751

OpIdx
MachineInstr unsigned OpIdx
Definition NVPTXPrologEpilogPass.cpp:56

II
uint64_t IntrinsicInst * II
Definition NVVMIntrRange.cpp:46

Cond
const SmallVectorImpl< MachineOperand > & Cond
Definition RISCVRedundantCopyElimination.cpp:73

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScalarEvolutionExpressions.h

SmallVectorExtras.h
This file defines less commonly used SmallVector utilities.

SmallVector.h
This file defines the SmallVector class.

Debug.h

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:119

getType
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39

Twine.h

VPlanHelpers.h
This file contains the declarations of different VPlan-related auxiliary helpers.

VPlanPatternMatch.h

interleaveVectors
static Value * interleaveVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vals, const Twine &Name)
Return a vector containing interleaved elements from multiple smaller input vectors.
Definition VPlanRecipes.cpp:4249

createBitOrPointerCast
static Value * createBitOrPointerCast(IRBuilderBase &Builder, Value *V, VectorType *DstVTy, const DataLayout &DL)
Definition VPlanRecipes.cpp:4217

getSubRecurOpcode
static Instruction::BinaryOps getSubRecurOpcode(RecurKind Kind)
Definition VPlanRecipes.cpp:662

VectorParts
SmallVector< Value *, 2 > VectorParts
Definition VPlanRecipes.cpp:45

printRecurrenceKind
static void printRecurrenceKind(raw_ostream &OS, const RecurKind &Kind)
Definition VPlanRecipes.cpp:2473

getCalledFnOperandIndex
static unsigned getCalledFnOperandIndex(ArrayRef< VPValue * > Operands)
For call VPInstruction operands, return the operand index of the called function.
Definition VPlanRecipes.cpp:432

VPlanUtils.h

VPlan.h
This file contains the declarations of the Vectorization Plan base classes:

Node
Definition ItaniumDemangle.h:166

Node::printAsOperand
void printAsOperand(OutputBuffer &OB, Prec P=Prec::Default, bool StrictlyWorse=false) const
Definition ItaniumDemangle.h:275

VectorType
Definition ItaniumDemangle.h:1189

llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235

llvm::APInt::ule
bool ule(const APInt &RHS) const
Unsigned less or equal comparison.
Definition APInt.h:1157

llvm::ArrayRef
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40

llvm::ArrayRef::size
size_t size() const
Get the array size.
Definition ArrayRef.h:141

llvm::ArrayRef::empty
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136

llvm::Attribute::getWithAlignment
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition Attributes.cpp:234

llvm::BasicBlock
LLVM Basic Block Representation.
Definition BasicBlock.h:62

llvm::BasicBlock::getFirstInsertionPt
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI instruction.
Definition BasicBlock.cpp:365

llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237

llvm::CallBase::addParamAttr
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition InstrTypes.h:1575

llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition Instructions.h:1531

llvm::CastInst::isBitOrNoopPointerCastable
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Definition Instructions.cpp:3235

llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition InstrTypes.h:1049

llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740

llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition InstrTypes.h:773

llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763

llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765

llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition InstrTypes.h:761

llvm::CmpInst::getPredicateName
static LLVM_ABI StringRef getPredicateName(Predicate P)
Definition Instructions.cpp:3680

llvm::CmpPredicate
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign information.
Definition CmpPredicate.h:23

llvm::CondBrInst::setSuccessor
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Definition Instructions.h:3263

llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition Constants.h:87

llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159

llvm::Constant
This is an important base class in LLVM.
Definition Constant.h:43

llvm::Constant::getNullValue
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition Constants.cpp:363

llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64

llvm::DebugLoc
A debug info location.
Definition DebugLoc.h:124

llvm::DebugLoc::getUnknown
static DebugLoc getUnknown()
Definition DebugLoc.h:151

llvm::ElementCount
Definition TypeSize.h:298

llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition TypeSize.h:324

llvm::ElementCount::getScalable
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312

llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309

llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition TypeSize.h:320

llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23

llvm::FastMathFlags::print
LLVM_ABI void print(raw_ostream &O) const
Print fast-math flags to O.
Definition Operator.cpp:283

llvm::FastMathFlags::setAllowContract
void setAllowContract(bool B=true)
Definition FMF.h:90

llvm::FastMathFlags::noSignedZeros
bool noSignedZeros() const
Definition FMF.h:67

llvm::FastMathFlags::noInfs
bool noInfs() const
Definition FMF.h:66

llvm::FastMathFlags::setAllowReciprocal
void setAllowReciprocal(bool B=true)
Definition FMF.h:87

llvm::FastMathFlags::allowReciprocal
bool allowReciprocal() const
Definition FMF.h:68

llvm::FastMathFlags::setNoSignedZeros
void setNoSignedZeros(bool B=true)
Definition FMF.h:84

llvm::FastMathFlags::allowReassoc
bool allowReassoc() const
Flag queries.
Definition FMF.h:64

llvm::FastMathFlags::approxFunc
bool approxFunc() const
Definition FMF.h:70

llvm::FastMathFlags::setNoNaNs
void setNoNaNs(bool B=true)
Definition FMF.h:78

llvm::FastMathFlags::setAllowReassoc
void setAllowReassoc(bool B=true)
Flag setters.
Definition FMF.h:75

llvm::FastMathFlags::noNaNs
bool noNaNs() const
Definition FMF.h:65

llvm::FastMathFlags::setApproxFunc
void setApproxFunc(bool B=true)
Definition FMF.h:93

llvm::FastMathFlags::setNoInfs
void setNoInfs(bool B=true)
Definition FMF.h:81

llvm::FastMathFlags::allowContract
bool allowContract() const
Definition FMF.h:69

llvm::FunctionType
Class to represent function types.
Definition DerivedTypes.h:165

llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition DerivedTypes.h:197

llvm::Function
Definition Function.h:65

llvm::Function::willReturn
bool willReturn() const
Determine if the function will return.
Definition Function.h:669

llvm::Function::getIntrinsicID
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intrinsic if the function is not an intrinsic, or if the pointer is null.
Definition Function.h:246

llvm::Function::doesNotThrow
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602

llvm::Function::doesNotAccessMemory
bool doesNotAccessMemory() const
Determine if the function does not access memory.
Definition Function.cpp:867

llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216

llvm::GEPNoWrapFlags
Represents flags for the getelementptr instruction/expression.
Definition GEPNoWrapFlags.h:26

llvm::GEPNoWrapFlags::none
static GEPNoWrapFlags none()
Definition GEPNoWrapFlags.h:46

llvm::IRBuilderBase::FastMathFlagGuard
Definition IRBuilder.h:438

llvm::IRBuilderBase::InsertPointGuard
Definition IRBuilder.h:416

llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114

llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2637

llvm::IRBuilderBase::getInt1Ty
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition IRBuilder.h:571

llvm::IRBuilderBase::CreateInsertValue
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2691

llvm::IRBuilderBase::CreateExtractElement
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2625

llvm::IRBuilderBase::CreateVectorSpliceRight
LLVM_ABI Value * CreateVectorSpliceRight(Value *V1, Value *V2, Value *Offset, const Twine &Name="")
Create a vector.splice.right intrinsic call, or a shufflevector that produces the same result if the ...
Definition IRBuilder.cpp:1227

llvm::IRBuilderBase::CreateCondBr
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1238

llvm::IRBuilderBase::CreateSelectFMF
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.cpp:1117

llvm::IRBuilderBase::CreateVectorSplat
LLVM_ABI Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains V broadcasted to NumElts elements.
Definition IRBuilder.cpp:1245

llvm::IRBuilderBase::CreateExtractValue
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2684

llvm::IRBuilderBase::CreateIntrinsic
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
Definition IRBuilder.cpp:936

llvm::IRBuilderBase::CreateSelect
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.cpp:1112

llvm::IRBuilderBase::CreateFreeze
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2703

llvm::IRBuilderBase::getInt32Ty
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:586

llvm::IRBuilderBase::CreatePtrAdd
Value * CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition IRBuilder.h:2101

llvm::IRBuilderBase::CreateCast
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2286

llvm::IRBuilderBase::setFastMathFlags
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition IRBuilder.h:352

llvm::IRBuilderBase::CreateVectorReverse
LLVM_ABI Value * CreateVectorReverse(Value *V, const Twine &Name="")
Return a vector value that contains the vector V reversed.
Definition IRBuilder.cpp:1186

llvm::IRBuilderBase::CreateICmpNE
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2388

llvm::IRBuilderBase::CreateOrReduce
LLVM_ABI CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition IRBuilder.cpp:464

llvm::IRBuilderBase::CreateLogicalAnd
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.h:1792

llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529

llvm::IRBuilderBase::CreateCmp
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2518

llvm::IRBuilderBase::CreateNot
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1876

llvm::IRBuilderBase::CreateICmpEQ
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2384

llvm::IRBuilderBase::CreateCountTrailingZeroElems
Value * CreateCountTrailingZeroElems(Type *ResTy, Value *Mask, bool ZeroIsPoison=true, const Twine &Name="")
Create a call to llvm.experimental_cttz_elts.
Definition IRBuilder.h:1176

llvm::IRBuilderBase::CreateSub
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461

llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2130

llvm::IRBuilderBase::CreateAdd
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1444

llvm::IRBuilderBase::getFalse
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514

llvm::IRBuilderBase::CreateBinOp
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:1753

llvm::IRBuilderBase::CreateICmpUGE
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2396

llvm::IRBuilderBase::CreateLogicalOr
Value * CreateLogicalOr(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.h:1800

llvm::IRBuilderBase::CreateICmp
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2494

llvm::IRBuilderBase::CreateOr
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1614

llvm::IRBuilderBase::CreateMul
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1478

llvm::IRBuilderBase::CreateUnaryIntrinsic
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
Definition IRBuilder.cpp:914

llvm::InductionDescriptor::IK_IntInduction
@ IK_IntInduction
Integer induction variable. Step = C.
Definition IVDescriptors.h:380

llvm::InstructionCost
Definition InstructionCost.h:30

llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition InstructionCost.h:82

llvm::InstructionCost::isValid
bool isValid() const
Definition InstructionCost.h:88

llvm::Instruction
Definition Instruction.h:70

llvm::Instruction::isCast
bool isCast() const
Definition Instruction.h:353

llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition Instruction.h:349

llvm::Instruction::eraseFromParent
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition Instruction.cpp:112

llvm::Instruction::getOpcodeName
const char * getOpcodeName() const
Definition Instruction.h:346

llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition Instruction.h:344

llvm::Instruction::BinaryOps
BinaryOps
Definition Instruction.h:1056

llvm::Instruction::isUnaryOp
bool isUnaryOp() const
Definition Instruction.h:348

llvm::Instruction::CastOps
CastOps
Definition Instruction.h:1070

llvm::IntegerType::get
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350

llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition VectorUtils.h:515

llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition VectorUtils.h:531

llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition VectorUtils.h:584

llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition VectorUtils.h:530

llvm::InterleaveGroup::getInsertPos
InstTy * getInsertPos() const
Definition VectorUtils.h:609

llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instructions in this group to NewInst.
Definition VectorUtils.cpp:1717

llvm::InterleaveGroup::getAlign
Align getAlign() const
Definition VectorUtils.h:532

llvm::IntrinsicCostAttributes
Definition TargetTransformInfo.h:178

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::Loop
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40

llvm::MemIntrinsicCostAttributes
Information for memory intrinsic cost model.
Definition TargetTransformInfo.h:128

llvm::Metadata
Root of the metadata hierarchy.
Definition Metadata.h:64

llvm::Metadata::print
LLVM_ABI void print(raw_ostream &OS, const Module *M=nullptr, bool IsForDebug=false) const
Print.
Definition AsmWriter.cpp:5426

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::PHINode
Definition Instructions.h:2661

llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition Instructions.h:2795

llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
Definition Instructions.h:2695

llvm::PoisonValue::get
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition Constants.cpp:2026

llvm::PredicatedScalarEvolution
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of existing predicates.
Definition ScalarEvolution.h:2621

llvm::PredicatedScalarEvolution::getSE
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
Definition ScalarEvolution.h:2671

llvm::RecurrenceDescriptor::getOpcode
static LLVM_ABI unsigned getOpcode(RecurKind Kind)
Returns the opcode corresponding to the RecurrenceKind.
Definition IVDescriptors.cpp:1228

llvm::RecurrenceDescriptor::getOpcode
unsigned getOpcode() const
Definition IVDescriptors.h:230

llvm::RecurrenceDescriptor::isAnyOfRecurrenceKind
static bool isAnyOfRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,y) is loop invariant.
Definition IVDescriptors.h:281

llvm::RecurrenceDescriptor::isSubRecurrenceKind
static LLVM_ABI bool isSubRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is for a sub operation.
Definition IVDescriptors.cpp:95

llvm::RecurrenceDescriptor::isFindIVRecurrenceKind
static bool isFindIVRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is of the form select(cmp(),x,y) where one of (x,y) is a loop induction variable.
Definition IVDescriptors.h:287

llvm::RecurrenceDescriptor::isMinMaxRecurrenceKind
static bool isMinMaxRecurrenceKind(RecurKind Kind)
Returns true if the recurrence kind is any min/max kind.
Definition IVDescriptors.h:275

llvm::SCEV
This class represents an analyzed expression in the program.
Definition ScalarEvolution.h:254

llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition Instructions.h:1710

llvm::SlotTracker
This class provides computation of slot numbers for LLVM Assembly writing.
Definition AsmWriter.cpp:790

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition SmallPtrSet.h:387

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
Definition SmallPtrSet.h:533

llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition SmallVector.h:966

llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition SmallVector.h:691

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition SmallVector.h:1225

llvm::StringRef
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56

llvm::TargetTransformInfo::VectorInstrContext
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
Definition TargetTransformInfo.h:1064

llvm::TargetTransformInfo::VectorInstrContext::None
@ None
The insert/extract is not used with a load/store.
Definition TargetTransformInfo.h:1065

llvm::TargetTransformInfo::VectorInstrContext::Load
@ Load
The value being inserted comes from a load (InsertElement only).
Definition TargetTransformInfo.h:1066

llvm::TargetTransformInfo::VectorInstrContext::Store
@ Store
The extracted value is stored (ExtractElement only).
Definition TargetTransformInfo.h:1067

llvm::TargetTransformInfo::getPartialReductionExtendKind
static LLVM_ABI PartialReductionExtendKind getPartialReductionExtendKind(Instruction *I)
Get the kind of extension that an instruction represents.
Definition TargetTransformInfo.cpp:1053

llvm::TargetTransformInfo::getOperandInfo
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition TargetTransformInfo.cpp:930

llvm::TargetTransformInfo::PR_None
@ PR_None
Definition TargetTransformInfo.h:271

llvm::TargetTransformInfo::TCC_Free
@ TCC_Free
Expected to fold away in lowering.
Definition TargetTransformInfo.h:357

llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition TargetTransformInfo.h:1257

llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition TargetTransformInfo.h:1246

llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition TargetTransformInfo.h:1579

llvm::TargetTransformInfo::CastContextHint::Reversed
@ Reversed
The cast is used with a reversed load/store.
Definition TargetTransformInfo.h:1585

llvm::TargetTransformInfo::CastContextHint::Masked
@ Masked
The cast is used with a masked load/store.
Definition TargetTransformInfo.h:1582

llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
Definition TargetTransformInfo.h:1581

llvm::TargetTransformInfo::CastContextHint::Interleave
@ Interleave
The cast is used with an interleaved load/store.
Definition TargetTransformInfo.h:1584

llvm::TargetTransformInfo::CastContextHint::GatherScatter
@ GatherScatter
The cast is used with a gather/scatter.
Definition TargetTransformInfo.h:1583

llvm::TargetTransformInfo::OK_UniformValue
@ OK_UniformValue
Definition TargetTransformInfo.h:1266

llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition TargetTransformInfo.h:1265

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Type::isByteTy
bool isByteTy() const
True if this is an instance of ByteType.
Definition Type.h:242

llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288

llvm::Type::getInt32Ty
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309

llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282

llvm::Type::getVoidTy
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:282

llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368

llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276

llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition Type.h:130

llvm::Type::getScalarSizeInBits
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232

llvm::Type::getInt1Ty
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:306

llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186

llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257

llvm::Type::getIntNTy
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313

llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141

llvm::User::value_op_end
value_op_iterator value_op_end()
Definition User.h:288

llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition User.h:212

llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition User.h:207

llvm::User::value_op_begin
value_op_iterator value_op_begin()
Definition User.h:285

llvm::VPActiveLaneMaskPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the active lane mask phi of the vector loop.
Definition VPlanRecipes.cpp:4847

llvm::VPActiveLaneMaskPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4858

llvm::VPBasicBlock
VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph.
Definition VPlan.h:4399

llvm::VPBasicBlock::getRecipeList
RecipeListTy & getRecipeList()
Returns a reference to the list of recipes.
Definition VPlan.h:4452

llvm::VPBasicBlock::end
iterator end()
Definition VPlan.h:4436

llvm::VPBasicBlock::insert
void insert(VPRecipeBase *Recipe, iterator InsertPt)
Definition VPlan.h:4465

llvm::VPBlendRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBlendRecipe.
Definition VPlanRecipes.cpp:3153

llvm::VPBlendRecipe::getIncomingValue
VPValue * getIncomingValue(unsigned Idx) const
Return incoming value number Idx.
Definition VPlan.h:3008

llvm::VPBlendRecipe::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming values, taking into account when normalized the first incoming value will have no mask.
Definition VPlan.h:3003

llvm::VPBlendRecipe::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:4819

llvm::VPBlendRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3168

llvm::VPBlendRecipe::isNormalized
bool isNormalized() const
A normalized blend is one that has an odd number of operands, whereby the first operand does not have an associated mask.
Definition VPlan.h:2999

llvm::VPBlockBase
VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
Definition VPlan.h:94

llvm::VPBlockBase::getPredecessors
const VPBlocksTy & getPredecessors() const
Definition VPlan.h:222

llvm::VPBlockBase::getPlan
VPlan * getPlan()
Definition VPlan.cpp:211

llvm::VPBlockBase::printAsOperand
void printAsOperand(raw_ostream &OS, bool PrintType=false) const
Definition VPlan.h:364

llvm::VPBranchOnMaskRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPBranchOnMaskRecipe.
Definition VPlanRecipes.cpp:3952

llvm::VPBranchOnMaskRecipe::execute
void execute(VPTransformState &State) override
Generate the extraction of the appropriate bit from the block mask and the conditional branch.
Definition VPlanRecipes.cpp:3948

llvm::VPBuilder
VPlan-based builder utility analogous to IRBuilder.
Definition LoopVectorizationPlanner.h:99

llvm::VPCurrentIterationPHIRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4869

llvm::VPDef::getNumDefinedValues
unsigned getNumDefinedValues() const
Returns the number of values defined by the VPDef.
Definition VPlanValue.h:561

llvm::VPDef::getVPSingleValue
VPValue * getVPSingleValue()
Returns the only VPValue defined by the VPDef.
Definition VPlanValue.h:534

llvm::VPDef::getVPValue
VPValue * getVPValue(unsigned I)
Returns the VPValue with index I defined by the VPDef.
Definition VPlanValue.h:546

llvm::VPDef::definedValues
ArrayRef< VPRecipeValue * > definedValues()
Returns an ArrayRef of the values defined by the VPDef.
Definition VPlanValue.h:556

llvm::VPDerivedIVRecipe::getInductionKind
InductionDescriptor::InductionKind getInductionKind() const
Definition VPlan.h:4220

llvm::VPDerivedIVRecipe::getIndex
VPValue * getIndex() const
Definition VPlan.h:4217

llvm::VPDerivedIVRecipe::getStartValue
VPIRValue * getStartValue() const
Definition VPlan.h:4216

llvm::VPDerivedIVRecipe::getStepValue
VPValue * getStepValue() const
Definition VPlan.h:4218

llvm::VPDerivedIVRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPDerivedIVRecipe.
Definition VPlanRecipes.cpp:2841

llvm::VPDerivedIVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2922

llvm::VPExpandSCEVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4711

llvm::VPExpandSCEVRecipe::VPExpandSCEVRecipe
VPExpandSCEVRecipe(const SCEV *Expr)
Definition VPlanRecipes.cpp:425

llvm::VPExpressionRecipe::isVectorToScalar
bool isVectorToScalar() const
Returns true if this VPExpressionRecipe produces a single scalar.
Definition VPlanRecipes.cpp:3493

llvm::VPExpressionRecipe::decompose
void decompose()
Insert the recipes of the expression back into the VPlan, directly before the current recipe.
Definition VPlanRecipes.cpp:3390

llvm::VPExpressionRecipe::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if this expression contains recipes that may have side effects.
Definition VPlanRecipes.cpp:3485

llvm::VPExpressionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Compute the cost of this recipe either using a recipe's specialized implementation or using the legacy cost model and the underlying instructions.
Definition VPlanRecipes.cpp:3404

llvm::VPExpressionRecipe::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Returns true if this expression contains recipes that may read from or write to memory.
Definition VPlanRecipes.cpp:3479

llvm::VPExpressionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3500

llvm::VPHeaderPHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this header phi recipe.
Definition VPlanRecipes.cpp:2813

llvm::VPHeaderPHIRecipe::getStartValue
VPValue * getStartValue()
Returns the start value of the phi, if one is set.
Definition VPlan.h:2473

llvm::VPHistogramRecipe::execute
void execute(VPTransformState &State) override
Produce a vectorized histogram operation.
Definition VPlanRecipes.cpp:2281

llvm::VPHistogramRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPHistogramRecipe.
Definition VPlanRecipes.cpp:2311

llvm::VPHistogramRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2344

llvm::VPHistogramRecipe::getMask
VPValue * getMask() const
Return the mask operand if one was provided, or a null pointer if all lanes should be executed unconditionally.
Definition VPlan.h:2186

llvm::VPIRFlags
Class to record and manage LLVM IR flags.
Definition VPlan.h:694

llvm::VPIRFlags::FMFs
FastMathFlagsTy FMFs
Definition VPlan.h:782

llvm::VPIRFlags::ReductionFlags
ReductionFlagsTy ReductionFlags
Definition VPlan.h:784

llvm::VPIRFlags::hasRequiredFlagsForOpcode
LLVM_ABI_FOR_TEST bool hasRequiredFlagsForOpcode(unsigned Opcode) const
Returns true if Opcode has its required flags set.
Definition VPlanRecipes.cpp:2458

llvm::VPIRFlags::flagsValidForOpcode
LLVM_ABI_FOR_TEST bool flagsValidForOpcode(unsigned Opcode) const
Returns true if the set flags are valid for Opcode.
Definition VPlanRecipes.cpp:2417

llvm::VPIRFlags::getDefaultFlags
static VPIRFlags getDefaultFlags(unsigned Opcode)
Returns default flags for Opcode for opcodes that support it, asserts otherwise.
Definition VPlanRecipes.cpp:2374

llvm::VPIRFlags::WrapFlags
WrapFlagsTy WrapFlags
Definition VPlan.h:776

llvm::VPIRFlags::printFlags
void printFlags(raw_ostream &O) const
Definition VPlanRecipes.cpp:2562

llvm::VPIRFlags::hasFastMathFlags
bool hasFastMathFlags() const
Returns true if the recipe has fast-math flags.
Definition VPlan.h:999

llvm::VPIRFlags::getFastMathFlags
LLVM_ABI_FOR_TEST FastMathFlags getFastMathFlags() const
Definition VPlanRecipes.cpp:390

llvm::VPIRFlags::isReductionOrdered
bool isReductionOrdered() const
Definition VPlan.h:1063

llvm::VPIRFlags::TruncFlags
TruncFlagsTy TruncFlags
Definition VPlan.h:777

llvm::VPIRFlags::getPredicate
CmpInst::Predicate getPredicate() const
Definition VPlan.h:971

llvm::VPIRFlags::ExactFlags
ExactFlagsTy ExactFlags
Definition VPlan.h:779

llvm::VPIRFlags::intersectFlags
void intersectFlags(const VPIRFlags &Other)
Only keep flags also present in Other.
Definition VPlanRecipes.cpp:340

llvm::VPIRFlags::VPIRFlags
VPIRFlags()
Definition VPlan.h:789

llvm::VPIRFlags::GEPFlagsStorage
uint8_t GEPFlagsStorage
Definition VPlan.h:780

llvm::VPIRFlags::getGEPNoWrapFlags
GEPNoWrapFlags getGEPNoWrapFlags() const
Definition VPlan.h:989

llvm::VPIRFlags::hasPredicate
bool hasPredicate() const
Returns true if the recipe has a comparison predicate.
Definition VPlan.h:994

llvm::VPIRFlags::DisjointFlags
DisjointFlagsTy DisjointFlags
Definition VPlan.h:778

llvm::VPIRFlags::FCmpFlags
FCmpFlagsTy FCmpFlags
Definition VPlan.h:783

llvm::VPIRFlags::NonNegFlags
NonNegFlagsTy NonNegFlags
Definition VPlan.h:781

llvm::VPIRFlags::isReductionInLoop
bool isReductionInLoop() const
Definition VPlan.h:1069

llvm::VPIRFlags::applyFlags
void applyFlags(Instruction &I) const
Apply the IR flags to I.
Definition VPlan.h:928

llvm::VPIRFlags::CmpPredStorage
uint8_t CmpPredStorage
Definition VPlan.h:775

llvm::VPIRFlags::getRecurKind
RecurKind getRecurKind() const
Definition VPlan.h:1057

llvm::VPIRInstruction::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:1891

llvm::VPIRInstruction::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPIRInstruction.
Definition VPlanRecipes.cpp:1899

llvm::VPIRInstruction::VPIRInstruction
VPIRInstruction(Instruction &I)
VPIRInstruction::create() should be used to create VPIRInstructions, as subclasses may need to be cre...
Definition VPlan.h:1720

llvm::VPIRInstruction::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1907

llvm::VPIRMetadata::intersect
void intersect(const VPIRMetadata &MD)
Intersect this VPIRMetadata object with MD, keeping only metadata nodes that are common to both.
Definition VPlanRecipes.cpp:1997

llvm::VPIRMetadata::VPIRMetadata
VPIRMetadata()=default

llvm::VPIRMetadata::print
void print(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print metadata with node IDs.
Definition VPlanRecipes.cpp:2011

llvm::VPIRMetadata::applyMetadata
void applyMetadata(Instruction &I) const
Add all metadata to I.
Definition VPlanRecipes.cpp:1992

llvm::VPInstructionWithType::getResultType
Type * getResultType() const
Definition VPlan.h:1588

llvm::VPInstructionWithType::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1824

llvm::VPInstructionWithType::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1792

llvm::VPInstruction
This is a concrete Recipe that models a single VPlan-level instruction.
Definition VPlan.h:1225

llvm::VPInstruction::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPInstruction.
Definition VPlanRecipes.cpp:1259

llvm::VPInstruction::VPInstruction
VPInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags={}, const VPIRMetadata &MD={}, DebugLoc DL=DebugLoc::getUnknown(), const Twine &Name="", Type *ResultTy=nullptr)
Definition VPlanRecipes.cpp:544

llvm::VPInstruction::doesGeneratePerAllLanes
bool doesGeneratePerAllLanes() const
Returns true if this VPInstruction generates scalar values for all lanes.
Definition VPlanRecipes.cpp:633

llvm::VPInstruction::BranchOnCond
@ BranchOnCond
Definition VPlan.h:1248

llvm::VPInstruction::ExtractLastActive
@ ExtractLastActive
Extracts the last active lane from a set of vectors.
Definition VPlan.h:1327

llvm::VPInstruction::PtrAdd
@ PtrAdd
Definition VPlan.h:1286

llvm::VPInstruction::Reverse
@ Reverse
Definition VPlan.h:1310

llvm::VPInstruction::ExtractLane
@ ExtractLane
Extracts a single lane (first operand) from a set of vector operands.
Definition VPlan.h:1318

llvm::VPInstruction::ExitingIVValue
@ ExitingIVValue
Compute the exiting value of a wide induction after vectorization, that is the value of the last lane...
Definition VPlan.h:1331

llvm::VPInstruction::Broadcast
@ Broadcast
Definition VPlan.h:1256

llvm::VPInstruction::LastActiveLane
@ LastActiveLane
Definition VPlan.h:1308

llvm::VPInstruction::BranchOnCount
@ BranchOnCount
Definition VPlan.h:1247

llvm::VPInstruction::BranchOnTwoConds
@ BranchOnTwoConds
Definition VPlan.h:1255

llvm::VPInstruction::WideIVStep
@ WideIVStep
Scale the first operand (vector step) by the second operand (scalar-step).
Definition VPlan.h:1343

llvm::VPInstruction::ExtractLastPart
@ ExtractLastPart
Definition VPlan.h:1273

llvm::VPInstruction::LogicalOr
@ LogicalOr
Definition VPlan.h:1281

llvm::VPInstruction::ExtractPenultimateElement
@ ExtractPenultimateElement
Definition VPlan.h:1279

llvm::VPInstruction::ResumeForEpilogue
@ ResumeForEpilogue
Explicit user for the resume phi of the canonical induction in the main VPlan, used by the epilogue v...
Definition VPlan.h:1321

llvm::VPInstruction::Unpack
@ Unpack
Extracts all lanes from its (non-scalable) vector operand.
Definition VPlan.h:1268

llvm::VPInstruction::ActiveLaneMask
@ ActiveLaneMask
Definition VPlan.h:1237

llvm::VPInstruction::FirstActiveLane
@ FirstActiveLane
Definition VPlan.h:1301

llvm::VPInstruction::FirstOrderRecurrenceSplice
@ FirstOrderRecurrenceSplice
Definition VPlan.h:1229

llvm::VPInstruction::ExplicitVectorLength
@ ExplicitVectorLength
Definition VPlan.h:1238

llvm::VPInstruction::ReductionStartVector
@ ReductionStartVector
Start vector for reductions with 3 operands: the original start value, the identity value for the red...
Definition VPlan.h:1314

llvm::VPInstruction::BuildVector
@ BuildVector
Creates a fixed-width vector containing all operands.
Definition VPlan.h:1263

llvm::VPInstruction::WidePtrAdd
@ WidePtrAdd
Definition VPlan.h:1289

llvm::VPInstruction::IncomingAliasMask
@ IncomingAliasMask
Definition VPlan.h:1241

llvm::VPInstruction::LogicalAnd
@ LogicalAnd
Definition VPlan.h:1280

llvm::VPInstruction::BuildStructVector
@ BuildStructVector
Given operands of (the same) struct type, creates a struct of fixed-width vectors each containing a ...
Definition VPlan.h:1260

llvm::VPInstruction::VScale
@ VScale
Returns the value for vscale.
Definition VPlan.h:1347

llvm::VPInstruction::CanonicalIVIncrementForPart
@ CanonicalIVIncrementForPart
Definition VPlan.h:1244

llvm::VPInstruction::ComputeReductionResult
@ ComputeReductionResult
Reduce the operands to the final reduction result using the operation specified via the operation's V...
Definition VPlan.h:1271

llvm::VPInstruction::Not
@ Not
Definition VPlan.h:1232

llvm::VPInstruction::CalculateTripCountMinusVF
@ CalculateTripCountMinusVF
Definition VPlan.h:1242

llvm::VPInstruction::StepVector
@ StepVector
Definition VPlan.h:1345

llvm::VPInstruction::MaskedCond
@ MaskedCond
Definition VPlan.h:1332

llvm::VPInstruction::ExtractLastLane
@ ExtractLastLane
Definition VPlan.h:1275

llvm::VPInstruction::AnyOf
@ AnyOf
Definition VPlan.h:1295

llvm::VPInstruction::NumActiveLanes
@ NumActiveLanes
Definition VPlan.h:1282

llvm::VPInstruction::hasResult
bool hasResult() const
Definition VPlan.h:1437

llvm::VPInstruction::opcodeMayReadOrWriteFromMemory
bool opcodeMayReadOrWriteFromMemory() const
Returns true if the underlying opcode may read from or write to memory.
Definition VPlanRecipes.cpp:1539

llvm::VPInstruction::dump
LLVM_DUMP_METHOD void dump() const
Print the VPInstruction to dbgs() (for debugging).
Definition VPlanRecipes.cpp:1669

llvm::VPInstruction::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the VPInstruction to O.
Definition VPlanRecipes.cpp:1674

llvm::VPInstruction::getName
StringRef getName() const
Returns the symbolic name assigned to the VPInstruction.
Definition VPlan.h:1519

llvm::VPInstruction::getOpcode
unsigned getOpcode() const
Definition VPlan.h:1416

llvm::VPInstruction::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:1596

llvm::VPInstruction::addOperand
void addOperand(VPValue *Op)
Add Op as operand of this VPInstruction.
Definition VPlanRecipes.cpp:1467

llvm::VPInstruction::isVectorToScalar
bool isVectorToScalar() const
Returns true if this VPInstruction produces a scalar value from a vector.
Definition VPlanRecipes.cpp:1441

llvm::VPInstruction::isSingleScalar
bool isSingleScalar() const
Returns true if this VPInstruction's operands are single scalars and the result is also a single scal...
Definition VPlanRecipes.cpp:1454

llvm::VPInstruction::getNumOperandsForOpcode
unsigned getNumOperandsForOpcode() const
Return the number of operands determined by the opcode of the VPInstruction, excluding mask.
Definition VPlanRecipes.cpp:563

llvm::VPInstruction::isMasked
bool isMasked() const
Returns true if the VPInstruction has a mask operand.
Definition VPlan.h:1462

llvm::VPInstruction::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1508

llvm::VPInstruction::usesFirstPartOnly
bool usesFirstPartOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first part of operand Op.
Definition VPlanRecipes.cpp:1647

llvm::VPInterleaveBase::needsMaskForGaps
bool needsMaskForGaps() const
Return true if the access needs a mask because of the gaps.
Definition VPlan.h:3113

llvm::VPInterleaveBase::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this recipe.
Definition VPlanRecipes.cpp:4643

llvm::VPInterleaveBase::getInsertPos
Instruction * getInsertPos() const
Definition VPlan.h:3117

llvm::VPInterleaveBase::getInterleaveGroup
const InterleaveGroup< Instruction > * getInterleaveGroup() const
Definition VPlan.h:3115

llvm::VPInterleaveBase::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3107

llvm::VPInterleaveBase::getStoredValues
ArrayRef< VPValue * > getStoredValues() const
Return the VPValues stored by this interleave group.
Definition VPlan.h:3136

llvm::VPInterleaveBase::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3101

llvm::VPInterleaveEVLRecipe::getEVL
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3210

llvm::VPInterleaveEVLRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4613

llvm::VPInterleaveEVLRecipe::getNumStoreOperands
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3223

llvm::VPInterleaveEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
Definition VPlanRecipes.cpp:4502

llvm::VPInterleaveRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4473

llvm::VPInterleaveRecipe::getNumStoreOperands
unsigned getNumStoreOperands() const override
Returns the number of stored operands of this interleave group.
Definition VPlan.h:3173

llvm::VPInterleaveRecipe::execute
void execute(VPTransformState &State) override
Generate the wide load or store, and shuffles.
Definition VPlanRecipes.cpp:4304

llvm::VPLane
In what follows, the term "input IR" refers to code that is fed into the vectorizer whereas the term ...
Definition VPlanHelpers.h:111

llvm::VPLane::getLastLaneForVF
static VPLane getLastLaneForVF(const ElementCount &VF)
Definition VPlanHelpers.h:152

llvm::VPLane::getLaneFromEnd
static VPLane getLaneFromEnd(const ElementCount &VF, unsigned Offset)
Definition VPlanHelpers.h:138

llvm::VPLane::getFirstLane
static VPLane getFirstLane()
Definition VPlanHelpers.h:136

llvm::VPPhiAccessors::getAsRecipe
virtual const VPRecipeBase * getAsRecipe() const =0
Return a VPRecipeBase* to the current object.

llvm::VPPhiAccessors::getIncomingValueForBlock
VPValue * getIncomingValueForBlock(const VPBasicBlock *VPBB) const
Returns the incoming value for VPBB. VPBB must be an incoming block.
Definition VPlanRecipes.cpp:1949

llvm::VPPhiAccessors::getNumIncoming
virtual unsigned getNumIncoming() const
Returns the number of incoming values, also number of incoming blocks.
Definition VPlan.h:1624

llvm::VPPhiAccessors::removeIncomingValueFor
void removeIncomingValueFor(VPBlockBase *IncomingBlock) const
Removes the incoming value for IncomingBlock, which must be a predecessor.
Definition VPlanRecipes.cpp:1940

llvm::VPPhiAccessors::getIncomingBlock
const VPBasicBlock * getIncomingBlock(unsigned Idx) const
Returns the incoming block with index Idx.
Definition VPlan.h:4543

llvm::VPPhiAccessors::incoming_values_and_blocks
detail::zippy< llvm::detail::zip_first, VPUser::const_operand_range, const_incoming_blocks_range > incoming_values_and_blocks() const
Returns an iterator range over pairs of incoming values and corresponding incoming blocks.
Definition VPlan.h:1649

llvm::VPPhiAccessors::getIncomingValue
VPValue * getIncomingValue(unsigned Idx) const
Returns the incoming VPValue with index Idx.
Definition VPlan.h:1609

llvm::VPPhiAccessors::printPhiOperands
void printPhiOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the phi operands to O.
Definition VPlanRecipes.cpp:1961

llvm::VPPhiAccessors::setIncomingValueForBlock
void setIncomingValueForBlock(const VPBasicBlock *VPBB, VPValue *V) const
Sets the incoming value for VPBB to V.
Definition VPlanRecipes.cpp:1954

llvm::VPPredInstPHIRecipe::execute
void execute(VPTransformState &State) override
Generates phi nodes for live-outs (from a replicate region) as needed to retain SSA form.
Definition VPlanRecipes.cpp:3960

llvm::VPPredInstPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3965

llvm::VPRecipeBase
VPRecipeBase is a base class modeling a sequence of one or more output IR instructions.
Definition VPlan.h:402

llvm::VPRecipeBase::mayReadFromMemory
bool mayReadFromMemory() const
Returns true if the recipe may read from memory.
Definition VPlanRecipes.cpp:111

llvm::VPRecipeBase::mayHaveSideEffects
bool mayHaveSideEffects() const
Returns true if the recipe may have side-effects.
Definition VPlanRecipes.cpp:164

llvm::VPRecipeBase::printRecipe
virtual void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const =0
Each concrete VPRecipe prints itself, without printing common information, like debug info or metadat...

llvm::VPRecipeBase::getRegion
VPRegionBlock * getRegion()
Definition VPlan.h:4744

llvm::VPRecipeBase::dump
LLVM_ABI_FOR_TEST void dump() const
Dump the recipe to stderr (for debugging).
Definition VPlan.cpp:117

llvm::VPRecipeBase::isPhi
bool isPhi() const
Returns true for PHI-like recipes.
Definition VPlanRecipes.cpp:335

llvm::VPRecipeBase::mayWriteToMemory
bool mayWriteToMemory() const
Returns true if the recipe may write to memory.
Definition VPlanRecipes.cpp:50

llvm::VPRecipeBase::computeCost
virtual InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Compute the cost of this recipe either using a recipe's specialized implementation or using the legac...
Definition VPlanRecipes.cpp:330

llvm::VPRecipeBase::getParent
VPBasicBlock * getParent()
Definition VPlan.h:477

llvm::VPRecipeBase::getDebugLoc
DebugLoc getDebugLoc() const
Returns the debug location of the recipe.
Definition VPlan.h:555

llvm::VPRecipeBase::moveBefore
void moveBefore(VPBasicBlock &BB, iplist< VPRecipeBase >::iterator I)
Unlink this recipe and insert into BB before I.
Definition VPlanRecipes.cpp:290

llvm::VPRecipeBase::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute() const
Return true if we can safely execute this recipe unconditionally even if it is masked originally.
Definition VPlanRecipes.cpp:231

llvm::VPRecipeBase::insertBefore
void insertBefore(VPRecipeBase *InsertPos)
Insert an unlinked recipe into a basic block immediately before the specified recipe.
Definition VPlanRecipes.cpp:253

llvm::VPRecipeBase::insertAfter
void insertAfter(VPRecipeBase *InsertPos)
Insert an unlinked Recipe into a basic block immediately after the specified Recipe.
Definition VPlanRecipes.cpp:267

llvm::VPRecipeBase::eraseFromParent
iplist< VPRecipeBase >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition VPlanRecipes.cpp:280

llvm::VPRecipeBase::cost
InstructionCost cost(ElementCount VF, VPCostContext &Ctx)
Return the cost of this recipe, taking into account if the cost computation should be skipped and the...
Definition VPlanRecipes.cpp:296

llvm::VPRecipeBase::print
void print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const
Print the recipe, delegating to printRecipe().
Definition VPlanRecipes.cpp:412

llvm::VPRecipeBase::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition VPlanRecipes.cpp:274

llvm::VPRecipeBase::getVPRecipeID
unsigned getVPRecipeID() const
Definition VPlan.h:523

llvm::VPRecipeBase::moveAfter
void moveAfter(VPRecipeBase *MovePos)
Unlink this recipe from its current VPBasicBlock and insert it into the VPBasicBlock that MovePos liv...
Definition VPlanRecipes.cpp:285

llvm::VPRecipeBase::VPRecipeBase
VPRecipeBase(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:467

llvm::VPRecipeValue::getScalarType
Type * getScalarType() const
Returns the scalar type of this VPRecipeValue.
Definition VPlanValue.h:337

llvm::VPRecipeValue::VPValue
friend class VPValue
Definition VPlanValue.h:316

llvm::VPReductionEVLRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition VPlanRecipes.cpp:3252

llvm::VPReductionEVLRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3616

llvm::VPReductionEVLRecipe::getEVL
VPValue * getEVL() const
The VPValue of the explicit vector length.
Definition VPlan.h:3382

llvm::VPReductionPHIRecipe::getVFScaleFactor
unsigned getVFScaleFactor() const
Get the factor that the VF of this recipe's output should be scaled by, or 1 if it isn't scaled.
Definition VPlan.h:2909

llvm::VPReductionPHIRecipe::isInLoop
bool isInLoop() const
Returns true if the phi is part of an in-loop reduction.
Definition VPlan.h:2933

llvm::VPReductionPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4804

llvm::VPReductionPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition VPlanRecipes.cpp:4778

llvm::VPReductionRecipe::isConditional
bool isConditional() const
Return true if the in-loop reduction is conditional.
Definition VPlan.h:3324

llvm::VPReductionRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of VPReductionRecipe.
Definition VPlanRecipes.cpp:3285

llvm::VPReductionRecipe::getVecOp
VPValue * getVecOp() const
The VPValue of the vector value to be reduced.
Definition VPlan.h:3335

llvm::VPReductionRecipe::getCondOp
VPValue * getCondOp() const
The VPValue of the condition for the block.
Definition VPlan.h:3337

llvm::VPReductionRecipe::getRecurrenceKind
RecurKind getRecurrenceKind() const
Return the recurrence kind for the in-loop reduction.
Definition VPlan.h:3320

llvm::VPReductionRecipe::isPartialReduction
bool isPartialReduction() const
Returns true if the reduction outputs a vector with a scaled down VF.
Definition VPlan.h:3326

llvm::VPReductionRecipe::getChainOp
VPValue * getChainOp() const
The VPValue of the scalar Chain being accumulated.
Definition VPlan.h:3333

llvm::VPReductionRecipe::isInLoop
bool isInLoop() const
Returns true if the reduction is in-loop.
Definition VPlan.h:3328

llvm::VPReductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3594

llvm::VPReductionRecipe::execute
void execute(VPTransformState &State) override
Generate the reduction in the loop.
Definition VPlanRecipes.cpp:3192

llvm::VPRegionBlock
VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks which form a Single-Entry-S...
Definition VPlan.h:4609

llvm::VPRegionBlock::isReplicator
bool isReplicator() const
An indicator whether this region is to generate multiple replicated instances of output IR correspond...
Definition VPlan.h:4685

llvm::VPReplicateRecipe::execute
void execute(VPTransformState &State) override
Generate replicas of the desired Ingredient.
Definition VPlanRecipes.cpp:3640

llvm::VPReplicateRecipe::isSingleScalar
bool isSingleScalar() const
Definition VPlan.h:3460

llvm::VPReplicateRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPReplicateRecipe.
Definition VPlanRecipes.cpp:3691

llvm::VPReplicateRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3914

llvm::VPReplicateRecipe::computeScalarType
static Type * computeScalarType(const Instruction *I, ArrayRef< VPValue * > Operands)
Compute the scalar result type for a VPReplicateRecipe wrapping I with Operands (excluding any predic...
Definition VPlanRecipes.cpp:533

llvm::VPReplicateRecipe::computeCallCost
static InstructionCost computeCallCost(Function *CalledFn, Type *ResultTy, ArrayRef< const VPValue * > ArgOps, bool IsSingleScalar, ElementCount VF, VPCostContext &Ctx)
Return the cost of scalarizing a call to CalledFn with argument operands ArgOps for a given VF.
Definition VPlanRecipes.cpp:3879

llvm::VPReplicateRecipe::getOpcode
unsigned getOpcode() const
Definition VPlan.h:3484

llvm::VPScalarIVStepsRecipe::getStepValue
VPValue * getStepValue() const
Definition VPlan.h:4288

llvm::VPScalarIVStepsRecipe::getStartIndex
VPValue * getStartIndex() const
Return the StartIndex, or null if known to be zero, valid only after unrolling.
Definition VPlan.h:4296

llvm::VPScalarIVStepsRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2990

llvm::VPScalarIVStepsRecipe::execute
void execute(VPTransformState &State) override
Generate the scalarized versions of the phi node as needed by their users.
Definition VPlanRecipes.cpp:2935

llvm::VPSingleDefRecipe
VPSingleDefRecipe is a base class for recipes that model a sequence of one or more output IR that def...
Definition VPlan.h:608

llvm::VPSingleDefRecipe::getUnderlyingInstr
Instruction * getUnderlyingInstr()
Returns the underlying instruction.
Definition VPlan.h:679

llvm::VPSingleDefRecipe::dump
LLVM_ABI_FOR_TEST LLVM_DUMP_METHOD void dump() const
Print this VPSingleDefRecipe to dbgs() (for debugging).
Definition VPlanRecipes.cpp:410

llvm::VPSingleDefRecipe::VPSingleDefRecipe
VPSingleDefRecipe(const unsigned char SC, ArrayRef< VPValue * > Operands, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:610

llvm::VPSlotTracker
This class can be used to assign names to VPValues.
Definition VPlanHelpers.h:393

llvm::VPUser
This class augments VPValue with operands which provide the inverse def-use edges from VPValue's user...
Definition VPlanValue.h:384

llvm::VPUser::printOperands
void printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const
Print the operands to O.
Definition VPlan.cpp:1527

llvm::VPUser::operands
operand_range operands()
Definition VPlanValue.h:457

llvm::VPUser::getNumOperands
unsigned getNumOperands() const
Definition VPlanValue.h:424

llvm::VPUser::op_end
operand_iterator op_end()
Definition VPlanValue.h:455

llvm::VPUser::op_begin
operand_iterator op_begin()
Definition VPlanValue.h:453

llvm::VPUser::getOperand
VPValue * getOperand(unsigned N) const
Definition VPlanValue.h:425

llvm::VPUser::addOperand
void addOperand(VPValue *Operand)
Definition VPlanValue.h:410

llvm::VPValue
This is the base class of the VPlan Def/Use graph, used for modeling the data flow into,...
Definition VPlanValue.h:50

llvm::VPValue::getScalarType
Type * getScalarType() const
Returns the scalar type of this VPValue, dispatching based on the concrete subclass.
Definition VPlan.cpp:149

llvm::VPValue::getLiveInIRValue
Value * getLiveInIRValue() const
Return the underlying IR value for a VPIRValue.
Definition VPlan.cpp:143

llvm::VPValue::isDefinedOutsideLoopRegions
bool isDefinedOutsideLoopRegions() const
Returns true if the VPValue is defined outside any loop.
Definition VPlan.cpp:1478

llvm::VPValue::getDefiningRecipe
VPRecipeBase * getDefiningRecipe()
Returns the recipe defining this VPValue or nullptr if it is not defined by a recipe,...
Definition VPlan.cpp:130

llvm::VPValue::printAsOperand
void printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const
Definition VPlan.cpp:1523

llvm::VPValue::getUnderlyingValue
Value * getUnderlyingValue() const
Return the underlying Value attached to this VPValue.
Definition VPlanValue.h:75

llvm::VPValue::setUnderlyingValue
void setUnderlyingValue(Value *Val)
Definition VPlanValue.h:208

llvm::VPValue::getSingleUser
VPUser * getSingleUser()
Return the single user of this value, or nullptr if there is not exactly one user.
Definition VPlanValue.h:178

llvm::VPVectorEndPointerRecipe::getVFValue
VPValue * getVFValue() const
Definition VPlan.h:2288

llvm::VPVectorEndPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:3103

llvm::VPVectorEndPointerRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3114

llvm::VPVectorEndPointerRecipe::getSourceElementType
Type * getSourceElementType() const
Definition VPlan.h:2285

llvm::VPVectorEndPointerRecipe::getStride
int64_t getStride() const
Definition VPlan.h:2286

llvm::VPVectorEndPointerRecipe::materializeOffset
void materializeOffset(unsigned Part=0)
Adds the offset operand to the recipe.
Definition VPlanRecipes.cpp:3074

llvm::VPVectorPointerRecipe::getStride
VPValue * getStride() const
Definition VPlan.h:2362

llvm::VPVectorPointerRecipe::getSourceElementType
Type * getSourceElementType() const
Definition VPlan.h:2377

llvm::VPVectorPointerRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3143

llvm::VPVectorPointerRecipe::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:3124

llvm::VPVectorPointerRecipe::getVFxPart
VPValue * getVFxPart() const
Definition VPlan.h:2364

llvm::VPWidenCallRecipe::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:2076

llvm::VPWidenCallRecipe::args
operand_range args()
Definition VPlan.h:2137

llvm::VPWidenCallRecipe::getCalledScalarFunction
Function * getCalledScalarFunction() const
Definition VPlan.h:2133

llvm::VPWidenCallRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCallRecipe.
Definition VPlanRecipes.cpp:2062

llvm::VPWidenCallRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the call instruction.
Definition VPlanRecipes.cpp:2029

llvm::VPWidenCallRecipe::computeCallCost
static InstructionCost computeCallCost(Function *Variant, VPCostContext &Ctx)
Return the cost of widening a call using the vector function Variant.
Definition VPlanRecipes.cpp:2069

llvm::VPWidenCallRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2089

llvm::VPWidenCanonicalIVRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4720

llvm::VPWidenCastRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2802

llvm::VPWidenCastRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce widened copies of the cast.
Definition VPlanRecipes.cpp:2781

llvm::VPWidenCastRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenCastRecipe.
Definition VPlanRecipes.cpp:2796

llvm::VPWidenGEPRecipe::execute
void execute(VPTransformState &State) override
Generate the gep nodes.
Definition VPlanRecipes.cpp:3004

llvm::VPWidenGEPRecipe::getSourceElementType
Type * getSourceElementType() const
Definition VPlan.h:2242

llvm::VPWidenGEPRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:3059

llvm::VPWidenGEPRecipe::usesFirstLaneOnly
bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the recipe only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:2999

llvm::VPWidenInductionRecipe::getStartValue
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2565

llvm::VPWidenInductionRecipe::getStepValue
VPValue * getStepValue()
Returns the step value of the induction.
Definition VPlan.h:2568

llvm::VPWidenIntOrFpInductionRecipe::getStartValue
VPIRValue * getStartValue() const
Returns the start value of the induction.
Definition VPlan.h:2671

llvm::VPWidenIntOrFpInductionRecipe::getTruncInst
TruncInst * getTruncInst()
Returns the first defined value as TruncInst, if it is one or nullptr otherwise.
Definition VPlan.h:2686

llvm::VPWidenIntOrFpInductionRecipe::isCanonical
bool isCanonical() const
Returns true if the induction is canonical.
Definition VPlanRecipes.cpp:2832

llvm::VPWidenIntOrFpInductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2819

llvm::VPWidenIntrinsicRecipe::createVectorCall
CallInst * createVectorCall(VPTransformState &State)
Helper function to produce the widened intrinsic call.
Definition VPlanRecipes.cpp:2116

llvm::VPWidenIntrinsicRecipe::getVectorIntrinsicID
Intrinsic::ID getVectorIntrinsicID() const
Return the ID of the intrinsic.
Definition VPlan.h:2022

llvm::VPWidenIntrinsicRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2236

llvm::VPWidenIntrinsicRecipe::getIntrinsicName
StringRef getIntrinsicName() const
Return the name of the intrinsic as a string.
Definition VPlanRecipes.cpp:2222

llvm::VPWidenIntrinsicRecipe::computeCallCost
static InstructionCost computeCallCost(Intrinsic::ID ID, ArrayRef< const VPValue * > Operands, const VPRecipeWithIRFlags &R, ElementCount VF, VPCostContext &Ctx)
Compute the cost of a vector intrinsic with ID and Operands.
Definition VPlanRecipes.cpp:2173

llvm::VPWidenIntrinsicRecipe::usesFirstLaneOnly
LLVM_ABI_FOR_TEST bool usesFirstLaneOnly(const VPValue *Op) const override
Returns true if the VPUser only uses the first lane of operand Op.
Definition VPlanRecipes.cpp:2226

llvm::VPWidenIntrinsicRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Produce a widened version of the vector intrinsic.
Definition VPlanRecipes.cpp:2167

llvm::VPWidenIntrinsicRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector intrinsic.
Definition VPlanRecipes.cpp:2216

llvm::VPWidenMemIntrinsicRecipe::computeMemIntrinsicCost
static InstructionCost computeMemIntrinsicCost(Intrinsic::ID IID, Type *Ty, bool IsMasked, Align Alignment, VPCostContext &Ctx)
Helper function for computing the cost of vector memory intrinsic.
Definition VPlanRecipes.cpp:2264

llvm::VPWidenMemIntrinsicRecipe::execute
void execute(VPTransformState &State) override
Produce a widened version of the vector memory intrinsic.
Definition VPlanRecipes.cpp:2257

llvm::VPWidenMemIntrinsicRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this vector memory intrinsic.
Definition VPlanRecipes.cpp:2273

llvm::VPWidenMemoryRecipe::IsMasked
bool IsMasked
Whether the memory access is masked.
Definition VPlan.h:3751

llvm::VPWidenMemoryRecipe::isConsecutive
bool isConsecutive() const
Return whether the loaded-from / stored-to addresses are consecutive.
Definition VPlan.h:3776

llvm::VPWidenMemoryRecipe::Ingredient
Instruction & Ingredient
Definition VPlan.h:3742

llvm::VPWidenMemoryRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const
Return the cost of this VPWidenMemoryRecipe.
Definition VPlanRecipes.cpp:3974

llvm::VPWidenMemoryRecipe::Consecutive
bool Consecutive
Whether the accessed addresses are consecutive.
Definition VPlan.h:3748

llvm::VPWidenMemoryRecipe::getMask
VPValue * getMask() const
Return the mask used by this recipe.
Definition VPlan.h:3786

llvm::VPWidenMemoryRecipe::Alignment
Align Alignment
Alignment information for this memory access.
Definition VPlan.h:3745

llvm::VPWidenMemoryRecipe::getAsRecipe
virtual VPRecipeBase * getAsRecipe()=0
Return a VPRecipeBase* to the current object.

llvm::VPWidenMemoryRecipe::getAddr
VPValue * getAddr() const
Return the address accessed by this recipe.
Definition VPlan.h:3779

llvm::VPWidenPHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenPHIRecipe.
Definition VPlanRecipes.cpp:4831

llvm::VPWidenPHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4837

llvm::VPWidenPHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi/select nodes.
Definition VPlanRecipes.cpp:4824

llvm::VPWidenPointerInductionRecipe::onlyScalarsGenerated
bool onlyScalarsGenerated(bool IsScalable)
Returns true if only scalar values will be generated.
Definition VPlanRecipes.cpp:4685

llvm::VPWidenPointerInductionRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4691

llvm::VPWidenRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenRecipe.
Definition VPlanRecipes.cpp:2733

llvm::VPWidenRecipe::execute
void execute(VPTransformState &State) override
Produce a widened instruction using the opcode and operands of the recipe, processing State....
Definition VPlanRecipes.cpp:2626

llvm::VPWidenRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:2771

llvm::VPlan
VPlan models a candidate for vectorization, encoding various decisions taken to produce efficient outp...
Definition VPlan.h:4757

llvm::VPlan::getDataLayout
const DataLayout & getDataLayout() const
Definition VPlan.h:4962

llvm::VPlan::getTripCount
VPValue * getTripCount() const
The trip count of the original loop.
Definition VPlan.h:4916

llvm::VPlan::getConstantInt
VPIRValue * getConstantInt(Type *Ty, uint64_t Val, bool IsSigned=false)
Return a VPIRValue wrapping a ConstantInt with the given type and value.
Definition VPlan.h:5064

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255

llvm::Value::setName
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393

llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258

llvm::Value::mutateType
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:806

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318

llvm::VectorType
Base class of all SIMD vector types.
Definition DerivedTypes.h:490

llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition DerivedTypes.h:753

llvm::VectorType::get
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.

llvm::VectorType::getElementType
Type * getElementType() const
Definition DerivedTypes.h:523

llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200

llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168

llvm::details::FixedOrScalableQuantity::multiplyCoefficientBy
constexpr LeafTy multiplyCoefficientBy(ScalarTy RHS) const
Definition TypeSize.h:256

llvm::details::FixedOrScalableQuantity::getKnownMinValue
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165

llvm::details::FixedOrScalableQuantity::divideCoefficientBy
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252

llvm::ilist_detail::node_parent_access::getParent
const ParentTy * getParent() const
Definition ilist_node.h:34

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

llvm::iplist_impl< simple_ilist< T, Options... >, ilist_traits< T > >::iterator
typename base_list_type::iterator iterator
Definition ilist.h:121

llvm::iplist_impl::erase
iterator erase(iterator where)
Definition ilist.h:204

llvm::iplist_impl::remove
pointer remove(iterator &IT)
Definition ilist.h:188

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34

llvm::ISD::BasicBlock
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81

llvm::Intrinsic::getDeinterleaveIntrinsicID
LLVM_ABI Intrinsic::ID getDeinterleaveIntrinsicID(unsigned Factor)
Returns the corresponding llvm.vector.deinterleaveN intrinsic for factor N.
Definition Intrinsics.cpp:1393

llvm::Intrinsic::getOrInsertDeclaration
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Definition Intrinsics.cpp:780

llvm::Intrinsic::ID
unsigned ID
Definition GenericSSAContext.h:28

llvm::Intrinsic::getBaseName
LLVM_ABI StringRef getBaseName(ID id)
Return the LLVM name for an intrinsic, without encoded types for overloading, such as "llvm....
Definition Intrinsics.cpp:53

llvm::MIPatternMatch::m_ZeroInt
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
Definition MIPatternMatch.h:278

llvm::PatternMatchHelpers::m_CombineOr
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
Definition PatternMatchHelpers.h:56

llvm::PatternMatch::m_Cmp
auto m_Cmp()
Matches any compare instruction and ignores it.
Definition PatternMatch.h:144

llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition PatternMatch.h:53

llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition PatternMatch.h:562

llvm::PatternMatch::m_Intrinsic
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
Definition PatternMatch.h:2848

llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition PatternMatch.h:1900

llvm::PatternMatch::m_c_LogicalAnd
LogicalOp_match< LHS, RHS, Instruction::And, true > m_c_LogicalAnd(const LHS &L, const RHS &R)
Matches L && R with LHS and RHS in either order.
Definition PatternMatch.h:3362

llvm::PatternMatch::m_c_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or, true > m_c_LogicalOr(const LHS &L, const RHS &R)
Matches L || R with LHS and RHS in either order.
Definition PatternMatch.h:3380

llvm::RISCVFenceField::R
@ R
Definition RISCVBaseInfo.h:490

llvm::SI
Definition SIInstrInfo.h:1926

llvm::VPlanPatternMatch
Definition VPlanPatternMatch.h:24

llvm::VPlanPatternMatch::m_False
specific_intval< 1 > m_False()
Definition VPlanPatternMatch.h:120

llvm::VPlanPatternMatch::m_True
specific_intval< 1 > m_True()
Definition VPlanPatternMatch.h:124

llvm::VPlanPatternMatch::m_VPValue
auto m_VPValue()
Match an arbitrary VPValue and ignore it.
Definition VPlanPatternMatch.h:51

llvm::VPlanPatternMatch::m_BranchOnCond
VPInstruction_match< VPInstruction::BranchOnCond > m_BranchOnCond()
Definition VPlanPatternMatch.h:356

llvm::VPlanPatternMatch::m_Reverse
VPInstruction_match< VPInstruction::Reverse, Op0_t > m_Reverse(const Op0_t &Op0)
Definition VPlanPatternMatch.h:497

llvm::codeview::CompileSym2Flags::EC
@ EC
Definition CodeView.h:432

llvm::logicalview::LVAttributeKind::Zero
@ Zero
Definition LVOptions.h:130

llvm::ms_demangle::QualifierMangleMode::Result
@ Result
Definition MicrosoftDemangle.h:132

llvm::rdf::Def
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384

llvm::sandboxir::Instruction
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73

llvm::vputils::isSingleScalar
bool isSingleScalar(const VPValue *VPV)
Returns true if VPV is a single scalar, either because it produces the same value for all lanes or on...
Definition VPlanUtils.cpp:404

llvm::vputils::isAddressSCEVForCost
bool isAddressSCEVForCost(const SCEV *Addr, ScalarEvolution &SE, const Loop *L)
Returns true if Addr is an address SCEV that can be passed to TTI::getAddressComputationCost,...
Definition VPlanUtils.cpp:366

llvm::vputils::onlyFirstPartUsed
bool onlyFirstPartUsed(const VPValue *Def)
Returns true if only the first part of Def is used.
Definition VPlanUtils.cpp:30

llvm::vputils::onlyFirstLaneUsed
bool onlyFirstLaneUsed(const VPValue *Def)
Returns true if only the first lane of Def is used.
Definition VPlanUtils.cpp:25

llvm::vputils::onlyScalarValuesUsed
bool onlyScalarValuesUsed(const VPValue *Def)
Returns true if only scalar values of Def are used by all users.
Definition VPlanUtils.cpp:35

llvm::vputils::isUsedByLoadStoreAddress
bool isUsedByLoadStoreAddress(const VPValue *V)
Returns true if V is used as part of the address of another load or store.
Definition VPlanUtils.cpp:852

llvm::vputils::getSCEVExprForVPValue
const SCEV * getSCEVExprForVPValue(const VPValue *V, PredicatedScalarEvolution &PSE, const Loop *L=nullptr)
Return the SCEV expression for V.
Definition VPlanUtils.cpp:170

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315

llvm::createSimpleReduction
LLVM_ABI Value * createSimpleReduction(IRBuilderBase &B, Value *Src, RecurKind RdxKind)
Create a reduction of the given vector.
Definition LoopUtils.cpp:1543

llvm::Offset
@ Offset
Definition DWP.cpp:558

llvm::zip
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738

llvm::getMinMaxReductionIntrinsicOp
LLVM_ABI Intrinsic::ID getMinMaxReductionIntrinsicOp(Intrinsic::ID RdxID)
Returns the min/max intrinsic used when expanding a min/max reduction.
Definition LoopUtils.cpp:1208

llvm::Cost
InstructionCost Cost
Definition FunctionSpecialization.h:103

llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition MachineInstrBuilder.h:65

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::map_to_vector
auto map_to_vector(ContainerTy &&C, FuncTy &&F)
Map a range to a SmallVector with element types deduced from the mapping.
Definition SmallVectorExtras.h:39

llvm::getRuntimeVF
Value * getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF)
Return the runtime value for VF.
Definition LoopVectorize.cpp:741

llvm::dyn_cast_if_present
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732

llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition iterator_range.h:70

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207

llvm::interleaveComma
void interleaveComma(const Container &c, StreamT &os, UnaryFunctor each_fn)
Definition STLExtras.h:2312

llvm::cast_or_null
auto cast_or_null(const Y &Val)
Definition Casting.h:714

llvm::concatenateVectors
LLVM_ABI Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition VectorUtils.cpp:1231

llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
Definition Instructions.h:5319

llvm::isa_and_nonnull
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676

llvm::createMinMaxOp
LLVM_ABI Value * createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition LoopUtils.cpp:1301

llvm::dyn_cast_or_null
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753

llvm::getOffset
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
Definition RuntimeDyld.cpp:172

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745

llvm::createBitMaskForGaps
LLVM_ABI Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition VectorUtils.cpp:1128

llvm::createStrideMask
LLVM_ABI llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition VectorUtils.cpp:1168

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407

llvm::getVectorizedTypeVF
ElementCount getVectorizedTypeVF(Type *Ty)
Returns the number of vector elements for a vectorized type.
Definition VectorTypeUtils.h:100

llvm::createReplicatedMask
LLVM_ABI llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition VectorUtils.cpp:1148

llvm::ComplexDeinterleavingOperation::Splat
@ Splat
Definition ComplexDeinterleavingPass.h:42

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209

llvm::none_of
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752

llvm::to_vector
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition SmallVector.h:1325

llvm::toVectorizedTy
Type * toVectorizedTy(Type *Ty, ElementCount EC)
A helper for converting to vectorized types.
Definition VectorTypeUtils.h:55

llvm::CaptureComponents::Address
@ Address
Definition ModRef.h:368

llvm::ForceTargetInstructionCost
cl::opt< unsigned > ForceTargetInstructionCost

llvm::computeScalarTypeForInstruction
LLVM_ABI Type * computeScalarTypeForInstruction(unsigned Opcode, ArrayRef< VPValue * > Operands)
Compute the scalar result type for an IR Opcode given Operands.
Definition VPlanRecipes.cpp:448

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::drop_end
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322

llvm::isVectorIntrinsicWithStructReturnOverloadAtField
LLVM_ABI bool isVectorIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic that returns a struct is overloaded at the struct elem...
Definition VectorUtils.cpp:219

llvm::IRMemLocation::Other
@ Other
Any other memory.
Definition ModRef.h:68

llvm::addOffset
static const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset)
Definition X86InstrBuilder.h:137

llvm::Data
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221

llvm::createInterleaveMask
LLVM_ABI llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition VectorUtils.cpp:1157

llvm::RecurKind
RecurKind
These are the kinds of recurrences that we support.
Definition IVDescriptors.h:35

llvm::RecurKind::UMin
@ UMin
Unsigned integer min implemented in terms of select(cmp()).
Definition IVDescriptors.h:47

llvm::RecurKind::FMinimumNum
@ FMinimumNum
FP min with llvm.minimumnum semantics.
Definition IVDescriptors.h:59

llvm::RecurKind::FindIV
@ FindIV
FindIV reduction with select(icmp(),x,y) where one of (x,y) is a loop induction variable (increasing ...
Definition IVDescriptors.h:64

llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
Definition IVDescriptors.h:42

llvm::RecurKind::FMinimum
@ FMinimum
FP min with llvm.minimum semantics.
Definition IVDescriptors.h:57

llvm::RecurKind::FMaxNum
@ FMaxNum
FP max with llvm.maxnum semantics including NaNs.
Definition IVDescriptors.h:56

llvm::RecurKind::Mul
@ Mul
Product of integers.
Definition IVDescriptors.h:41

llvm::RecurKind::FSub
@ FSub
Subtraction of floats.
Definition IVDescriptors.h:51

llvm::RecurKind::FAddChainWithSubs
@ FAddChainWithSubs
A chain of fadds and fsubs.
Definition IVDescriptors.h:50

llvm::RecurKind::None
@ None
Not a recurrence.
Definition IVDescriptors.h:37

llvm::RecurKind::AnyOf
@ AnyOf
AnyOf reduction with select(cmp(),x,y) where one of (x,y) is loop invariant, and both x and y are int...
Definition IVDescriptors.h:62

llvm::RecurKind::Xor
@ Xor
Bitwise or logical XOR of integers.
Definition IVDescriptors.h:44

llvm::RecurKind::FindLast
@ FindLast
FindLast reduction with select(cmp(),x,y) where x and y.
Definition IVDescriptors.h:68

llvm::RecurKind::FMax
@ FMax
FP max implemented in terms of select(cmp()).
Definition IVDescriptors.h:54

llvm::RecurKind::FMaximum
@ FMaximum
FP max with llvm.maximum semantics.
Definition IVDescriptors.h:58

llvm::RecurKind::FMulAdd
@ FMulAdd
Sum of float products with llvm.fmuladd(a * b + sum).
Definition IVDescriptors.h:61

llvm::RecurKind::FMul
@ FMul
Product of floats.
Definition IVDescriptors.h:52

llvm::RecurKind::SMax
@ SMax
Signed integer max implemented in terms of select(cmp()).
Definition IVDescriptors.h:46

llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
Definition IVDescriptors.h:43

llvm::RecurKind::SMin
@ SMin
Signed integer min implemented in terms of select(cmp()).
Definition IVDescriptors.h:45

llvm::RecurKind::FMin
@ FMin
FP min implemented in terms of select(cmp()).
Definition IVDescriptors.h:53

llvm::RecurKind::FMinNum
@ FMinNum
FP min with llvm.minnum semantics including NaNs.
Definition IVDescriptors.h:55

llvm::RecurKind::Sub
@ Sub
Subtraction of integers.
Definition IVDescriptors.h:39

llvm::RecurKind::Add
@ Add
Sum of integers.
Definition IVDescriptors.h:38

llvm::RecurKind::AddChainWithSubs
@ AddChainWithSubs
A chain of adds and subs.
Definition IVDescriptors.h:40

llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
Definition IVDescriptors.h:49

llvm::RecurKind::FMaximumNum
@ FMaximumNum
FP max with llvm.maximumnum semantics.
Definition IVDescriptors.h:60

llvm::RecurKind::UMax
@ UMax
Unsigned integer max implemented in terms of select(cmp()).
Definition IVDescriptors.h:48

llvm::isVectorIntrinsicWithScalarOpAtArg
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition VectorUtils.cpp:140

llvm::getRecurrenceIdentity
LLVM_ABI Value * getRecurrenceIdentity(RecurKind K, Type *Tp, FastMathFlags FMF)
Given information about a recurrence kind, return the identity for the @llvm.vector....
Definition LoopUtils.cpp:1535

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:25

llvm::cast
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946

llvm::getLoadStoreType
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
Definition Instructions.h:5348

llvm::createOrderedReduction
LLVM_ABI Value * createOrderedReduction(IRBuilderBase &B, RecurKind RdxKind, Value *Src, Value *Start)
Create an ordered reduction intrinsic using the given recurrence kind RdxKind.
Definition LoopUtils.cpp:1598

llvm::getContainedTypes
ArrayRef< Type * > getContainedTypes(Type *const &Ty)
Returns the types contained in Ty.
Definition VectorTypeUtils.h:93

llvm::toVectorTy
Type * toVectorTy(Type *Scalar, ElementCount EC)
A helper function for converting Scalar types to vector types.
Definition VectorTypeUtils.h:20

llvm::WinX64EHUnwindV2Mode::Required
@ Required
Definition CodeGen.h:173

llvm::isVectorIntrinsicWithOverloadTypeAtArg
LLVM_ABI bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
Definition VectorUtils.cpp:179

raw_ostream.h

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::TargetTransformInfo::OperandValueInfo
Definition TargetTransformInfo.h:1281

llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition TargetTransformInfo.h:1282

llvm::VPCostContext
Struct to hold various analysis needed for cost computations.
Definition VPlanHelpers.h:322

llvm::VPCostContext::isFreeScalarIntrinsic
static bool isFreeScalarIntrinsic(Intrinsic::ID ID)
Returns true if ID is a pseudo intrinsic that is dropped via scalarization rather than widened.
Definition VPlan.cpp:1946

llvm::VPCostContext::CostKind
TargetTransformInfo::TargetCostKind CostKind
Definition VPlanHelpers.h:328

llvm::VPFirstOrderRecurrencePHIRecipe::execute
void execute(VPTransformState &State) override
Generate the phi nodes.
Definition VPlanRecipes.cpp:4730

llvm::VPFirstOrderRecurrencePHIRecipe::computeCost
InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this first-order recurrence phi recipe.
Definition VPlanRecipes.cpp:4760

llvm::VPFirstOrderRecurrencePHIRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4769

llvm::VPIRFlags::DisjointFlagsTy
Definition VPlan.h:724

llvm::VPIRFlags::NonNegFlagsTy
Definition VPlan.h:729

llvm::VPIRFlags::TruncFlagsTy
Definition VPlan.h:717

llvm::VPIRFlags::WrapFlagsTy
Definition VPlan.h:710

llvm::VPIRPhi
An overlay for VPIRInstructions wrapping PHI nodes enabling convenient use of cast/dyn_cast/isa and exec...
Definition VPlan.h:1778

llvm::VPIRPhi::getIRPhi
PHINode & getIRPhi()
Definition VPlan.h:1791

llvm::VPIRPhi::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1975

llvm::VPIRPhi::execute
void execute(VPTransformState &State) override
The method which generates the output IR instructions that correspond to this VPRecipe,...
Definition VPlanRecipes.cpp:1913

llvm::VPPhi::execute
void execute(VPTransformState &State) override
Generate the instruction.
Definition VPlanRecipes.cpp:1856

llvm::VPPhi::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:1875

llvm::VPRecipeWithIRFlags::getCostForRecipeWithOpcode
InstructionCost getCostForRecipeWithOpcode(unsigned Opcode, ElementCount VF, VPCostContext &Ctx) const
Compute the cost for this recipe for VF, using Opcode and Ctx.
Definition VPlanRecipes.cpp:1052

llvm::VPRecipeWithIRFlags::VPRecipeWithIRFlags
VPRecipeWithIRFlags(const unsigned char SC, ArrayRef< VPValue * > Operands, const VPIRFlags &Flags, DebugLoc DL=DebugLoc::getUnknown())
Definition VPlan.h:1117

llvm::VPSymbolicValue
A symbolic live-in VPValue, used for values like vector trip count, VF, and VFxUF.
Definition VPlanValue.h:286

llvm::VPTransformState::CFGState::VPBB2IRBB
SmallDenseMap< const VPBasicBlock *, BasicBlock * > VPBB2IRBB
A mapping of each VPBasicBlock to the corresponding BasicBlock.
Definition VPlanHelpers.h:292

llvm::VPTransformState
VPTransformState holds information passed down when "executing" a VPlan, needed for generating the ou...
Definition VPlanHelpers.h:191

llvm::VPTransformState::CFG
struct llvm::VPTransformState::CFGState CFG

llvm::VPTransformState::get
Value * get(const VPValue *Def, bool IsScalar=false)
Get the generated vector Value for a given VPValue Def if IsScalar is false, otherwise return the gen...
Definition VPlan.cpp:313

llvm::VPTransformState::Builder
IRBuilderBase & Builder
Hold a reference to the IRBuilder used to generate output IR code.
Definition VPlanHelpers.h:309

llvm::VPTransformState::VF
ElementCount VF
The chosen Vectorization Factor of the loop being vectorized.
Definition VPlanHelpers.h:200

llvm::VPWidenLoadEVLRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide load or gather.
Definition VPlanRecipes.cpp:4073

llvm::VPWidenLoadEVLRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4122

llvm::VPWidenLoadEVLRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenLoadEVLRecipe.
Definition VPlanRecipes.cpp:4103

llvm::VPWidenLoadEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3870

llvm::VPWidenLoadRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4064

llvm::VPWidenLoadRecipe::execute
void execute(VPTransformState &State) override
Generate a wide load or gather.
Definition VPlanRecipes.cpp:4037

llvm::VPWidenStoreEVLRecipe::getStoredValue
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3971

llvm::VPWidenStoreEVLRecipe::execute
LLVM_ABI_FOR_TEST void execute(VPTransformState &State) override
Generate the wide store or scatter.
Definition VPlanRecipes.cpp:4161

llvm::VPWidenStoreEVLRecipe::printRecipe
LLVM_ABI_FOR_TEST void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4210

llvm::VPWidenStoreEVLRecipe::computeCost
LLVM_ABI_FOR_TEST InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const override
Return the cost of this VPWidenStoreEVLRecipe.
Definition VPlanRecipes.cpp:4191

llvm::VPWidenStoreEVLRecipe::getEVL
VPValue * getEVL() const
Return the EVL operand.
Definition VPlan.h:3974

llvm::VPWidenStoreRecipe::execute
void execute(VPTransformState &State) override
Generate a wide store or scatter.
Definition VPlanRecipes.cpp:4131

llvm::VPWidenStoreRecipe::printRecipe
void printRecipe(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const override
Print the recipe.
Definition VPlanRecipes.cpp:4154

llvm::VPWidenStoreRecipe::getStoredValue
VPValue * getStoredValue() const
Return the value stored by this recipe.
Definition VPlan.h:3920